pysodafair 0.1.62__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. pysodafair-0.1.62/LICENSE +21 -0
  2. pysodafair-0.1.62/PKG-INFO +190 -0
  3. pysodafair-0.1.62/README.md +164 -0
  4. pysodafair-0.1.62/pyproject.toml +26 -0
  5. pysodafair-0.1.62/pysoda/__init__.py +0 -0
  6. pysodafair-0.1.62/pysoda/constants.py +3 -0
  7. pysodafair-0.1.62/pysoda/core/__init__.py +10 -0
  8. pysodafair-0.1.62/pysoda/core/dataset_generation/__init__.py +11 -0
  9. pysodafair-0.1.62/pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  10. pysodafair-0.1.62/pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  11. pysodafair-0.1.62/pysoda/core/dataset_generation/upload.py +3951 -0
  12. pysodafair-0.1.62/pysoda/core/dataset_importing/__init__.py +1 -0
  13. pysodafair-0.1.62/pysoda/core/dataset_importing/import_dataset.py +662 -0
  14. pysodafair-0.1.62/pysoda/core/metadata/__init__.py +20 -0
  15. pysodafair-0.1.62/pysoda/core/metadata/code_description.py +109 -0
  16. pysodafair-0.1.62/pysoda/core/metadata/constants.py +32 -0
  17. pysodafair-0.1.62/pysoda/core/metadata/dataset_description.py +188 -0
  18. pysodafair-0.1.62/pysoda/core/metadata/excel_utils.py +41 -0
  19. pysodafair-0.1.62/pysoda/core/metadata/helpers.py +250 -0
  20. pysodafair-0.1.62/pysoda/core/metadata/manifest.py +112 -0
  21. pysodafair-0.1.62/pysoda/core/metadata/manifest_package/__init__.py +2 -0
  22. pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest.py +0 -0
  23. pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  24. pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  25. pysodafair-0.1.62/pysoda/core/metadata/performances.py +46 -0
  26. pysodafair-0.1.62/pysoda/core/metadata/resources.py +53 -0
  27. pysodafair-0.1.62/pysoda/core/metadata/samples.py +184 -0
  28. pysodafair-0.1.62/pysoda/core/metadata/sites.py +51 -0
  29. pysodafair-0.1.62/pysoda/core/metadata/subjects.py +172 -0
  30. pysodafair-0.1.62/pysoda/core/metadata/submission.py +91 -0
  31. pysodafair-0.1.62/pysoda/core/metadata/text_metadata.py +47 -0
  32. pysodafair-0.1.62/pysoda/core/metadata_templates/CHANGES +1 -0
  33. pysodafair-0.1.62/pysoda/core/metadata_templates/LICENSE +1 -0
  34. pysodafair-0.1.62/pysoda/core/metadata_templates/README.md +4 -0
  35. pysodafair-0.1.62/pysoda/core/metadata_templates/__init__.py +0 -0
  36. pysodafair-0.1.62/pysoda/core/metadata_templates/code_description.xlsx +0 -0
  37. pysodafair-0.1.62/pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  38. pysodafair-0.1.62/pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  39. pysodafair-0.1.62/pysoda/core/metadata_templates/manifest.xlsx +0 -0
  40. pysodafair-0.1.62/pysoda/core/metadata_templates/performances.xlsx +0 -0
  41. pysodafair-0.1.62/pysoda/core/metadata_templates/resources.xlsx +0 -0
  42. pysodafair-0.1.62/pysoda/core/metadata_templates/samples.xlsx +0 -0
  43. pysodafair-0.1.62/pysoda/core/metadata_templates/sites.xlsx +0 -0
  44. pysodafair-0.1.62/pysoda/core/metadata_templates/subjects.xlsx +0 -0
  45. pysodafair-0.1.62/pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  46. pysodafair-0.1.62/pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  47. pysodafair-0.1.62/pysoda/core/metadata_templates/submission.xlsx +0 -0
  48. pysodafair-0.1.62/pysoda/core/permissions/__init__.py +1 -0
  49. pysodafair-0.1.62/pysoda/core/permissions/permissions.py +31 -0
  50. pysodafair-0.1.62/pysoda/core/pysoda/__init__.py +2 -0
  51. pysodafair-0.1.62/pysoda/core/pysoda/soda.py +34 -0
  52. pysodafair-0.1.62/pysoda/core/pysoda/soda_object.py +55 -0
  53. pysodafair-0.1.62/pysoda/core/upload_manifests/__init__.py +1 -0
  54. pysodafair-0.1.62/pysoda/core/upload_manifests/upload_manifests.py +37 -0
  55. pysodafair-0.1.62/pysoda/schema/__init__.py +0 -0
  56. pysodafair-0.1.62/pysoda/schema/code_description.json +629 -0
  57. pysodafair-0.1.62/pysoda/schema/dataset_description.json +295 -0
  58. pysodafair-0.1.62/pysoda/schema/manifest.json +60 -0
  59. pysodafair-0.1.62/pysoda/schema/performances.json +44 -0
  60. pysodafair-0.1.62/pysoda/schema/resources.json +39 -0
  61. pysodafair-0.1.62/pysoda/schema/samples.json +97 -0
  62. pysodafair-0.1.62/pysoda/schema/sites.json +38 -0
  63. pysodafair-0.1.62/pysoda/schema/soda_schema.json +664 -0
  64. pysodafair-0.1.62/pysoda/schema/subjects.json +131 -0
  65. pysodafair-0.1.62/pysoda/schema/submission_schema.json +28 -0
  66. pysodafair-0.1.62/pysoda/utils/__init__.py +9 -0
  67. pysodafair-0.1.62/pysoda/utils/authentication.py +381 -0
  68. pysodafair-0.1.62/pysoda/utils/config.py +68 -0
  69. pysodafair-0.1.62/pysoda/utils/exceptions.py +156 -0
  70. pysodafair-0.1.62/pysoda/utils/logger.py +6 -0
  71. pysodafair-0.1.62/pysoda/utils/metadata_utils.py +74 -0
  72. pysodafair-0.1.62/pysoda/utils/pennsieveAgentUtils.py +11 -0
  73. pysodafair-0.1.62/pysoda/utils/pennsieveUtils.py +118 -0
  74. pysodafair-0.1.62/pysoda/utils/profile.py +28 -0
  75. pysodafair-0.1.62/pysoda/utils/schema_validation.py +133 -0
  76. pysodafair-0.1.62/pysoda/utils/time_utils.py +5 -0
  77. pysodafair-0.1.62/pysoda/utils/upload_utils.py +108 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 FAIR Data Innovations Hub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,190 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysodafair
3
+ Version: 0.1.62
4
+ Summary: Pysoda package for Fairdataihub tools
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Christopher Marroquin
8
+ Author-email: cmarroquin@calmi2.org
9
+ Requires-Python: >=3.6
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.6
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
21
+ Requires-Dist: openpyxl
22
+ Project-URL: Homepage, https://github.com/fairdataihub/pysodafair
23
+ Project-URL: Repository, https://github.com/fairdataihub/pysodafair
24
+ Description-Content-Type: text/markdown
25
+
26
+ # pysoda
27
+
28
+ ## Overview
29
+
30
+ Pysoda is a tool for your python workflows that can help you create datasets in compliance with your favorite FAIR(Findable, Accessible, Interoperable, Reusable) data standards. At the moment, pysoda is primarily focused on neuromodulation, neurophysiology, and related data according to the SPARC guidelines that are aimed at making data FAIR. However, we are envisioning to extend the tool to support other standards such as BIDS, FHIR, etc, in the future.
31
+
32
+ Pysoda stems from SODA, a desktop software that simplifies the organization and sharing of data that needs to comply to a FAIR data standard. While using the SODA app can be convenient for most investigators, others with coding proficiency may find it more convenient to implement automated workflows. Given that the backend of SODA contains many functions necessary for preparing and submitting a dataset that is compliant with the SPARC Data Structure (SDS) such as:
33
+
34
+ Creating standard metadata files
35
+ Generating manifest files
36
+ Automatically complying with the file/folder naming conventions
37
+ Validating against the official SDS validator
38
+ Uploading dataset to Pennsieve with SDS compliance (ignoring empty folders and non-allowed files, avoiding duplicate files and folders, etc.)
39
+ And many more
40
+
41
+ Pysoda makes these functions, which have been thoroughly tested and validated, easily integratable in automated workflows such that the investigators do not have to re-write them. This will be very similar to the [pyfairdatatools](https://github.com/AI-READI/pyfairdatatools) Python package we are developing for our [AI-READI](https://aireadi.org/) project as part of the NIH Bridge2AI program.
42
+
43
+ ## Workflow
44
+
45
+ ### Import the pysoda package into your project and initialize the soda object with the supported standard of your choosing
46
+
47
+ ```python
48
+ from pysoda import soda_create
49
+ # initialize the soda_create object
50
+ # Internal note: soda_create returns the typical sodaJSONObj with additional methods for adding data and metadata [not in version 1]
51
+ # It is passed into the module functions just like our sodaJSONObj is passed to the backend of our api
52
+
53
+ soda = soda_create(standard='sds')
54
+
55
+ # add a dataset name to the soda object
56
+ soda.set_dataset_name('my_dataset')
57
+
58
+ ```
59
+
60
+ ### Structure your data
61
+
62
+ ```python
63
+
64
+
65
+ # get your base dataset files and folders structure
66
+ dataset_structure = soda.get_dataset_structure()
67
+
68
+ # fill out your dataset structure.
69
+ # NOTE: YOu will want to reference the
70
+ # dataset_structure key in the soda_schema.json file to understand the structure
71
+ # and what is required.
72
+ dataset_structure['folders'] = {
73
+ 'data': {
74
+ 'files': {
75
+ 'file1': {
76
+ 'path': '/home/user/file1.txt', 'relativePath': '/data/file1.txt', 'action': 'new'
77
+ },
78
+ 'file2': {
79
+ 'path': '/home/user/file2.txt', 'relativePath': '/data/file2.txt', 'action': 'new'
80
+ }
81
+ },
82
+ 'folders': {
83
+ 'primary': {
84
+ 'files': {
85
+ 'file3': {
86
+ 'path': '/home/user/file3.txt', 'relativePath': '/data/primary/file3.txt', 'action': 'new'
87
+ }
88
+ }
89
+ }
90
+ },
91
+ 'relativePath': '/data'
92
+ },
93
+ 'files': {},
94
+ 'relativePath': '/'
95
+ }
96
+
97
+
98
+ # map your imported data files to the entity structure defined in the soda schema [here](soda_schema.py)
99
+ entity_structure = soda.get_entity_structure()
100
+
101
+ # fill out your entity structure using the schema as a reference
102
+ # NOTE: data model not finalized
103
+ entity = {'subjectId': 'sub-1', 'metadata': {'age': '1 year', 'sex': 'female'}, 'data-file': '/data/file1.txt'}
104
+ entity_structure['subjects'].append(entity)
105
+
106
+
107
+
108
+ ```
109
+
110
+ ### Create your dataset metadata
111
+
112
+ ```python
113
+
114
+ # import the metadata module from the soda_create package
115
+ from pysoda import metadata
116
+
117
+ # define your submission metadata
118
+ submission = soda.get_submission_metadata()
119
+
120
+ submission['consortium-data-standard'] = 'standard'
121
+ submission['funding-consortium'] = 'SPARC'
122
+ submission['award-number'] = '12345'
123
+ submission['milestone-acheieved'] = ['one', 'two', 'three']
124
+ submission['filepath'] = 'path/to/destination'
125
+
126
+ # create the excel file for the submission metadata
127
+ metadata.submission.create(soda, file_output_location='path/to/output')
128
+
129
+
130
+ # repeat
131
+ metadata.subjects.create(soda, file_output_location='path/to/output')
132
+ metadata.samples.create(soda, file_output_location='path/to/output')
133
+ metadata.performances.create(soda, file_output_location='path/to/output')
134
+ metadata.sites.create(soda, file_output_location='path/to/output')
135
+ metadata.code.create(soda, file_output_location='path/to/output')
136
+ metadata.manifest.create(soda, file_output_location='path/to/output')
137
+
138
+ ```
139
+
140
+ ### Generate your dataset
141
+
142
+ #### Generate locally
143
+
144
+ ```python
145
+
146
+ from pysoda import generate
147
+
148
+ # set the generation options
149
+ soda.set_generate_dataset_options(destination='local', path='path/to/destination', dataset_name='my_dataset')
150
+
151
+ # generate the dataset
152
+ generate(soda)
153
+
154
+ ```
155
+
156
+ #### Generate on Pennsieve
157
+
158
+ ```python
159
+ from pysoda import generate
160
+
161
+ # provide the Pennsieve API Key and secret
162
+ soda.upload.auth(api_key='api_key', api_secret='api_secret')
163
+
164
+ # upload new dataset
165
+ # NOTE: You will need to download and start the Pennsieve Agent [here](https://app.pennsieve.io) to upload data to Pennsieve
166
+ dataset_id = generate(soda) # returns dataset_id
167
+
168
+ # OR upload to an existing pennsieve dataset
169
+ # set the generate options in the soda object
170
+ soda.set_generate_dataset_options(destination='existing-ps', if_existing="merge", if_existing_files="replace", dataset_id=dataset_id)
171
+ update_existing(soda)
172
+ ```
173
+
174
+ ## Utilities
175
+
176
+ ### Compare a dataset on Pennsieve and a local dataset for differences
177
+
178
+ ```python
179
+ from pysoda import compare
180
+
181
+ # provide the Pennsieve API Key and secret
182
+ soda.upload.auth(api_key='api_key', api_secret='api_secret')
183
+
184
+ # import the dataset from Pennsieve
185
+ soda.import_dataset(dataset_id='dataset_id')
186
+
187
+ # compare the Pennsieve dataset with the local dataset
188
+ results = compare(soda, local_dataset_location='path/to/local/dataset')
189
+ ```
190
+
@@ -0,0 +1,164 @@
1
+ # pysoda
2
+
3
+ ## Overview
4
+
5
+ Pysoda is a tool for your python workflows that can help you create datasets in compliance with your favorite FAIR(Findable, Accessible, Interoperable, Reusable) data standards. At the moment, pysoda is primarily focused on neuromodulation, neurophysiology, and related data according to the SPARC guidelines that are aimed at making data FAIR. However, we are envisioning to extend the tool to support other standards such as BIDS, FHIR, etc, in the future.
6
+
7
+ Pysoda stems from SODA, a desktop software that simplifies the organization and sharing of data that needs to comply to a FAIR data standard. While using the SODA app can be convenient for most investigators, others with coding proficiency may find it more convenient to implement automated workflows. Given that the backend of SODA contains many functions necessary for preparing and submitting a dataset that is compliant with the SPARC Data Structure (SDS) such as:
8
+
9
+ Creating standard metadata files
10
+ Generating manifest files
11
+ Automatically complying with the file/folder naming conventions
12
+ Validating against the offical SDS validator
13
+ Uploading dataset to Pennsieve with SDS compliance (ignoring empty folders and non-allowed files, avoiding duplicate files and folders, etc.)
14
+ And many more
15
+
16
+ Pysoda makes these functions, which have been thoroughtly tested and validated, easily integratable in automated workflows such that the investigators do not have to re-write them. This will be very similar to the [pyfairdatatools](https://github.com/AI-READI/pyfairdatatools) Python package we are developing for our [AI-READI](https://aireadi.org/) project as part of the NIH Bridge2AI program.
17
+
18
+ ## Workflow
19
+
20
+ ### Import the pysoda package into your project and initialize the soda object with the supported standard of your choosing
21
+
22
+ ```python
23
+ from pysoda import soda_create
24
+ # initialize the soda_create object
25
+ # Internal note: soda_create returns the typical sodaJSONObj with additional methods for adding data and metadata [not in version 1]
26
+ # It is passed into the module functions just like our sodaJSONObj is passed to the backend of our api
27
+
28
+ soda = soda_create(standard='sds')
29
+
30
+ # add a dataset name to the soda object
31
+ soda.set_dataset_name('my_dataset')
32
+
33
+ ```
34
+
35
+ ### Structure your data
36
+
37
+ ```python
38
+
39
+
40
+ # get your base dataset files and folders structure
41
+ dataset_structure = soda.get_dataset_structure()
42
+
43
+ # fill out your dataset structure.
44
+ # NOTE: YOu will want to reference the
45
+ # dataset_structure key in the soda_schema.json file to understand the structure
46
+ # and what is required.
47
+ dataset_structure['folders'] = {
48
+ 'data': {
49
+ 'files': {
50
+ 'file1': {
51
+ 'path': '/home/user/file1.txt', 'relativePath': '/data/file1.txt', 'action': 'new'
52
+ },
53
+ 'file2': {
54
+ 'path': '/home/user/file2.txt', 'relativePath': '/data/file2.txt', 'action': 'new'
55
+ }
56
+ },
57
+ 'folders': {
58
+ 'primary': {
59
+ 'files': {
60
+ 'file3': {
61
+ 'path': '/home/user/file3.txt', 'relativePath': '/data/primary/file3.txt', 'action': 'new'
62
+ }
63
+ }
64
+ }
65
+ },
66
+ 'relativePath': '/data'
67
+ },
68
+ 'files': {},
69
+ 'relativePath': '/'
70
+ }
71
+
72
+
73
+ # map your imported data files to the entity structure defined in the soda schema [here](soda_schema.py)
74
+ entity_structure = soda.get_entity_structure()
75
+
76
+ # fill out your entity structure using the schema as a reference
77
+ # NOTE: data model not finalized
78
+ entity = {'subjectId': 'sub-1', 'metadata': {'age': '1 year', 'sex': 'female'}, 'data-file': '/data/file1.txt'}
79
+ entity_structure['subjects'].append(entity)
80
+
81
+
82
+
83
+ ```
84
+
85
+ ### Create your dataset metadata
86
+
87
+ ```python
88
+
89
+ # import the metadata module from the soda_create package
90
+ from pysoda import metadata
91
+
92
+ # define your submission metadata
93
+ submission = soda.get_submission_metadata()
94
+
95
+ submission['consortium-data-standard'] = 'standard'
96
+ submission['funding-consortium'] = 'SPARC'
97
+ submission['award-number'] = '12345'
98
+ submission['milestone-acheieved'] = ['one', 'two', 'three']
99
+ submission['filepath'] = 'path/to/destination'
100
+
101
+ # create the excel file for the submission metadata
102
+ metadata.submission.create(soda, file_output_location='path/to/output')
103
+
104
+
105
+ # repeat
106
+ metadata.subjects.create(soda, file_output_location='path/to/output')
107
+ metadata.samples.create(soda, file_output_location='path/to/output')
108
+ metadata.performances.create(soda, file_output_location='path/to/output')
109
+ metadata.sites.create(soda, file_output_location='path/to/output')
110
+ metadata.code.create(soda, file_output_location='path/to/output')
111
+ metadata.manifest.create(soda, file_output_location='path/to/output')
112
+
113
+ ```
114
+
115
+ ### Generate your dataset
116
+
117
+ #### Generate locally
118
+
119
+ ```python
120
+
121
+ from pysoda import generate
122
+
123
+ # set the generation options
124
+ soda.set_generate_dataset_options(destination='local', path='path/to/destination', dataset_name='my_dataset')
125
+
126
+ # generate the dataset
127
+ generate(soda)
128
+
129
+ ```
130
+
131
+ #### Generate on Pennsieve
132
+
133
+ ```python
134
+ from pysoda import generate
135
+
136
+ # provide the Pennsieve API Key and secret
137
+ soda.upload.auth(api_key='api, api_secret='api_secret)
138
+
139
+ # upload new dataset
140
+ # NOTE: You will need to download and start the Pennsieve Agent [here](https://app.pennsieve.io) to upload data to Pennsieve
141
+ dataset_id = generate(soda) # returns dataset_id
142
+
143
+ # OR upload to an existing pennsieve dataset
144
+ # set the generate options in the soda object
145
+ soda.set_generate_dataset_options(destination='existing-ps', if_existing="merge", if_existing_files="replace", dataset_id=dataset_id)
146
+ update_existing(soda)
147
+ ```
148
+
149
+ ## Utilities
150
+
151
+ ### Compare a dataset on Pennsieve and a local dataset for differences
152
+
153
+ ```python
154
+ from pysoda import compare
155
+
156
+ # provide the Pennsieve API Key and secret
157
+ soda.upload.auth(api_key='api, api_secret='api_secret)
158
+
159
+ # import the dataset from Pennsieve
160
+ soda.import_dataset(dataset_id='dataset_id')
161
+
162
+ # compare the Pennsieve dataset with the local dataset
163
+ results = compare(soda, local_dataset_location='path/to/local/dataset')
164
+ ```
@@ -0,0 +1,26 @@
1
+ [tool.poetry]
2
+ name = "pysodafair"
3
+ version = "0.1.62"
4
+ description = "Pysoda package for Fairdataihub tools"
5
+ authors = ["Christopher Marroquin <cmarroquin@calmi2.org>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/fairdataihub/pysodafair"
9
+ repository = "https://github.com/fairdataihub/pysodafair"
10
+ packages = [
11
+ { include = "pysoda" }
12
+ ]
13
+ include = [
14
+ "pysoda/core/metadata_templates/*",
15
+ "pysoda/core/metadata_templates/*.xlsx",
16
+ "pysoda/schema/*.json"
17
+ ]
18
+
19
+ [tool.poetry.dependencies]
20
+ python = ">=3.6"
21
+ openpyxl = "*"
22
+
23
+
24
+ [build-system]
25
+ requires = ["poetry-core"]
26
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,3 @@
# Base URLs for the Pennsieve REST APIs (v1 and v2).
PENNSIEVE_URL = "https://api.pennsieve.io"
PENNSIEVE_2_URL = "https://api2.pennsieve.io"
@@ -0,0 +1,10 @@
import logging

from .permissions import has_edit_permissions


# Package-level logger; applications are expected to attach their own
# handlers, so only a default level is configured here.
logger = logging.getLogger(__name__)
# Optional: Provide a default configuration if no handlers are set
logger.setLevel(logging.WARNING)
@@ -0,0 +1,11 @@
1
+ from .upload import (
2
+ create_folder_level_manifest,
3
+ check_empty_files_folders,
4
+ main_curate_function,
5
+ main_curate_function_progress,
6
+ generate_manifest_file_locally,
7
+ generate_manifest_file_data,
8
+ check_json_size,
9
+ clean_json_structure,
10
+ check_server_access_to_files,
11
+ )
@@ -0,0 +1 @@
1
+ from .manifest_session import UploadManifestSession
@@ -0,0 +1,146 @@
1
+ from pennsieve2 import Pennsieve
2
+ import re
3
+ import math
4
+
5
+
6
+
7
class UploadManifestSession:
    """State holder for a single Pennsieve upload-manifest session.

    Tracks which agent manifest is being uploaded (``df_mid``), overall
    upload progress totals, and the file-rename flow state.  A ``Pennsieve``
    client is created lazily on first use (requires the Pennsieve Agent to
    be installed and running).
    """

    # Page size used by the Pennsieve agent's manifest file listing.
    _PAGE_SIZE = 1000

    # Manifest-file statuses indicating the file's bytes reached Pennsieve.
    _UPLOADED_STATUSES = ("UPLOADED", "IMPORTED", "FINALIZED", "VERIFIED")

    def __init__(self):
        # NOTE(review): these were previously class attributes; they are now
        # per-instance so separate sessions cannot share mutable state.
        self.df_mid = None  # manifest id of the dataset being uploaded
        self.ps = None  # lazily-created Pennsieve client

        # Upload progress values.
        self.main_total_generate_dataset_size = None
        self.total_files_to_upload = None
        self.elapsed_time = None

        # Rename-flow values.
        self.renaming_files_flow = False
        self.rename_total_files = None
        self.list_of_files_to_rename = None

    def _get_client(self):
        """Return the shared Pennsieve client, creating it on first use."""
        if self.ps is None:
            self.ps = Pennsieve()
        return self.ps

    def set_df_mid(self, id):
        self.df_mid = id

    def get_df_mid(self):
        return self.df_mid

    def set_elapsed_time(self, time):
        self.elapsed_time = time

    def get_elapsed_time(self):
        return self.elapsed_time

    def set_main_total_generate_dataset_size(self, size):
        self.main_total_generate_dataset_size = size

    def get_main_total_generate_dataset_size(self):
        return self.main_total_generate_dataset_size

    def set_total_files_to_upload(self, count):
        self.total_files_to_upload = count

    def get_total_files_to_upload(self):
        return self.total_files_to_upload

    def set_rename_total_files(self, count):
        self.rename_total_files = count

    def get_rename_total_files(self):
        return self.rename_total_files

    def set_list_of_files_to_rename(self, list):
        # Parameter name kept for backward compatibility even though it
        # shadows the builtin.
        self.list_of_files_to_rename = list

    def get_list_of_files_to_rename(self):
        return self.list_of_files_to_rename

    def set_renaming_files_flow(self, value):
        self.renaming_files_flow = value

    def get_renaming_files_flow(self):
        return self.renaming_files_flow

    def df_mid_has_progress(self):
        """Return True if the tracked manifest exists and is still 'Initiated'.

        Any agent error while syncing or listing manifests is treated as
        "no progress" rather than raised.
        """
        ps = self._get_client()
        try:
            ps.manifest.sync(self.df_mid)
        except Exception:
            return False

        try:
            mfs = ps.list_manifests()
        except Exception:
            # There are no manifests created yet.
            return False
        return any(mf.id == self.df_mid and mf.status == "Initiated" for mf in mfs)

    def get_remaining_file_count(self, mid, total_files):
        """Count manifest files that still need uploading.

        Pages through the agent's file listing and counts entries whose
        status is LOCAL, REGISTERED, or FAILED (i.e. not yet uploaded).
        """
        ps = self._get_client()
        remaining_files = 0
        for page in range(math.ceil(total_files / self._PAGE_SIZE)):
            file_page = ps.manifest.list_files(mid, page * self._PAGE_SIZE, self._PAGE_SIZE)
            # If there is no node_id then an upload hasn't started yet - all
            # files are remaining; count every pending/failed status marker
            # in the listing's string form.
            remaining_files += len(
                re.findall(r'status: REGISTERED|status: LOCAL|status: FAILED', str(file_page))
            )
        return remaining_files

    def create_obj_from_string(self, s):
        """Parse the agent's textual ``file { key: value ... }`` listing.

        Returns a list of dicts, one per ``file { ... }`` entry, mapping each
        line's key to its value.  Splits on the FIRST ': ' only so values
        that themselves contain ': ' are not truncated (the previous
        ``split(': ')[1]`` also raised IndexError on separator-less lines;
        such lines are now skipped).
        """
        parsed_objects = []
        for obj in re.findall(r'file {([^}]*?)}', s, re.DOTALL):
            parsed_object = {}
            for raw_line in obj.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue
                key, sep, value = line.partition(': ')
                if sep:
                    parsed_object[key] = value
            parsed_objects.append(parsed_object)
        return parsed_objects

    def calculate_completed_upload_size(self, mid, bytes_per_file_dict, total_files):
        """Sum the byte sizes of files whose upload has completed.

        ``bytes_per_file_dict`` maps local source paths to file sizes; paths
        missing from the dict contribute 0 bytes.
        """
        ps = self._get_client()
        total_bytes_uploaded = 0
        for page in range(math.ceil(total_files / self._PAGE_SIZE)):
            file_string = ps.manifest.list_files(mid, page * self._PAGE_SIZE, self._PAGE_SIZE)
            for obj in self.create_obj_from_string(str(file_string)):
                # Entries without a status have not finished; skip them.
                if obj.get('status') in self._UPLOADED_STATUSES:
                    # source_path is wrapped in quotation marks; strip the
                    # first and last characters to recover the raw path.
                    file_path = obj['source_path'][1:-1]
                    total_bytes_uploaded += int(bytes_per_file_dict.get(file_path, 0))

        return total_bytes_uploaded