pysodafair 0.1.62__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysodafair-0.1.62/LICENSE +21 -0
- pysodafair-0.1.62/PKG-INFO +190 -0
- pysodafair-0.1.62/README.md +164 -0
- pysodafair-0.1.62/pyproject.toml +26 -0
- pysodafair-0.1.62/pysoda/__init__.py +0 -0
- pysodafair-0.1.62/pysoda/constants.py +3 -0
- pysodafair-0.1.62/pysoda/core/__init__.py +10 -0
- pysodafair-0.1.62/pysoda/core/dataset_generation/__init__.py +11 -0
- pysodafair-0.1.62/pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
- pysodafair-0.1.62/pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
- pysodafair-0.1.62/pysoda/core/dataset_generation/upload.py +3951 -0
- pysodafair-0.1.62/pysoda/core/dataset_importing/__init__.py +1 -0
- pysodafair-0.1.62/pysoda/core/dataset_importing/import_dataset.py +662 -0
- pysodafair-0.1.62/pysoda/core/metadata/__init__.py +20 -0
- pysodafair-0.1.62/pysoda/core/metadata/code_description.py +109 -0
- pysodafair-0.1.62/pysoda/core/metadata/constants.py +32 -0
- pysodafair-0.1.62/pysoda/core/metadata/dataset_description.py +188 -0
- pysodafair-0.1.62/pysoda/core/metadata/excel_utils.py +41 -0
- pysodafair-0.1.62/pysoda/core/metadata/helpers.py +250 -0
- pysodafair-0.1.62/pysoda/core/metadata/manifest.py +112 -0
- pysodafair-0.1.62/pysoda/core/metadata/manifest_package/__init__.py +2 -0
- pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest.py +0 -0
- pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
- pysodafair-0.1.62/pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
- pysodafair-0.1.62/pysoda/core/metadata/performances.py +46 -0
- pysodafair-0.1.62/pysoda/core/metadata/resources.py +53 -0
- pysodafair-0.1.62/pysoda/core/metadata/samples.py +184 -0
- pysodafair-0.1.62/pysoda/core/metadata/sites.py +51 -0
- pysodafair-0.1.62/pysoda/core/metadata/subjects.py +172 -0
- pysodafair-0.1.62/pysoda/core/metadata/submission.py +91 -0
- pysodafair-0.1.62/pysoda/core/metadata/text_metadata.py +47 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/CHANGES +1 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/LICENSE +1 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/README.md +4 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/__init__.py +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/code_description.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/manifest.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/performances.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/resources.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/samples.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/sites.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/subjects.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/metadata_templates/submission.xlsx +0 -0
- pysodafair-0.1.62/pysoda/core/permissions/__init__.py +1 -0
- pysodafair-0.1.62/pysoda/core/permissions/permissions.py +31 -0
- pysodafair-0.1.62/pysoda/core/pysoda/__init__.py +2 -0
- pysodafair-0.1.62/pysoda/core/pysoda/soda.py +34 -0
- pysodafair-0.1.62/pysoda/core/pysoda/soda_object.py +55 -0
- pysodafair-0.1.62/pysoda/core/upload_manifests/__init__.py +1 -0
- pysodafair-0.1.62/pysoda/core/upload_manifests/upload_manifests.py +37 -0
- pysodafair-0.1.62/pysoda/schema/__init__.py +0 -0
- pysodafair-0.1.62/pysoda/schema/code_description.json +629 -0
- pysodafair-0.1.62/pysoda/schema/dataset_description.json +295 -0
- pysodafair-0.1.62/pysoda/schema/manifest.json +60 -0
- pysodafair-0.1.62/pysoda/schema/performances.json +44 -0
- pysodafair-0.1.62/pysoda/schema/resources.json +39 -0
- pysodafair-0.1.62/pysoda/schema/samples.json +97 -0
- pysodafair-0.1.62/pysoda/schema/sites.json +38 -0
- pysodafair-0.1.62/pysoda/schema/soda_schema.json +664 -0
- pysodafair-0.1.62/pysoda/schema/subjects.json +131 -0
- pysodafair-0.1.62/pysoda/schema/submission_schema.json +28 -0
- pysodafair-0.1.62/pysoda/utils/__init__.py +9 -0
- pysodafair-0.1.62/pysoda/utils/authentication.py +381 -0
- pysodafair-0.1.62/pysoda/utils/config.py +68 -0
- pysodafair-0.1.62/pysoda/utils/exceptions.py +156 -0
- pysodafair-0.1.62/pysoda/utils/logger.py +6 -0
- pysodafair-0.1.62/pysoda/utils/metadata_utils.py +74 -0
- pysodafair-0.1.62/pysoda/utils/pennsieveAgentUtils.py +11 -0
- pysodafair-0.1.62/pysoda/utils/pennsieveUtils.py +118 -0
- pysodafair-0.1.62/pysoda/utils/profile.py +28 -0
- pysodafair-0.1.62/pysoda/utils/schema_validation.py +133 -0
- pysodafair-0.1.62/pysoda/utils/time_utils.py +5 -0
- pysodafair-0.1.62/pysoda/utils/upload_utils.py +108 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 FAIR Data Innovations Hub
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pysodafair
|
|
3
|
+
Version: 0.1.62
|
|
4
|
+
Summary: Pysoda package for Fairdataihub tools
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Christopher Marroquin
|
|
8
|
+
Author-email: cmarroquin@calmi2.org
|
|
9
|
+
Requires-Python: >=3.6
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Requires-Dist: openpyxl
|
|
22
|
+
Project-URL: Homepage, https://github.com/fairdataihub/pysodafair
|
|
23
|
+
Project-URL: Repository, https://github.com/fairdataihub/pysodafair
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# pysoda
|
|
27
|
+
|
|
28
|
+
## Overview
|
|
29
|
+
|
|
30
|
+
Pysoda is a tool for your python workflows that can help you create datasets in compliance with your favorite FAIR(Findable, Accessible, Interoperable, Reusable) data standards. At the moment, pysoda is primarily focused on neuromodulation, neurophysiology, and related data according to the SPARC guidelines that are aimed at making data FAIR. However, we are envisioning to extend the tool to support other standards such as BIDS, FHIR, etc, in the future.
|
|
31
|
+
|
|
32
|
+
Pysoda stems from SODA, a desktop software that simplifies the organization and sharing of data that needs to comply to a FAIR data standard. While using the SODA app can be convenient for most investigators, others with coding proficiency may find it more convenient to implement automated workflows. Given that the backend of SODA contains many functions necessary for preparing and submitting a dataset that is compliant with the SPARC Data Structure (SDS) such as:
|
|
33
|
+
|
|
34
|
+
Creating standard metadata files
|
|
35
|
+
Generating manifest files
|
|
36
|
+
Automatically complying with the file/folder naming conventions
|
|
37
|
+
Validating against the official SDS validator
|
|
38
|
+
Uploading dataset to Pennsieve with SDS compliance (ignoring empty folders and non-allowed files, avoiding duplicate files and folders, etc.)
|
|
39
|
+
And many more
|
|
40
|
+
|
|
41
|
+
Pysoda makes these functions, which have been thoroughly tested and validated, easily integratable in automated workflows such that the investigators do not have to re-write them. This will be very similar to the [pyfairdatatools](https://github.com/AI-READI/pyfairdatatools) Python package we are developing for our [AI-READI](https://aireadi.org/) project as part of the NIH Bridge2AI program.
|
|
42
|
+
|
|
43
|
+
## Workflow
|
|
44
|
+
|
|
45
|
+
### Import the pysoda package into your project and initialize the soda object with the supported standard of your choosing
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pysoda import soda_create
|
|
49
|
+
# initialize the soda_create object
|
|
50
|
+
# Internal note: soda_create returns the typical sodaJSONObj with additional methods for adding data and metadata [not in version 1]
|
|
51
|
+
# It is passed into the module functions just like our sodaJSONObj is passed to the backend of our api
|
|
52
|
+
|
|
53
|
+
soda = soda_create(standard='sds')
|
|
54
|
+
|
|
55
|
+
# add a dataset name to the soda object
|
|
56
|
+
soda.set_dataset_name('my_dataset')
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Structure your data
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# get your base dataset files and folders structure
|
|
66
|
+
dataset_structure = soda.get_dataset_structure()
|
|
67
|
+
|
|
68
|
+
# fill out your dataset structure.
|
|
69
|
+
# NOTE: You will want to reference the
|
|
70
|
+
# dataset_structure key in the soda_schema.json file to understand the structure
|
|
71
|
+
# and what is required.
|
|
72
|
+
dataset_structure['folders'] = {
|
|
73
|
+
'data': {
|
|
74
|
+
'files': {
|
|
75
|
+
'file1': {
|
|
76
|
+
'path': '/home/user/file1.txt', 'relativePath': '/data/file1.txt', 'action': 'new'
|
|
77
|
+
},
|
|
78
|
+
'file2': {
|
|
79
|
+
'path': '/home/user/file2.txt', 'relativePath': '/data/file2.txt', 'action': 'new'
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
'folders': {
|
|
83
|
+
'primary': {
|
|
84
|
+
'files': {
|
|
85
|
+
'file3': {
|
|
86
|
+
'path': '/home/user/file3.txt', 'relativePath': '/data/primary/file3.txt', 'action': 'new'
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
'relativePath': '/data'
|
|
92
|
+
},
|
|
93
|
+
'files': {},
|
|
94
|
+
'relativePath': '/'
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# map your imported data files to the entity structure defined in the soda schema [here](soda_schema.py)
|
|
99
|
+
entity_structure = soda.get_entity_structure()
|
|
100
|
+
|
|
101
|
+
# fill out your entity structure using the schema as a reference
|
|
102
|
+
# NOTE: data model not finalized
|
|
103
|
+
entity = {'subjectId': 'sub-1', 'metadata': {'age': '1 year', 'sex': 'female'}, 'data-file': '/data/file1.txt'}
|
|
104
|
+
entity_structure['subjects'].append(entity)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Create your dataset metadata
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
|
|
114
|
+
# import the metadata module from the soda_create package
|
|
115
|
+
from pysoda import metadata
|
|
116
|
+
|
|
117
|
+
# define your submission metadata
|
|
118
|
+
submission = soda.get_submission_metadata()
|
|
119
|
+
|
|
120
|
+
submission['consortium-data-standard'] = 'standard'
|
|
121
|
+
submission['funding-consortium'] = 'SPARC'
|
|
122
|
+
submission['award-number'] = '12345'
|
|
123
|
+
submission['milestone-acheieved'] = ['one', 'two', 'three']
|
|
124
|
+
submission['filepath'] = 'path/to/destination'
|
|
125
|
+
|
|
126
|
+
# create the excel file for the submission metadata
|
|
127
|
+
metadata.submission.create(soda, file_output_location='path/to/output')
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# repeat
|
|
131
|
+
metadata.subjects.create(soda, file_output_location='path/to/output')
|
|
132
|
+
metadata.samples.create(soda, file_output_location='path/to/output')
|
|
133
|
+
metadata.performances.create(soda, file_output_location='path/to/output')
|
|
134
|
+
metadata.sites.create(soda, file_output_location='path/to/output')
|
|
135
|
+
metadata.code.create(soda, file_output_location='path/to/output')
|
|
136
|
+
metadata.manifest.create(soda, file_output_location='path/to/output')
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Generate your dataset
|
|
141
|
+
|
|
142
|
+
#### Generate locally
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
|
|
146
|
+
from pysoda import generate
|
|
147
|
+
|
|
148
|
+
# set the generation options
|
|
149
|
+
soda.set_generate_dataset_options(destination='local', path='path/to/destination', dataset_name='my_dataset')
|
|
150
|
+
|
|
151
|
+
# generate the dataset
|
|
152
|
+
generate(soda)
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Generate on Pennsieve
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from pysoda import generate
|
|
160
|
+
|
|
161
|
+
# provide the Pennsieve API Key and secret
|
|
162
|
+
soda.upload.auth(api_key='api_key', api_secret='api_secret')
|
|
163
|
+
|
|
164
|
+
# upload new dataset
|
|
165
|
+
# NOTE: You will need to download and start the Pennsieve Agent [here](https://app.pennsieve.io) to upload data to Pennsieve
|
|
166
|
+
dataset_id = generate(soda) # returns dataset_id
|
|
167
|
+
|
|
168
|
+
# OR upload to an existing pennsieve dataset
|
|
169
|
+
# set the generate options in the soda object
|
|
170
|
+
soda.set_generate_dataset_options(destination='existing-ps', if_existing="merge", if_existing_files="replace", dataset_id=dataset_id)
|
|
171
|
+
update_existing(soda)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Utilities
|
|
175
|
+
|
|
176
|
+
### Compare a dataset on Pennsieve and a local dataset for differences
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from pysoda import compare
|
|
180
|
+
|
|
181
|
+
# provide the Pennsieve API Key and secret
|
|
182
|
+
soda.upload.auth(api_key='api_key', api_secret='api_secret')
|
|
183
|
+
|
|
184
|
+
# import the dataset from Pennsieve
|
|
185
|
+
soda.import_dataset(dataset_id='dataset_id')
|
|
186
|
+
|
|
187
|
+
# compare the Pennsieve dataset with the local dataset
|
|
188
|
+
results = compare(soda, local_dataset_location='path/to/local/dataset')
|
|
189
|
+
```
|
|
190
|
+
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# pysoda
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Pysoda is a tool for your python workflows that can help you create datasets in compliance with your favorite FAIR(Findable, Accessible, Interoperable, Reusable) data standards. At the moment, pysoda is primarily focused on neuromodulation, neurophysiology, and related data according to the SPARC guidelines that are aimed at making data FAIR. However, we are envisioning to extend the tool to support other standards such as BIDS, FHIR, etc, in the future.
|
|
6
|
+
|
|
7
|
+
Pysoda stems from SODA, a desktop software that simplifies the organization and sharing of data that needs to comply to a FAIR data standard. While using the SODA app can be convenient for most investigators, others with coding proficiency may find it more convenient to implement automated workflows. Given that the backend of SODA contains many functions necessary for preparing and submitting a dataset that is compliant with the SPARC Data Structure (SDS) such as:
|
|
8
|
+
|
|
9
|
+
Creating standard metadata files
|
|
10
|
+
Generating manifest files
|
|
11
|
+
Automatically complying with the file/folder naming conventions
|
|
12
|
+
Validating against the official SDS validator
|
|
13
|
+
Uploading dataset to Pennsieve with SDS compliance (ignoring empty folders and non-allowed files, avoiding duplicate files and folders, etc.)
|
|
14
|
+
And many more
|
|
15
|
+
|
|
16
|
+
Pysoda makes these functions, which have been thoroughly tested and validated, easily integratable in automated workflows such that the investigators do not have to re-write them. This will be very similar to the [pyfairdatatools](https://github.com/AI-READI/pyfairdatatools) Python package we are developing for our [AI-READI](https://aireadi.org/) project as part of the NIH Bridge2AI program.
|
|
17
|
+
|
|
18
|
+
## Workflow
|
|
19
|
+
|
|
20
|
+
### Import the pysoda package into your project and initialize the soda object with the supported standard of your choosing
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from pysoda import soda_create
|
|
24
|
+
# initialize the soda_create object
|
|
25
|
+
# Internal note: soda_create returns the typical sodaJSONObj with additional methods for adding data and metadata [not in version 1]
|
|
26
|
+
# It is passed into the module functions just like our sodaJSONObj is passed to the backend of our api
|
|
27
|
+
|
|
28
|
+
soda = soda_create(standard='sds')
|
|
29
|
+
|
|
30
|
+
# add a dataset name to the soda object
|
|
31
|
+
soda.set_dataset_name('my_dataset')
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Structure your data
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# get your base dataset files and folders structure
|
|
41
|
+
dataset_structure = soda.get_dataset_structure()
|
|
42
|
+
|
|
43
|
+
# fill out your dataset structure.
|
|
44
|
+
# NOTE: You will want to reference the
|
|
45
|
+
# dataset_structure key in the soda_schema.json file to understand the structure
|
|
46
|
+
# and what is required.
|
|
47
|
+
dataset_structure['folders'] = {
|
|
48
|
+
'data': {
|
|
49
|
+
'files': {
|
|
50
|
+
'file1': {
|
|
51
|
+
'path': '/home/user/file1.txt', 'relativePath': '/data/file1.txt', 'action': 'new'
|
|
52
|
+
},
|
|
53
|
+
'file2': {
|
|
54
|
+
'path': '/home/user/file2.txt', 'relativePath': '/data/file2.txt', 'action': 'new'
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
'folders': {
|
|
58
|
+
'primary': {
|
|
59
|
+
'files': {
|
|
60
|
+
'file3': {
|
|
61
|
+
'path': '/home/user/file3.txt', 'relativePath': '/data/primary/file3.txt', 'action': 'new'
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
'relativePath': '/data'
|
|
67
|
+
},
|
|
68
|
+
'files': {},
|
|
69
|
+
'relativePath': '/'
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# map your imported data files to the entity structure defined in the soda schema [here](soda_schema.py)
|
|
74
|
+
entity_structure = soda.get_entity_structure()
|
|
75
|
+
|
|
76
|
+
# fill out your entity structure using the schema as a reference
|
|
77
|
+
# NOTE: data model not finalized
|
|
78
|
+
entity = {'subjectId': 'sub-1', 'metadata': {'age': '1 year', 'sex': 'female'}, 'data-file': '/data/file1.txt'}
|
|
79
|
+
entity_structure['subjects'].append(entity)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Create your dataset metadata
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
|
|
89
|
+
# import the metadata module from the soda_create package
|
|
90
|
+
from pysoda import metadata
|
|
91
|
+
|
|
92
|
+
# define your submission metadata
|
|
93
|
+
submission = soda.get_submission_metadata()
|
|
94
|
+
|
|
95
|
+
submission['consortium-data-standard'] = 'standard'
|
|
96
|
+
submission['funding-consortium'] = 'SPARC'
|
|
97
|
+
submission['award-number'] = '12345'
|
|
98
|
+
submission['milestone-acheieved'] = ['one', 'two', 'three']
|
|
99
|
+
submission['filepath'] = 'path/to/destination'
|
|
100
|
+
|
|
101
|
+
# create the excel file for the submission metadata
|
|
102
|
+
metadata.submission.create(soda, file_output_location='path/to/output')
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# repeat
|
|
106
|
+
metadata.subjects.create(soda, file_output_location='path/to/output')
|
|
107
|
+
metadata.samples.create(soda, file_output_location='path/to/output')
|
|
108
|
+
metadata.performances.create(soda, file_output_location='path/to/output')
|
|
109
|
+
metadata.sites.create(soda, file_output_location='path/to/output')
|
|
110
|
+
metadata.code.create(soda, file_output_location='path/to/output')
|
|
111
|
+
metadata.manifest.create(soda, file_output_location='path/to/output')
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Generate your dataset
|
|
116
|
+
|
|
117
|
+
#### Generate locally
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
|
|
121
|
+
from pysoda import generate
|
|
122
|
+
|
|
123
|
+
# set the generation options
|
|
124
|
+
soda.set_generate_dataset_options(destination='local', path='path/to/destination', dataset_name='my_dataset')
|
|
125
|
+
|
|
126
|
+
# generate the dataset
|
|
127
|
+
generate(soda)
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
#### Generate on Pennsieve
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from pysoda import generate
|
|
135
|
+
|
|
136
|
+
# provide the Pennsieve API Key and secret
|
|
137
|
+
soda.upload.auth(api_key='api_key', api_secret='api_secret')
|
|
138
|
+
|
|
139
|
+
# upload new dataset
|
|
140
|
+
# NOTE: You will need to download and start the Pennsieve Agent [here](https://app.pennsieve.io) to upload data to Pennsieve
|
|
141
|
+
dataset_id = generate(soda) # returns dataset_id
|
|
142
|
+
|
|
143
|
+
# OR upload to an existing pennsieve dataset
|
|
144
|
+
# set the generate options in the soda object
|
|
145
|
+
soda.set_generate_dataset_options(destination='existing-ps', if_existing="merge", if_existing_files="replace", dataset_id=dataset_id)
|
|
146
|
+
update_existing(soda)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Utilities
|
|
150
|
+
|
|
151
|
+
### Compare a dataset on Pennsieve and a local dataset for differences
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from pysoda import compare
|
|
155
|
+
|
|
156
|
+
# provide the Pennsieve API Key and secret
|
|
157
|
+
soda.upload.auth(api_key='api_key', api_secret='api_secret')
|
|
158
|
+
|
|
159
|
+
# import the dataset from Pennsieve
|
|
160
|
+
soda.import_dataset(dataset_id='dataset_id')
|
|
161
|
+
|
|
162
|
+
# compare the Pennsieve dataset with the local dataset
|
|
163
|
+
results = compare(soda, local_dataset_location='path/to/local/dataset')
|
|
164
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "pysodafair"
|
|
3
|
+
version = "0.1.62"
|
|
4
|
+
description = "Pysoda package for Fairdataihub tools"
|
|
5
|
+
authors = ["Christopher Marroquin <cmarroquin@calmi2.org>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
homepage = "https://github.com/fairdataihub/pysodafair"
|
|
9
|
+
repository = "https://github.com/fairdataihub/pysodafair"
|
|
10
|
+
packages = [
|
|
11
|
+
{ include = "pysoda" }
|
|
12
|
+
]
|
|
13
|
+
include = [
|
|
14
|
+
"pysoda/core/metadata_templates/*",
|
|
15
|
+
"pysoda/core/metadata_templates/*.xlsx",
|
|
16
|
+
"pysoda/schema/*.json"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[tool.poetry.dependencies]
|
|
20
|
+
python = ">=3.6"
|
|
21
|
+
openpyxl = "*"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["poetry-core"]
|
|
26
|
+
build-backend = "poetry.core.masonry.api"
|
|
File without changes
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .upload import (
|
|
2
|
+
create_folder_level_manifest,
|
|
3
|
+
check_empty_files_folders,
|
|
4
|
+
main_curate_function,
|
|
5
|
+
main_curate_function_progress,
|
|
6
|
+
generate_manifest_file_locally,
|
|
7
|
+
generate_manifest_file_data,
|
|
8
|
+
check_json_size,
|
|
9
|
+
clean_json_structure,
|
|
10
|
+
check_server_access_to_files,
|
|
11
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .manifest_session import UploadManifestSession
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from pennsieve2 import Pennsieve
|
|
2
|
+
import re
|
|
3
|
+
import math
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class UploadManifestSession:
    """Holds the state of a single Pennsieve upload-manifest session.

    Tracks the active manifest id, progress counters for an upload, and
    bookkeeping for the file-renaming flow. The Pennsieve client is created
    lazily on first use so that constructing this object never touches the
    network or the Pennsieve Agent.
    """

    # Pennsieve's manifest file listing is paginated; this is the page size
    # used throughout the progress/size calculations below.
    _PAGE_SIZE = 1000

    # properties
    df_mid = None  # id of the manifest for the dataset being uploaded
    ps = None      # lazily-created Pennsieve client (see _PAGE_SIZE users)

    # upload values
    main_total_generate_dataset_size = None  # total bytes to upload
    total_files_to_upload = None             # total file count to upload
    elapsed_time = None                      # elapsed upload time

    # rename values
    renaming_files_flow = False      # True while the rename flow is active
    rename_total_files = None        # number of files queued for renaming
    list_of_files_to_rename = None   # the files queued for renaming

    def __init__(self):
        # Reset the manifest id per instance; other attributes keep their
        # class-level defaults until explicitly set.
        self.df_mid = None

    def set_df_mid(self, id):
        """Store the manifest id for the current upload session."""
        self.df_mid = id

    def get_df_mid(self):
        """Return the manifest id for the current upload session."""
        return self.df_mid

    def set_elapsed_time(self, time):
        """Store the elapsed upload time."""
        self.elapsed_time = time

    def get_elapsed_time(self):
        """Return the elapsed upload time."""
        return self.elapsed_time

    def set_main_total_generate_dataset_size(self, size):
        """Store the total dataset size (bytes) for this upload."""
        self.main_total_generate_dataset_size = size

    def get_main_total_generate_dataset_size(self):
        """Return the total dataset size (bytes) for this upload."""
        return self.main_total_generate_dataset_size

    def set_total_files_to_upload(self, count):
        """Store the total number of files to upload."""
        self.total_files_to_upload = count

    def get_total_files_to_upload(self):
        """Return the total number of files to upload."""
        return self.total_files_to_upload

    def set_rename_total_files(self, count):
        """Store the number of files queued for renaming."""
        self.rename_total_files = count

    def get_rename_total_files(self):
        """Return the number of files queued for renaming."""
        return self.rename_total_files

    def set_list_of_files_to_rename(self, list):
        """Store the files queued for renaming.

        NOTE(review): the parameter name shadows the ``list`` builtin; kept
        as-is for backward compatibility with keyword callers.
        """
        self.list_of_files_to_rename = list

    def get_list_of_files_to_rename(self):
        """Return the files queued for renaming."""
        return self.list_of_files_to_rename

    def set_renaming_files_flow(self, value):
        """Mark whether the rename flow is currently active."""
        self.renaming_files_flow = value

    def get_renaming_files_flow(self):
        """Return True if the rename flow is currently active."""
        return self.renaming_files_flow

    def df_mid_has_progress(self):
        """Return True if the stored manifest id has resumable progress.

        Syncs the manifest with the Pennsieve Agent and checks whether a
        manifest with our id is still in the "Initiated" state. Any failure
        (agent unreachable, no manifests yet) is treated as "no progress".
        """
        if self.ps is None:
            self.ps = Pennsieve()
        try:
            self.ps.manifest.sync(self.df_mid)
        except Exception:
            return False

        try:
            mfs = self.ps.list_manifests()
        except Exception:
            # there are no manifests created yet
            return False
        return any(mf.id == self.df_mid and mf.status == "Initiated" for mf in mfs)

    def get_remaining_file_count(self, mid, total_files):
        """Count files in manifest *mid* that have not finished uploading.

        Pages through the manifest listing and counts entries whose status is
        LOCAL, REGISTERED, or FAILED (i.e. not yet uploaded). ``total_files``
        is used only to compute the number of pages to fetch.
        """
        if self.ps is None:
            self.ps = Pennsieve()
        total_pages = math.ceil(total_files / self._PAGE_SIZE)
        remaining_files = 0
        for page in range(total_pages):
            file_page = self.ps.manifest.list_files(
                mid, page * self._PAGE_SIZE, self._PAGE_SIZE
            )
            # if there is no node_id then an upload hasn't started yet - all files are remaining
            # regular expression that searches and counts for every string that has
            # "status: LOCAL" or "status: REGISTERED" or "status: FAILED" in the string
            remaining_files += len(
                re.findall(r'status: REGISTERED|status: LOCAL|status: FAILED', str(file_page))
            )
        return remaining_files

    def create_obj_from_string(self, s):
        """Parse the string form of a manifest file listing into dicts.

        The agent returns entries shaped like ``file { key: value ... }``;
        each such entry becomes one ``{key: value}`` dict (values are kept
        as raw strings, including any surrounding quotation marks).
        """
        # Split into individual objects
        objects = re.findall(r'file {([^}]*?)}', s, re.DOTALL)

        # Parse each object
        parsed_objects = []
        for obj in objects:
            # Split into lines and remove empty lines
            lines = [line.strip() for line in obj.split('\n') if line.strip()]
            # maxsplit=1 so values that themselves contain ': ' (e.g. some
            # file paths) are not truncated at the second separator.
            parsed_object = {}
            for line in lines:
                key, value = line.split(': ', 1)
                parsed_object[key] = value
            parsed_objects.append(parsed_object)

        return parsed_objects

    def calculate_completed_upload_size(self, mid, bytes_per_file_dict, total_files):
        """Sum the bytes of files in manifest *mid* that finished uploading.

        ``bytes_per_file_dict`` maps a local source path to its size in
        bytes; entries without a status are counted as zero, and paths not
        present in the dict contribute zero as well.
        """
        if self.ps is None:
            self.ps = Pennsieve()
        total_pages = math.ceil(total_files / self._PAGE_SIZE)
        total_bytes_uploaded = 0
        for page in range(total_pages):
            file_string = self.ps.manifest.list_files(
                mid, page * self._PAGE_SIZE, self._PAGE_SIZE
            )
            parsed_objects = self.create_obj_from_string(str(file_string))
            for obj in parsed_objects:
                if 'status' not in obj:
                    continue
                if obj['status'] in [
                    'UPLOADED',
                    'IMPORTED',
                    'FINALIZED',
                    'VERIFIED',
                ]:
                    file_path = obj['source_path']
                    # remove the first and last character of file_path - these are quotation marks
                    file_path = file_path[1:-1]
                    total_bytes_uploaded += int(bytes_per_file_dict.get(file_path, 0))

        return total_bytes_uploaded
|