pysodafair 0.1.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysoda/__init__.py +0 -0
- pysoda/constants.py +3 -0
- pysoda/core/__init__.py +10 -0
- pysoda/core/dataset_generation/__init__.py +11 -0
- pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
- pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
- pysoda/core/dataset_generation/upload.py +3951 -0
- pysoda/core/dataset_importing/__init__.py +1 -0
- pysoda/core/dataset_importing/import_dataset.py +662 -0
- pysoda/core/metadata/__init__.py +20 -0
- pysoda/core/metadata/code_description.py +109 -0
- pysoda/core/metadata/constants.py +32 -0
- pysoda/core/metadata/dataset_description.py +188 -0
- pysoda/core/metadata/excel_utils.py +41 -0
- pysoda/core/metadata/helpers.py +250 -0
- pysoda/core/metadata/manifest.py +112 -0
- pysoda/core/metadata/manifest_package/__init__.py +2 -0
- pysoda/core/metadata/manifest_package/manifest.py +0 -0
- pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
- pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
- pysoda/core/metadata/performances.py +46 -0
- pysoda/core/metadata/resources.py +53 -0
- pysoda/core/metadata/samples.py +184 -0
- pysoda/core/metadata/sites.py +51 -0
- pysoda/core/metadata/subjects.py +172 -0
- pysoda/core/metadata/submission.py +91 -0
- pysoda/core/metadata/text_metadata.py +47 -0
- pysoda/core/metadata_templates/CHANGES +1 -0
- pysoda/core/metadata_templates/LICENSE +1 -0
- pysoda/core/metadata_templates/README.md +4 -0
- pysoda/core/metadata_templates/__init__.py +0 -0
- pysoda/core/metadata_templates/code_description.xlsx +0 -0
- pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
- pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
- pysoda/core/metadata_templates/manifest.xlsx +0 -0
- pysoda/core/metadata_templates/performances.xlsx +0 -0
- pysoda/core/metadata_templates/resources.xlsx +0 -0
- pysoda/core/metadata_templates/samples.xlsx +0 -0
- pysoda/core/metadata_templates/sites.xlsx +0 -0
- pysoda/core/metadata_templates/subjects.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
- pysoda/core/metadata_templates/submission.xlsx +0 -0
- pysoda/core/permissions/__init__.py +1 -0
- pysoda/core/permissions/permissions.py +31 -0
- pysoda/core/pysoda/__init__.py +2 -0
- pysoda/core/pysoda/soda.py +34 -0
- pysoda/core/pysoda/soda_object.py +55 -0
- pysoda/core/upload_manifests/__init__.py +1 -0
- pysoda/core/upload_manifests/upload_manifests.py +37 -0
- pysoda/schema/__init__.py +0 -0
- pysoda/schema/code_description.json +629 -0
- pysoda/schema/dataset_description.json +295 -0
- pysoda/schema/manifest.json +60 -0
- pysoda/schema/performances.json +44 -0
- pysoda/schema/resources.json +39 -0
- pysoda/schema/samples.json +97 -0
- pysoda/schema/sites.json +38 -0
- pysoda/schema/soda_schema.json +664 -0
- pysoda/schema/subjects.json +131 -0
- pysoda/schema/submission_schema.json +28 -0
- pysoda/utils/__init__.py +9 -0
- pysoda/utils/authentication.py +381 -0
- pysoda/utils/config.py +68 -0
- pysoda/utils/exceptions.py +156 -0
- pysoda/utils/logger.py +6 -0
- pysoda/utils/metadata_utils.py +74 -0
- pysoda/utils/pennsieveAgentUtils.py +11 -0
- pysoda/utils/pennsieveUtils.py +118 -0
- pysoda/utils/profile.py +28 -0
- pysoda/utils/schema_validation.py +133 -0
- pysoda/utils/time_utils.py +5 -0
- pysoda/utils/upload_utils.py +108 -0
- pysodafair-0.1.62.dist-info/METADATA +190 -0
- pysodafair-0.1.62.dist-info/RECORD +77 -0
- pysodafair-0.1.62.dist-info/WHEEL +4 -0
- pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_MANIFEST, SCHEMA_NAME_MANIFEST
|
|
2
|
+
from .excel_utils import rename_headers, excel_columns
|
|
3
|
+
from openpyxl.styles import Font, PatternFill
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
from ...utils import validate_schema, get_schema_path
|
|
8
|
+
from .helpers import upload_metadata_file
|
|
9
|
+
from .helpers import get_template_path
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
from json import load as json_load
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_excel(soda, upload_boolean, local_destination):
    """
    Write the dataset's manifest entries into a copy of the manifest.xlsx template.

    The standard columns are the property names of the first item schema in
    the manifest JSON schema, written to row 1 with underscores replaced by
    spaces. Any other key found in a manifest entry becomes a custom column
    appended after the standard ones, in first-seen order, with a highlighted
    header cell.

    Args:
        soda (dict): SODA object. Only soda["dataset_metadata"]["manifest_file"]
            is read; it is iterated as a sequence of dict entries.
        upload_boolean (bool): When truthy, the workbook is written to
            METADATA_UPLOAD_PS_PATH/SDS_FILE_MANIFEST and then handed to
            upload_metadata_file. Otherwise it is written to local_destination.
        local_destination (str): Output path used when upload_boolean is falsy.

    Returns:
        dict: {"size": <size in bytes of the saved workbook>}.
    """
    source = get_template_path("manifest.xlsx")
    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_MANIFEST) if upload_boolean else local_destination
    shutil.copyfile(source, destination)
    wb = load_workbook(destination)
    ws1 = wb["Sheet1"]
    manifest = soda["dataset_metadata"]["manifest_file"]
    # NOTE: schema validation of the manifest is currently disabled.
    # validate_schema(manifest, SCHEMA_NAME_MANIFEST)
    ascii_headers = excel_columns(start_index=0)
    custom_headers_to_column = {}

    orangeFill = PatternFill(
        start_color="FFD965", end_color="FFD965", fill_type="solid"
    )
    # The fonts never change, so build them once instead of once per cell.
    header_font = Font(bold=True, size=12, name="Calibri")
    body_font = Font(bold=False, size=11, name="Arial")

    def _cell_value(value):
        # openpyxl cannot store a list in a cell, so lists are flattened to a
        # space-separated string. Applied to standard AND custom columns so
        # both are written consistently.
        return " ".join(value) if isinstance(value, list) else value

    # Load schema to get standard headers
    schema_path = get_schema_path(SCHEMA_NAME_MANIFEST)
    with open(schema_path, "r") as f:
        schema = json_load(f)
    # The schema is an array, so get the first item's properties
    item_schema = schema["items"][0]
    standard_headers = list(item_schema["properties"].keys())
    standard_header_set = set(standard_headers)

    # Write standard headers to the first row
    for idx, header in enumerate(standard_headers):
        header_cell = ws1[ascii_headers[idx] + "1"]
        header_cell.value = header.replace("_", " ")
        header_cell.font = header_font

    # Data rows start directly below the header row.
    for row, entry in enumerate(manifest, start=2):
        # Write standard fields; a missing key yields an empty string cell.
        for idx, header in enumerate(standard_headers):
            cell = ws1[ascii_headers[idx] + str(row)]
            cell.value = _cell_value(entry.get(header, ""))
            cell.font = body_font

        # Handle custom fields
        for field_name, field_value in entry.items():
            if field_name in standard_header_set:
                continue
            if field_name not in custom_headers_to_column:
                # First time this custom field is seen: claim the next free
                # column after the standard ones and write its header.
                col_idx = len(custom_headers_to_column) + len(standard_headers)
                custom_headers_to_column[field_name] = col_idx
                custom_header_cell = ws1[ascii_headers[col_idx] + "1"]
                custom_header_cell.value = field_name
                custom_header_cell.fill = orangeFill
                custom_header_cell.font = header_font
            col_idx = custom_headers_to_column[field_name]
            cell = ws1[ascii_headers[col_idx] + str(row)]
            cell.value = _cell_value(field_value)
            cell.font = body_font

    # Rename additional metadata header to Additional Metadata header
    # ws1[ascii_headers[len(standard_headers)] + "1"] = "Additional Metadata"

    wb.save(destination)
    size = getsize(destination)
    if upload_boolean:
        upload_metadata_file(SDS_FILE_MANIFEST, soda, destination, True)

    return {"size": size}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def load_existing_manifest_file(manifest_file_path):
    """
    Read a manifest workbook and split it into its header row and data rows.

    Args:
        manifest_file_path (str): Path to an existing .xlsx manifest file
            containing a worksheet named "Sheet1".

    Returns:
        dict: {"headers": <values of row 1 as a list>,
               "data": <one list of cell values per row from row 2 onward>}.

    Raises:
        FileNotFoundError: If nothing exists at manifest_file_path.
    """
    # Fail early with a clear message rather than letting the loader raise.
    if not os.path.exists(manifest_file_path):
        raise FileNotFoundError(f"Manifest file not found at {manifest_file_path}")

    sheet = load_workbook(manifest_file_path)["Sheet1"]

    # Row 1 holds the column headers; fall back to no headers if no row comes back.
    first_row = next(sheet.iter_rows(min_row=1, max_row=1, values_only=True), ())
    column_names = list(first_row)

    # Every remaining row is data; any list-valued cell is space-joined into a string.
    records = [
        [" ".join(cell) if isinstance(cell, list) else cell for cell in record]
        for record in sheet.iter_rows(min_row=2, values_only=True)
    ]

    return {"headers": column_names, "data": records}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
File without changes
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ....constants import PENNSIEVE_URL
|
|
4
|
+
|
|
5
|
+
def load_metadata_to_dataframe(node_id, file_type, ps_or_token, usecols=None, header=0):
    """
    Given a manifests package id and its storage type - excel or csv - returns a pandas dataframe.
    IMP: Pass in the pennsieve token or pennsieve object to ps_or_token for authentication.

    Args:
        node_id (str): The id of the manifest package.
        file_type (str): The type of the manifest. "csv" is parsed as CSV;
            any other value is parsed as an Excel workbook.
        ps_or_token (str | object): The pennsieve token, or a pennsieve object
            exposing get_user().session_token.
        usecols (list, optional): The columns to be used. Defaults to None.
        header (int, optional): The header row. Defaults to 0.

    Returns:
        pandas.DataFrame: The parsed contents of the downloaded file.

    Raises:
        requests.HTTPError: If the download request returns an error status.
    """
    # Imported locally so this function does not depend on a module-level import.
    from io import BytesIO

    payload = {"data": {"nodeIds": [node_id]}}
    request_headers = {"Content-Type": "application/json"}
    # headers = create_request_headers(ps_or_token)
    if isinstance(ps_or_token, str):
        token = ps_or_token
    else:
        token = ps_or_token.get_user().session_token
    r = requests.post(f"{PENNSIEVE_URL}/zipit/?api_key={token}", json=payload, headers=request_headers)
    # Surface HTTP failures here instead of feeding an error body to the parser.
    r.raise_for_status()

    # pandas readers expect a path or file-like object, not raw bytes.
    content = BytesIO(r.content)
    if file_type == "csv":
        # "openpyxl" is an Excel engine and is not accepted by read_csv, so
        # the default CSV parser is used.
        return pd.read_csv(content, usecols=usecols, header=header)
    return pd.read_excel(content, engine="openpyxl", usecols=usecols, header=header)
|