pysodafair 0.1.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,112 @@
1
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_MANIFEST, SCHEMA_NAME_MANIFEST
2
+ from .excel_utils import rename_headers, excel_columns
3
+ from openpyxl.styles import Font, PatternFill
4
+ from os.path import join, getsize
5
+ from openpyxl import load_workbook
6
+ import shutil
7
+ from ...utils import validate_schema, get_schema_path
8
+ from .helpers import upload_metadata_file
9
+ from .helpers import get_template_path
10
+ import os
11
+
12
+
13
+ from json import load as json_load
14
+
15
+
16
+
17
def _to_cell_value(value):
    """Return ``value`` in a form that can be stored in a single worksheet cell.

    Lists are flattened into one space-separated string (each item is passed
    through ``str`` so non-string items do not break the join); every other
    value is returned unchanged.
    """
    if isinstance(value, list):
        return " ".join(str(item) for item in value)
    return value


def create_excel(soda, upload_boolean, local_destination):
    """Write the manifest entries found in ``soda`` into a copy of the manifest template.

    The ``manifest.xlsx`` template is copied to the destination, the header row
    is filled from the property names of the manifest JSON schema, and one row
    is written per entry of ``soda["dataset_metadata"]["manifest_file"]``.
    Keys of an entry that are not schema properties are treated as custom
    columns: each gets its own column after the standard ones, with a
    highlighted header.

    Args:
        soda: Mapping that holds the manifest entries (a list of dicts) under
            ``soda["dataset_metadata"]["manifest_file"]``. It is also handed to
            ``upload_metadata_file`` when uploading.
        upload_boolean: When truthy the workbook is written under
            ``METADATA_UPLOAD_PS_PATH`` and then passed to
            ``upload_metadata_file``; otherwise it is written to
            ``local_destination`` and nothing is uploaded.
        local_destination: Path of the workbook to create when not uploading.

    Returns:
        dict: ``{"size": <size of the saved workbook as reported by getsize>}``.
    """
    source = get_template_path("manifest.xlsx")
    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_MANIFEST) if upload_boolean else local_destination
    shutil.copyfile(source, destination)
    wb = load_workbook(destination)
    ws1 = wb["Sheet1"]
    manifest = soda["dataset_metadata"]["manifest_file"]
    # NOTE(review): schema validation of the manifest is currently disabled --
    # confirm whether it should be re-enabled:
    # validate_schema(manifest, SCHEMA_NAME_MANIFEST)
    ascii_headers = excel_columns(start_index=0)
    custom_headers_to_column = {}

    orangeFill = PatternFill(
        start_color="FFD965", end_color="FFD965", fill_type="solid"
    )
    # Build the two fonts once and reuse them rather than creating a new
    # object for every cell.
    header_font = Font(bold=True, size=12, name="Calibri")
    body_font = Font(bold=False, size=11, name="Arial")

    # Load the schema to get the standard headers.
    schema_path = get_schema_path(SCHEMA_NAME_MANIFEST)
    with open(schema_path, "r", encoding="utf-8") as f:
        schema = json_load(f)
    # The schema describes an array; the first item's properties are the columns.
    item_schema = schema["items"][0]
    standard_headers = list(item_schema["properties"].keys())
    standard_header_names = set(standard_headers)

    # Write the standard headers to the first row ("_" shown as a space).
    for idx, header in enumerate(standard_headers):
        header_cell = ws1[ascii_headers[idx] + "1"]
        header_cell.value = header.replace("_", " ")
        header_cell.font = header_font

    # Data rows start directly below the header row.
    for row, entry in enumerate(manifest, start=2):
        # Standard fields: a missing key leaves an empty-string cell.
        for idx, header in enumerate(standard_headers):
            cell = ws1[ascii_headers[idx] + str(row)]
            cell.value = _to_cell_value(entry.get(header, ""))
            cell.font = body_font

        # Custom fields: the first time a name is seen it is given the next
        # free column after the standard ones and its header is written.
        for field_name, field_value in entry.items():
            if field_name in standard_header_names:
                continue
            col_idx = custom_headers_to_column.get(field_name)
            if col_idx is None:
                col_idx = len(standard_headers) + len(custom_headers_to_column)
                custom_headers_to_column[field_name] = col_idx
                custom_header_cell = ws1[ascii_headers[col_idx] + "1"]
                custom_header_cell.value = field_name
                custom_header_cell.fill = orangeFill
                custom_header_cell.font = header_font
            cell = ws1[ascii_headers[col_idx] + str(row)]
            # Flatten lists here as well, exactly like the standard fields.
            cell.value = _to_cell_value(field_value)
            cell.font = body_font

    wb.save(destination)
    size = getsize(destination)
    if upload_boolean:
        # NOTE(review): the meaning of the trailing ``True`` flag is defined by
        # upload_metadata_file, which is not visible here.
        upload_metadata_file(SDS_FILE_MANIFEST, soda, destination, True)

    return {"size": size}
79
+
80
+
81
+
82
+
83
def load_existing_manifest_file(manifest_file_path):
    """Read an existing manifest workbook into a header list and row lists.

    Args:
        manifest_file_path: Path to the manifest ``.xlsx`` file.

    Returns:
        dict: ``{"headers": [...], "data": [[...], ...]}`` where ``headers``
        holds the cell values of the first row of the "Sheet1" worksheet and
        ``data`` holds one list of cell values for every following row.

    Raises:
        FileNotFoundError: If nothing exists at ``manifest_file_path``.
    """
    # Fail early with a clear message instead of an error from the loader.
    if not os.path.exists(manifest_file_path):
        raise FileNotFoundError(f"Manifest file not found at {manifest_file_path}")

    wb = load_workbook(manifest_file_path)
    ws1 = wb["Sheet1"]

    # The first row is the header row; keep an empty list if no row is yielded.
    first_row = next(
        ws1.iter_rows(min_row=1, max_row=1, values_only=True), None
    )
    headers = list(first_row) if first_row is not None else []

    # Every remaining row is data. Cell values read from a worksheet are
    # scalars (never lists), so each row is copied as-is.
    data = [list(row) for row in ws1.iter_rows(min_row=2, values_only=True)]

    return {"headers": headers, "data": data}
108
+
109
+
110
+
111
+
112
+
@@ -0,0 +1,2 @@
1
+ from .manifest_import import load_metadata_to_dataframe
2
+ from .manifest_writer import create_high_level_manifest_files, get_auto_generated_manifest_files, create_high_lvl_manifest_files_existing_ps_starting_point
File without changes
@@ -0,0 +1,29 @@
1
+ import requests
2
+ import pandas as pd
3
+ from ....constants import PENNSIEVE_URL
4
+
5
def load_metadata_to_dataframe(node_id, file_type, ps_or_token, usecols=None, header=0):
    """
    Given a manifest package id and its storage type - excel or csv - returns a pandas dataframe.
    IMP: Pass in the pennsieve token or pennsieve object to ps_or_token for authentication.

    The package content is requested with a POST to the ``zipit`` endpoint under
    ``PENNSIEVE_URL`` and the response body is parsed in memory.

    Args:
        node_id (str): The id of the manifest package.
        file_type (str): The type of the manifest. ``"csv"`` is parsed as CSV;
            any other value is parsed as an Excel workbook.
        ps_or_token (str): The pennsieve token, or a pennsieve object whose
            ``get_user().session_token`` supplies the token.
        usecols (list, optional): The columns to be used. Defaults to None.
        header (int, optional): The header row. Defaults to 0.

    Returns:
        pandas.DataFrame: The parsed manifest.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from io import BytesIO

    if isinstance(ps_or_token, str):
        token = ps_or_token
    else:
        token = ps_or_token.get_user().session_token

    payload = {"data": {"nodeIds": [node_id]}}
    headers = {"Content-Type": "application/json"}
    r = requests.post(f"{PENNSIEVE_URL}/zipit/?api_key={token}", json=payload, headers=headers)
    # Surface HTTP failures here rather than trying to parse an error body.
    r.raise_for_status()

    # pandas readers expect a path or file-like object, not raw bytes.
    content = BytesIO(r.content)
    if file_type == "csv":
        # "openpyxl" is an Excel engine and is not a valid read_csv engine,
        # so the default CSV parser is used.
        return pd.read_csv(content, usecols=usecols, header=header)
    return pd.read_excel(content, engine="openpyxl", usecols=usecols, header=header)