dcicutils 8.8.0__tar.gz → 8.8.0.1b1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/PKG-INFO +1 -1
  2. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/portal_utils.py +4 -4
  3. dcicutils-8.8.0.1b1/dcicutils/scripts/view_portal_object.py +545 -0
  4. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/structured_data.py +114 -28
  5. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/pyproject.toml +1 -1
  6. dcicutils-8.8.0/dcicutils/scripts/view_portal_object.py +0 -198
  7. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/LICENSE.txt +0 -0
  8. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/README.rst +0 -0
  9. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/__init__.py +0 -0
  10. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/base.py +0 -0
  11. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/beanstalk_utils.py +0 -0
  12. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/bundle_utils.py +0 -0
  13. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/captured_output.py +0 -0
  14. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/cloudformation_utils.py +0 -0
  15. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/codebuild_utils.py +0 -0
  16. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/command_utils.py +0 -0
  17. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/common.py +0 -0
  18. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/contribution_scripts.py +0 -0
  19. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/contribution_utils.py +0 -0
  20. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/creds_utils.py +0 -0
  21. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/data_readers.py +0 -0
  22. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/data_utils.py +0 -0
  23. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/datetime_utils.py +0 -0
  24. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/deployment_utils.py +0 -0
  25. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/diff_utils.py +0 -0
  26. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/docker_utils.py +0 -0
  27. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ecr_scripts.py +0 -0
  28. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ecr_utils.py +0 -0
  29. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ecs_utils.py +0 -0
  30. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/env_base.py +0 -0
  31. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/env_manager.py +0 -0
  32. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/env_scripts.py +0 -0
  33. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/env_utils.py +0 -0
  34. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/env_utils_legacy.py +0 -0
  35. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/es_utils.py +0 -0
  36. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/exceptions.py +0 -0
  37. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ff_mocks.py +0 -0
  38. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ff_utils.py +0 -0
  39. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/file_utils.py +0 -0
  40. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/function_cache_decorator.py +0 -0
  41. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/glacier_utils.py +0 -0
  42. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/jh_utils.py +0 -0
  43. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/kibana/dashboards.json +0 -0
  44. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/kibana/readme.md +0 -0
  45. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/lang_utils.py +0 -0
  46. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  47. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  48. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  49. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  50. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  51. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  52. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/license_utils.py +0 -0
  53. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/log_utils.py +0 -0
  54. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/misc_utils.py +0 -0
  55. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/obfuscation_utils.py +0 -0
  56. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/opensearch_utils.py +0 -0
  57. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/portal_object_utils.py +0 -0
  58. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/project_utils.py +0 -0
  59. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/qa_checkers.py +0 -0
  60. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/qa_utils.py +0 -0
  61. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/redis_tools.py +0 -0
  62. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/redis_utils.py +0 -0
  63. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/s3_utils.py +0 -0
  64. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/schema_utils.py +0 -0
  65. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/scripts/publish_to_pypi.py +0 -0
  66. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/scripts/run_license_checker.py +0 -0
  67. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/secrets_utils.py +0 -0
  68. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/sheet_utils.py +0 -0
  69. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/snapshot_utils.py +0 -0
  70. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/ssl_certificate_utils.py +0 -0
  71. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/task_utils.py +0 -0
  72. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/tmpfile_utils.py +0 -0
  73. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/trace_utils.py +0 -0
  74. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/validation_utils.py +0 -0
  75. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/variant_utils.py +0 -0
  76. {dcicutils-8.8.0 → dcicutils-8.8.0.1b1}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0
3
+ Version: 8.8.0.1b1
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -331,15 +331,15 @@ class Portal:
331
331
  Returns the "super type map" for all of the known schemas (via /profiles).
332
332
  This is a dictionary with property names which are all known schema type names which
333
333
  have (one or more) sub-types, and the value of each such property name is an array
334
- of all of those sub-types (direct and all descendents), in breadth first order.
334
+ of all of those sub-type names (direct and all descendants), in breadth first order.
335
335
  """
336
336
  def list_breadth_first(super_type_map: dict, super_type_name: str) -> dict:
337
337
  result = []
338
338
  queue = deque(super_type_map.get(super_type_name, []))
339
339
  while queue:
340
- result.append(sub_type_name := queue.popleft())
341
- if sub_type_name in super_type_map:
342
- queue.extend(super_type_map[sub_type_name])
340
+ result.append(subtype_name := queue.popleft())
341
+ if subtype_name in super_type_map:
342
+ queue.extend(super_type_map[subtype_name])
343
343
  return result
344
344
  if not (schemas := self.get_schemas()):
345
345
  return {}
@@ -0,0 +1,545 @@
1
+ # ------------------------------------------------------------------------------------------------------
2
+ # Command-line utility to retrieve and print the given object (UUID) from a SMaHT/CGAP/Fourfront Portal.
3
+ # ------------------------------------------------------------------------------------------------------
4
+ # Example command:
5
+ # view-portal-object 4483b19d-62e7-4e7f-a211-0395343a35df --yaml
6
+ #
7
+ # Example output:
8
+ # '@context': /terms/
9
+ # '@id': /access-keys/3968e38e-c11f-472e-8531-8650e2e296d4/
10
+ # '@type':
11
+ # - AccessKey
12
+ # - Item
13
+ # access_key_id: NSVCZ75O
14
+ # date_created: '2023-09-06T13:11:59.704005+00:00'
15
+ # description: Manually generated local access-key for testing.
16
+ # display_title: AccessKey from 2023-09-06
17
+ # expiration_date: '2023-12-05T13:11:59.714106'
18
+ # last_modified:
19
+ # date_modified: '2023-09-06T13:11:59.711367+00:00'
20
+ # modified_by:
21
+ # '@id': /users/3202fd57-44d2-44fb-a131-afb1e43d8ae5/
22
+ # '@type':
23
+ # - User
24
+ # - Item
25
+ # status: current
26
+ # uuid: 3202fd57-44d2-44fb-a131-afb1e43d8ae5
27
+ # principals_allowed:
28
+ # edit:
29
+ # - group.admin
30
+ # - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
31
+ # view:
32
+ # - group.admin
33
+ # - group.read-only-admin
34
+ # - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
35
+ # schema_version: '1'
36
+ # status: current
37
+ # user:
38
+ # '@id': /users/74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68/
39
+ # '@type':
40
+ # - User
41
+ # - Item
42
+ # display_title: David Michaels
43
+ # principals_allowed:
44
+ # edit:
45
+ # - group.admin
46
+ # view:
47
+ # - group.admin
48
+ # - group.read-only-admin
49
+ # status: current
50
+ # uuid: 74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
51
+ # uuid: 3968e38e-c11f-472e-8531-8650e2e296d4
52
+ #
53
+ # Note that instead of a uuid you can also actually use a path, for example:
54
+ # view-portal-object /file-formats/vcf_gz_tbi
55
+ #
56
+ # --------------------------------------------------------------------------------------------------
57
+
58
+ import argparse
59
+ from functools import lru_cache
60
+ import json
61
+ import pyperclip
62
+ import os
63
+ import sys
64
+ from typing import Callable, List, Optional, Tuple
65
+ import yaml
66
+ from dcicutils.captured_output import captured_output, uncaptured_output
67
+ from dcicutils.misc_utils import get_error_message, is_uuid, PRINT
68
+ from dcicutils.portal_utils import Portal
69
+
70
+
71
+ # Schema properties to ignore (by default) for the view schema usage.
72
+ _SCHEMAS_IGNORE_PROPERTIES = [
73
+ "date_created",
74
+ "last_modified",
75
+ "principals_allowed",
76
+ "submitted_by",
77
+ "schema_version"
78
+ ]
79
+
80
+
81
+ def main():
82
+
83
+ parser = argparse.ArgumentParser(description="View Portal object.")
84
+ parser.add_argument("uuid", type=str,
85
+ help=f"The uuid (or path) of the object to fetch and view. ")
86
+ parser.add_argument("--ini", type=str, required=False, default=None,
87
+ help=f"Name of the application .ini file.")
88
+ parser.add_argument("--env", "-e", type=str, required=False, default=None,
89
+ help=f"Environment name (key from ~/.smaht-keys.json).")
90
+ parser.add_argument("--server", "-s", type=str, required=False, default=None,
91
+ help=f"Environment server name (server from key in ~/.smaht-keys.json).")
92
+ parser.add_argument("--app", type=str, required=False, default=None,
93
+ help=f"Application name (one of: smaht, cgap, fourfront).")
94
+ parser.add_argument("--schema", action="store_true", required=False, default=False,
95
+ help="View named schema rather than object.")
96
+ parser.add_argument("--all", action="store_true", required=False, default=False,
97
+ help="Include all properties for schema usage.")
98
+ parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
99
+ parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
100
+ parser.add_argument("--database", action="store_true", required=False, default=False,
101
+ help="Read from database output.")
102
+ parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
103
+ parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
104
+ help="Copy object data to clipboard.")
105
+ parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
106
+ parser.add_argument("--more-details", action="store_true", required=False, default=False,
107
+ help="More detailed output.")
108
+ parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
109
+ parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
110
+ args = parser.parse_args()
111
+
112
+ if args.more_details:
113
+ args.details = True
114
+
115
+ portal = _create_portal(ini=args.ini, env=args.env or os.environ.get("SMAHT_ENV"),
116
+ server=args.server, app=args.app, verbose=args.verbose, debug=args.debug)
117
+
118
+ if args.uuid.lower() == "schemas" or args.uuid.lower() == "schema":
119
+ _print_all_schema_names(portal=portal, details=args.details,
120
+ more_details=args.more_details, all=args.all,
121
+ tree=args.tree, raw=args.raw, raw_yaml=args.yaml)
122
+ return
123
+ elif args.uuid.lower() == "info": # TODO: need word for what consortiums and submission centers are collectively
124
+ if consortia := portal.get_metadata("/consortia?limit=1000"):
125
+ _print("Known Consortia:")
126
+ consortia = sorted(consortia.get("@graph", []), key=lambda key: key.get("identifier"))
127
+ for consortium in consortia:
128
+ if ((consortium_name := consortium.get("identifier")) and
129
+ (consortium_uuid := consortium.get("uuid"))): # noqa
130
+ _print(f"- {consortium_name}: {consortium_uuid}")
131
+ if submission_centers := portal.get_metadata("/submission-centers?limit=1000"):
132
+ _print("Known Submission Centers:")
133
+ submission_centers = sorted(submission_centers.get("@graph", []), key=lambda key: key.get("identifier"))
134
+ for submission_center in submission_centers:
135
+ if ((submission_center_name := submission_center.get("identifier")) and
136
+ (submission_center_uuid := submission_center.get("uuid"))): # noqa
137
+ _print(f"- {submission_center_name}: {submission_center_uuid}")
138
+ try:
139
+ if file_formats := portal.get_metadata("/file-formats?limit=1000"):
140
+ _print("Known File Formats:")
141
+ file_formats = sorted(file_formats.get("@graph", []), key=lambda key: key.get("identifier"))
142
+ for file_format in file_formats:
143
+ if ((file_format_name := file_format.get("identifier")) and
144
+ (file_format_uuid := file_format.get("uuid"))): # noqa
145
+ _print(f"- {file_format_name}: {file_format_uuid}")
146
+ except Exception:
147
+ _print("Known File Formats: None")
148
+ return
149
+
150
+ if _is_maybe_schema_name(args.uuid):
151
+ args.schema = True
152
+
153
+ if args.schema:
154
+ schema, schema_name = _get_schema(portal, args.uuid)
155
+ if schema:
156
+ if args.copy:
157
+ pyperclip.copy(json.dumps(schema, indent=4))
158
+ if not args.raw:
159
+ if parent_schema_name := _get_parent_schema_name(schema):
160
+ if schema.get("isAbstract") is True:
161
+ _print(f"{schema_name} | parent: {parent_schema_name} | abstract")
162
+ else:
163
+ _print(f"{schema_name} | parent: {parent_schema_name}")
164
+ else:
165
+ _print(schema_name)
166
+ _print_schema(schema, details=args.details, more_details=args.details,
167
+ all=args.all, raw=args.raw, raw_yaml=args.yaml)
168
+ return
169
+
170
+ data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose)
171
+ if args.copy:
172
+ pyperclip.copy(json.dumps(data, indent=4))
173
+ if args.yaml:
174
+ _print(yaml.dump(data))
175
+ else:
176
+ _print(json.dumps(data, default=str, indent=4))
177
+
178
+
179
+ def _create_portal(ini: str, env: Optional[str] = None,
180
+ server: Optional[str] = None, app: Optional[str] = None,
181
+ verbose: bool = False, debug: bool = False) -> Portal:
182
+ portal = None
183
+ with captured_output(not debug):
184
+ portal = Portal(env, server=server, app=app) if env or app else Portal(ini)
185
+ if portal:
186
+ if verbose:
187
+ if portal.env:
188
+ _print(f"Portal environment: {portal.env}")
189
+ if portal.keys_file:
190
+ _print(f"Portal keys file: {portal.keys_file}")
191
+ if portal.key_id:
192
+ _print(f"Portal key prefix: {portal.key_id[0:2]}******")
193
+ if portal.ini_file:
194
+ _print(f"Portal ini file: {portal.ini_file}")
195
+ if portal.server:
196
+ _print(f"Portal server: {portal.server}")
197
+ return portal
198
+
199
+
200
+ def _get_portal_object(portal: Portal, uuid: str,
201
+ raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
202
+ response = None
203
+ try:
204
+ if not uuid.startswith("/"):
205
+ path = f"/{uuid}"
206
+ else:
207
+ path = uuid
208
+ response = portal.get(path, raw=raw, database=database)
209
+ except Exception as e:
210
+ if "404" in str(e) and "not found" in str(e).lower():
211
+ _print(f"Portal object not found at {portal.server}: {uuid}")
212
+ _exit()
213
+ _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
214
+ if not response:
215
+ _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
216
+ if response.status_code not in [200, 307]:
217
+ # TODO: Understand why the /me endpoint returns HTTP status code 307, which is the only reason 307 is accepted above.
218
+ _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
219
+ if not response.json:
220
+ _exit(f"Invalid JSON getting Portal object: {uuid}")
221
+ return response.json()
222
+
223
+
224
+ @lru_cache(maxsize=1)
225
+ def _get_schemas(portal: Portal) -> Optional[dict]:
226
+ return portal.get_schemas()
227
+
228
+
229
+ def _get_schema(portal: Portal, name: str) -> Tuple[Optional[dict], Optional[str]]:
230
+ if portal and name and (name := name.replace("_", "").replace("-", "").strip().lower()):
231
+ if schemas := _get_schemas(portal):
232
+ for schema_name in schemas:
233
+ if schema_name.replace("_", "").replace("-", "").strip().lower() == name:
234
+ return schemas[schema_name], schema_name
235
+ return None, None
236
+
237
+
238
+ def _is_maybe_schema_name(value: str) -> bool:
239
+ if value and not is_uuid(value) and not value.startswith("/"):
240
+ return True
241
+ return False
242
+
243
+
244
+ def _print_schema(schema: dict, details: bool = False, more_details: bool = False, all: bool = False,
245
+ raw: bool = False, raw_yaml: bool = False) -> None:
246
+ if raw:
247
+ if raw_yaml:
248
+ _print(yaml.dump(schema))
249
+ else:
250
+ _print(json.dumps(schema, indent=4))
251
+ return
252
+ _print_schema_info(schema, details=details, more_details=more_details, all=all)
253
+
254
+
255
+ def _print_schema_info(schema: dict, level: int = 0,
256
+ details: bool = False, more_details: bool = False, all: bool = False,
257
+ required: Optional[List[str]] = None) -> None:
258
+ if not schema or not isinstance(schema, dict):
259
+ return
260
+ if level == 0:
261
+ if required_properties := schema.get("required"):
262
+ _print("- required properties:")
263
+ for required_property in sorted(list(set(required_properties))):
264
+ if not all and required_property in _SCHEMAS_IGNORE_PROPERTIES:
265
+ continue
266
+ if property_type := (info := schema.get("properties", {}).get(required_property, {})).get("type"):
267
+ if property_type == "array" and (array_type := info.get("items", {}).get("type")):
268
+ _print(f" - {required_property}: {property_type} of {array_type}")
269
+ else:
270
+ _print(f" - {required_property}: {property_type}")
271
+ else:
272
+ _print(f" - {required_property}")
273
+ if isinstance(any_of := schema.get("anyOf"), list):
274
+ if ((any_of == [{"required": ["submission_centers"]}, {"required": ["consortia"]}]) or
275
+ (any_of == [{"required": ["consortia"]}, {"required": ["submission_centers"]}])): # noqa
276
+ # Very very special case.
277
+ _print(f" - at least one of:")
278
+ _print(f" - consortia: array of string")
279
+ _print(f" - submission_centers: array of string")
280
+ required = required_properties
281
+ if identifying_properties := schema.get("identifyingProperties"):
282
+ _print("- identifying properties:")
283
+ for identifying_property in sorted(list(set(identifying_properties))):
284
+ if not all and identifying_property in _SCHEMAS_IGNORE_PROPERTIES:
285
+ continue
286
+ if property_type := (info := schema.get("properties", {}).get(identifying_property, {})).get("type"):
287
+ if property_type == "array" and (array_type := info.get("items", {}).get("type")):
288
+ _print(f" - {identifying_property}: {property_type} of {array_type}")
289
+ else:
290
+ _print(f" - {identifying_property}: {property_type}")
291
+ else:
292
+ _print(f" - {identifying_property}")
293
+ if properties := schema.get("properties"):
294
+ reference_properties = []
295
+ for property_name in properties:
296
+ if not all and property_name in _SCHEMAS_IGNORE_PROPERTIES:
297
+ continue
298
+ property = properties[property_name]
299
+ if link_to := property.get("linkTo"):
300
+ reference_properties.append({"name": property_name, "ref": link_to})
301
+ if reference_properties:
302
+ _print("- reference properties:")
303
+ for reference_property in sorted(reference_properties, key=lambda key: key["name"]):
304
+ _print(f" - {reference_property['name']}: {reference_property['ref']}")
305
+ if schema.get("additionalProperties") is True:
306
+ _print(f" - additional properties are allowed")
307
+ if not more_details:
308
+ return
309
+ if properties := (schema.get("properties") if level == 0 else schema):
310
+ if level == 0:
311
+ _print("- properties:")
312
+ for property_name in sorted(properties):
313
+ if not all and property_name in _SCHEMAS_IGNORE_PROPERTIES:
314
+ continue
315
+ if property_name.startswith("@"):
316
+ continue
317
+ spaces = f"{' ' * (level + 1) * 2}"
318
+ property = properties[property_name]
319
+ property_required = required and property_name in required
320
+ if property_type := property.get("type"):
321
+ if property_type == "object":
322
+ suffix = ""
323
+ if not (object_properties := property.get("properties")):
324
+ if property.get("additionalProperties") is True:
325
+ property_type = "any object"
326
+ else:
327
+ property_type = "undefined object"
328
+ elif property.get("additionalProperties") is True:
329
+ property_type = "open ended object"
330
+ if property.get("calculatedProperty"):
331
+ suffix += f" | calculated"
332
+ _print(f"{spaces}- {property_name}: {property_type}{suffix}")
333
+ _print_schema_info(object_properties, level=level + 1,
334
+ details=details, more_details=more_details, all=all,
335
+ required=property.get("required"))
336
+ elif property_type == "array":
337
+ suffix = ""
338
+ if property_required:
339
+ suffix += f" | required"
340
+ if property.get("uniqueItems"):
341
+ suffix += f" | unique"
342
+ if property.get("calculatedProperty"):
343
+ suffix += f" | calculated"
344
+ if property_items := property.get("items"):
345
+ if (enumeration := property_items.get("enum")) is not None:
346
+ suffix = f" | enum" + suffix
347
+ if pattern := property_items.get("pattern"):
348
+ suffix += f" | pattern: {pattern}"
349
+ if (format := property_items.get("format")) and (format != "uuid"):
350
+ suffix += f" | format: {format}"
351
+ if (max_length := property_items.get("maxLength")) is not None:
352
+ suffix += f" | max items: {max_length}"
353
+ if property_type := property_items.get("type"):
354
+ if property_type == "object":
355
+ suffix = ""
356
+ _print(f"{spaces}- {property_name}: array of object{suffix}")
357
+ _print_schema_info(property_items.get("properties"), level=level + 1,
358
+ details=details, more_details=more_details, all=all,
359
+ required=property_items.get("required"))
360
+ elif property_type == "array":
361
+ # This (array-of-array) case does not occur at this time (as of February 2024).
362
+ _print(f"{spaces}- {property_name}: array of array{suffix}")
363
+ else:
364
+ _print(f"{spaces}- {property_name}: array of {property_type}{suffix}")
365
+ else:
366
+ _print(f"{spaces}- {property_name}: array{suffix}")
367
+ else:
368
+ _print(f"{spaces}- {property_name}: array{suffix}")
369
+ if enumeration:
370
+ nenums = 0
371
+ maxenums = 15
372
+ for enum in sorted(enumeration):
373
+ if (nenums := nenums + 1) >= maxenums:
374
+ if (remaining := len(enumeration) - nenums) > 0:
375
+ _print(f"{spaces} - [{remaining} more ...]")
376
+ break
377
+ _print(f"{spaces} - {enum}")
378
+ else:
379
+ if isinstance(property_type, list):
380
+ property_type = " or ".join(sorted(property_type))
381
+ suffix = ""
382
+ if (enumeration := property.get("enum")) is not None:
383
+ suffix += f" | enum"
384
+ if property_required:
385
+ suffix += f" | required"
386
+ if property.get("uniqueKey"):
387
+ suffix += f" | unique"
388
+ if pattern := property.get("pattern"):
389
+ suffix += f" | pattern: {pattern}"
390
+ if (format := property.get("format")) and (format != "uuid"):
391
+ suffix += f" | format: {format}"
392
+ if isinstance(any_of := property.get("anyOf"), list):
393
+ if ((any_of == [{"format": "date"}, {"format": "date-time"}]) or
394
+ (any_of == [{"format": "date-time"}, {"format": "date"}])): # noqa
395
+ # Very special case.
396
+ suffix += f" | format: date or date-time"
397
+ if link_to := property.get("linkTo"):
398
+ suffix += f" | reference: {link_to}"
399
+ if property.get("calculatedProperty"):
400
+ suffix += f" | calculated"
401
+ if (default := property.get("default")) is not None:
402
+ suffix += f" | default:"
403
+ if isinstance(default, dict):
404
+ suffix += f" object"
405
+ elif isinstance(default, list):
406
+ suffix += f" array"
407
+ else:
408
+ suffix += f" {default}"
409
+ if (minimum := property.get("minimum")) is not None:
410
+ suffix += f" | min: {minimum}"
411
+ if (maximum := property.get("maximum")) is not None:
412
+ suffix += f" | max: {maximum}"
413
+ if (max_length := property.get("maxLength")) is not None:
414
+ suffix += f" | max length: {max_length}"
415
+ if (min_length := property.get("minLength")) is not None:
416
+ suffix += f" | min length: {min_length}"
417
+ _print(f"{spaces}- {property_name}: {property_type}{suffix}")
418
+ if enumeration:
419
+ nenums = 0
420
+ maxenums = 15
421
+ for enum in sorted(enumeration):
422
+ if (nenums := nenums + 1) >= maxenums:
423
+ if (remaining := len(enumeration) - nenums) > 0:
424
+ _print(f"{spaces} - [{remaining} more ...]")
425
+ break
426
+ _print(f"{spaces} - {enum}")
427
+ else:
428
+ _print(f"{spaces}- {property_name}")
429
+
430
+
431
+ def _print_all_schema_names(portal: Portal,
432
+ details: bool = False, more_details: bool = False, all: bool = False,
433
+ tree: bool = False, raw: bool = False, raw_yaml: bool = False) -> None:
434
+ if not (schemas := _get_schemas(portal)):
435
+ return
436
+
437
+ if raw:
438
+ if raw_yaml:
439
+ _print(yaml.dump(schemas))
440
+ else:
441
+ _print(json.dumps(schemas, indent=4))
442
+ return
443
+
444
+ if tree:
445
+ _print_schemas_tree(schemas)
446
+ return
447
+
448
+ for schema_name in sorted(schemas.keys()):
449
+ if parent_schema_name := _get_parent_schema_name(schemas[schema_name]):
450
+ if schemas[schema_name].get("isAbstract") is True:
451
+ _print(f"{schema_name} | parent: {parent_schema_name} | abstract")
452
+ else:
453
+ _print(f"{schema_name} | parent: {parent_schema_name}")
454
+ else:
455
+ if schemas[schema_name].get("isAbstract") is True:
456
+ _print(f"{schema_name} | abstract")
457
+ else:
458
+ _print(schema_name)
459
+ if details:
460
+ _print_schema(schemas[schema_name], details=details, more_details=more_details, all=all)
461
+
462
+
463
+ def _get_parent_schema_name(schema: dict) -> Optional[str]:
464
+ if (isinstance(schema, dict) and
465
+ (parent_schema_name := schema.get("rdfs:subClassOf")) and
466
+ (parent_schema_name := parent_schema_name.replace("/profiles/", "").replace(".json", "")) and
467
+ (parent_schema_name != "Item")): # noqa
468
+ return parent_schema_name
469
+ return None
470
+
471
+
472
+ def _print_schemas_tree(schemas: dict) -> None:
473
+ def children_of(name: str) -> List[str]:
474
+ nonlocal schemas
475
+ children = []
476
+ if not (name is None or isinstance(name, str)):
477
+ return children
478
+ if name and name.lower() == "schemas":
479
+ name = None
480
+ for schema_name in (schemas if isinstance(schemas, dict) else {}):
481
+ if _get_parent_schema_name(schemas[schema_name]) == name:
482
+ children.append(schema_name)
483
+ return sorted(children)
484
+ def name_of(name: str) -> str: # noqa
485
+ nonlocal schemas
486
+ if not (name is None or isinstance(name, str)):
487
+ return name
488
+ if (schema := schemas.get(name)) and schema.get("isAbstract") is True:
489
+ return f"{name} (abstact)"
490
+ return name
491
+ _print_tree(root_name="Schemas", children_of=children_of, name_of=name_of)
492
+
493
+
494
+ def _print_tree(root_name: Optional[str],
495
+ children_of: Callable,
496
+ has_children: Optional[Callable] = None,
497
+ name_of: Optional[Callable] = None,
498
+ print: Callable = print) -> None:
499
+ """
500
+ Recursively prints as a tree structure the given root name and any of its
501
+ children (again, recursively) as specified by the given children_of callable;
502
+ the has_children may be specified, for efficiency, though if not specified
503
+ it will use the children_of function to determine this; the name_of callable
504
+ may be specified to modify the name before printing.
505
+ """
506
+ first = "└─ "
507
+ space = " "
508
+ branch = "│ "
509
+ tee = "├── "
510
+ last = "└── "
511
+
512
+ if not callable(children_of):
513
+ return
514
+ if not callable(has_children):
515
+ has_children = lambda name: children_of(name) is not None # noqa
516
+
517
+ # This function is adapted from Stack Overflow.
518
+ # Ref: https://stackoverflow.com/questions/9727673/list-directory-tree-structure-in-python
519
+ def tree_generator(name: str, prefix: str = ""):
520
+ contents = children_of(name)
521
+ pointers = [tee] * (len(contents) - 1) + [last]
522
+ for pointer, path in zip(pointers, contents):
523
+ yield prefix + pointer + (name_of(path) if callable(name_of) else path)
524
+ if has_children(path):
525
+ extension = branch if pointer == tee else space
526
+ yield from tree_generator(path, prefix=prefix+extension)
527
+ print(first + ((name_of(root_name) if callable(name_of) else root_name) or "root"))
528
+ for line in tree_generator(root_name, prefix=" "):
529
+ print(line)
530
+
531
+
532
+ def _print(*args, **kwargs):
533
+ with uncaptured_output():
534
+ PRINT(*args, **kwargs)
535
+ sys.stdout.flush()
536
+
537
+
538
+ def _exit(message: Optional[str] = None) -> None:
539
+ if message:
540
+ _print(f"ERROR: {message}")
541
+ exit(1)
542
+
543
+
544
+ if __name__ == "__main__":
545
+ main()
@@ -47,11 +47,27 @@ StructuredDataSet = Type["StructuredDataSet"]
47
47
 
48
48
  class StructuredDataSet:
49
49
 
50
+ # Reference (linkTo) lookup strategies; on a per-reference (type/value) basis;
51
+ # controlled by optional ref_lookup_strategy callable; default is lookup at root path
52
+ # but after the named reference (linkTo) type path lookup, and then lookup all subtypes;
53
+ # can choose to lookup root path first, or not lookup root path at all, or not lookup
54
+ # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
55
+ # and value (string) arguments and return an integer of any of the below ORed together.
56
+ REF_LOOKUP_ROOT = 0x0001
57
+ REF_LOOKUP_ROOT_FIRST = 0x0002 | REF_LOOKUP_ROOT
58
+ REF_LOOKUP_SUBTYPES = 0x0004
59
+ REF_LOOKUP_MINIMAL = 0
60
+ REF_LOOKUP_DEFAULT = REF_LOOKUP_ROOT | REF_LOOKUP_SUBTYPES
61
+
50
62
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
51
63
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
52
- order: Optional[List[str]] = None, prune: bool = True) -> None:
64
+ order: Optional[List[str]] = None, prune: bool = True,
65
+ ref_lookup_strategy: Optional[Callable] = None,
66
+ ref_lookup_nocache: bool = False) -> None:
53
67
  self._data = {}
54
- self._portal = Portal(portal, data=self._data, schemas=schemas) if portal else None
68
+ self._portal = Portal(portal, data=self._data, schemas=schemas,
69
+ ref_lookup_strategy=ref_lookup_strategy,
70
+ ref_lookup_nocache=ref_lookup_nocache) if portal else None
55
71
  self._order = order
56
72
  self._prune = prune
57
73
  self._warnings = {}
@@ -72,8 +88,11 @@ class StructuredDataSet:
72
88
  @staticmethod
73
89
  def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
74
90
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
75
- order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
76
- return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune)
91
+ order: Optional[List[str]] = None, prune: bool = True,
92
+ ref_lookup_strategy: Optional[Callable] = None,
93
+ ref_lookup_nocache: bool = False) -> StructuredDataSet:
94
+ return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
95
+ ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache)
77
96
 
78
97
  def validate(self, force: bool = False) -> None:
79
98
  def data_without_deleted_properties(data: dict) -> dict:
@@ -255,6 +274,23 @@ class StructuredDataSet:
255
274
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
256
275
  structured_row[name] = properties[name]
257
276
 
277
+ def _is_ref_lookup_root(ref_lookup_flags: int) -> bool:
278
+ return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_ROOT) == StructuredDataSet.REF_LOOKUP_ROOT
279
+
280
+ def _is_ref_lookup_root_first(ref_lookup_flags: int) -> bool:
281
+ return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_ROOT_FIRST) == StructuredDataSet.REF_LOOKUP_ROOT_FIRST
282
+
283
+ def _is_ref_lookup_subtypes(ref_lookup_flags: int) -> bool:
284
+ return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_SUBTYPES) == StructuredDataSet.REF_LOOKUP_SUBTYPES
285
+
286
+ @property
287
+ def ref_cache_hit_count(self) -> int:
288
+ return self.portal.ref_cache_hit_count if self.portal else -1
289
+
290
+ @property
291
+ def ref_lookup_count(self) -> int:
292
+ return self.portal.ref_lookup_count if self.portal else -1
293
+
258
294
  def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
259
295
  self._note_issue(self._warnings, item, group)
260
296
 
@@ -637,6 +673,8 @@ class Portal(PortalBase):
637
673
  env: Optional[str] = None, server: Optional[str] = None,
638
674
  app: Optional[OrchestratedApp] = None,
639
675
  data: Optional[dict] = None, schemas: Optional[List[dict]] = None,
676
+ ref_lookup_strategy: Optional[Callable] = None,
677
+ ref_lookup_nocache: bool = False,
640
678
  raise_exception: bool = True) -> None:
641
679
  super().__init__(arg, env=env, server=server, app=app, raise_exception=raise_exception)
642
680
  if isinstance(arg, Portal):
@@ -645,10 +683,21 @@ class Portal(PortalBase):
645
683
  else:
646
684
  self._schemas = schemas
647
685
  self._data = data
686
+ if callable(ref_lookup_strategy):
687
+ self._ref_lookup_strategy = ref_lookup_strategy
688
+ else:
689
+ self._ref_lookup_strategy = lambda type_name, value: StructuredDataSet.REF_LOOKUP_DEFAULT
690
+ self._ref_cache = {} if not ref_lookup_nocache else None
691
+ self._ref_cache_hit_count = 0
692
+ self._ref_lookup_count = 0
648
693
 
649
- @lru_cache(maxsize=256)
650
- def get_metadata(self, object_name: str) -> Optional[dict]:
694
+ @lru_cache(maxsize=8092)
695
+ def get_metadata_cache(self, object_name: str) -> Optional[dict]:
696
+ return self.get_metadata_nocache(object_name)
697
+
698
+ def get_metadata_nocache(self, object_name: str) -> Optional[dict]:
651
699
  try:
700
+ self._ref_lookup_count += 1
652
701
  return super().get_metadata(object_name)
653
702
  except Exception:
654
703
  return None
@@ -675,53 +724,90 @@ class Portal(PortalBase):
675
724
  schemas[user_specified_schema["title"]] = user_specified_schema
676
725
  return schemas
677
726
 
727
+ @lru_cache(maxsize=64)
728
+ def _get_schema_subtypes(self, type_name: str) -> Optional[List[str]]:
729
+ if not (schemas_super_type_map := self.get_schemas_super_type_map()):
730
+ return []
731
+ return schemas_super_type_map.get(type_name)
732
+
678
733
  def is_file_schema(self, schema_name: str) -> bool:
679
734
  """
680
735
  Returns True iff the given schema name isa File type, i.e. has an ancestor which is of type File.
681
736
  """
682
737
  return self.is_schema_type(schema_name, FILE_SCHEMA_NAME)
683
738
 
684
- def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[str]:
739
+ def _ref_exists_from_cache(self, type_name: str, value: str) -> Optional[List[dict]]:
740
+ if self._ref_cache is not None:
741
+ return self._ref_cache.get(f"/{type_name}/{value}", None)
742
+ return None
743
+
744
+ def _cache_ref(self, type_name: str, value: str, resolved: List[str],
745
+ subtype_names: Optional[List[str]]) -> None:
746
+ if self._ref_cache is not None:
747
+ for type_name in [type_name] + (subtype_names or []):
748
+ object_path = f"/{type_name}/{value}"
749
+ if self._ref_cache.get(object_path, None) is None:
750
+ self._ref_cache[object_path] = resolved
751
+
752
+ def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
685
753
  if not value:
686
754
  if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
687
755
  type_name = parts[0]
688
756
  value = parts[1]
689
757
  else:
690
- return []
758
+ return [] # Should not happen.
759
+ if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
760
+ self._ref_cache_hit_count += 1
761
+ return resolved
762
+ # Not cached here.
691
763
  resolved = []
692
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
764
+ ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
765
+ is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
766
+ is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
767
+ is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
768
+ is_resolved = False
769
+ subtype_names = self._get_schema_subtypes(type_name)
770
+ if is_ref_lookup_root_first:
771
+ is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
772
+ if not is_resolved:
773
+ is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
774
+ if not is_resolved and is_ref_lookup_root and not is_ref_lookup_root_first:
775
+ is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
693
776
  if is_resolved:
694
777
  resolved.append({"type": type_name, "uuid": resolved_uuid})
695
- # TODO: Added this return on 2024-01-14 (dmichaels).
696
- # Why did I originally check for multiple existing values?
697
- # Why not just return right away if I find that the ref exists?
698
- # Getting multiple values because, for example, we find
699
- # both this /Sample/UW_CELL-CULTURE-SAMPLE_COLO-829BL_HI-C_1
700
- # and /CellSample/UW_CELL-CULTURE-SAMPLE_COLO-829BL_HI-C_1
701
- # Why does that matter at all? Same thing.
702
- return resolved
703
- # Check for the given ref in all sub-types of the given type.
704
- if (schemas_super_type_map := self.get_schemas_super_type_map()):
705
- if (sub_type_names := schemas_super_type_map.get(type_name)):
706
- for sub_type_name in sub_type_names:
707
- is_resolved, resolved_uuid = self._ref_exists_single(sub_type_name, value)
708
- if is_resolved:
709
- resolved.append({"type": type_name, "uuid": resolved_uuid})
710
- # TODO: Added this return on 2024-01-14 (dmichaels). See above TODO.
711
- return resolved
778
+ # Check for the given ref in all subtypes of the given type.
779
+ elif subtype_names and is_ref_lookup_subtypes:
780
+ for subtype_name in subtype_names:
781
+ is_resolved, resolved_uuid = self._ref_exists_single(subtype_name, value)
782
+ if is_resolved:
783
+ resolved.append({"type": type_name, "uuid": resolved_uuid})
784
+ break
785
+ # Cache this ref (and all subtype versions of it); whether or not found;
786
+ # if not found it will be an empty array (array because caching all matches;
787
+ # but TODO - do not think we should do this anymore - maybe test changes needed).
788
+ self._cache_ref(type_name, value, resolved, subtype_names)
712
789
  return resolved
713
790
 
714
- def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
791
+ def _ref_exists_single(self, type_name: str, value: str, root: bool = False) -> Tuple[bool, Optional[str]]:
792
+ # Check first in our own data (i.e. e.g. within the given spreadsheet).
715
793
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
716
794
  iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
717
795
  for item in items:
718
796
  if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
719
797
  if isinstance(ivalue, list) and value in ivalue or ivalue == value:
720
798
  return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None)
721
- if (value := self.get_metadata(f"/{type_name}/{value}")) is None:
799
+ if (value := self.get_metadata(f"/{type_name}/{value}" if not root else f"/{value}")) is None:
722
800
  return False, None
723
801
  return True, value.get("uuid")
724
802
 
803
+ @property
804
+ def ref_cache_hit_count(self) -> int:
805
+ return self._ref_cache_hit_count
806
+
807
+ @property
808
+ def ref_lookup_count(self) -> int:
809
+ return self._ref_lookup_count
810
+
725
811
  @staticmethod
726
812
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None,
727
813
  schemas: Optional[List[dict]] = None) -> Portal:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.0"
3
+ version = "8.8.0.1b1" # TODO: To become 8.8.1
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -1,198 +0,0 @@
1
- # ------------------------------------------------------------------------------------------------------
2
- # Command-line utility to retrieve and print the given object (UUID) from a SMaHT/CGAP/Fourfront Portal.
3
- # ------------------------------------------------------------------------------------------------------
4
- # Example command:
5
- # view-portal-object 4483b19d-62e7-4e7f-a211-0395343a35df --yaml
6
- #
7
- # Example output:
8
- # '@context': /terms/
9
- # '@id': /access-keys/3968e38e-c11f-472e-8531-8650e2e296d4/
10
- # '@type':
11
- # - AccessKey
12
- # - Item
13
- # access_key_id: NSVCZ75O
14
- # date_created: '2023-09-06T13:11:59.704005+00:00'
15
- # description: Manually generated local access-key for testing.
16
- # display_title: AccessKey from 2023-09-06
17
- # expiration_date: '2023-12-05T13:11:59.714106'
18
- # last_modified:
19
- # date_modified: '2023-09-06T13:11:59.711367+00:00'
20
- # modified_by:
21
- # '@id': /users/3202fd57-44d2-44fb-a131-afb1e43d8ae5/
22
- # '@type':
23
- # - User
24
- # - Item
25
- # status: current
26
- # uuid: 3202fd57-44d2-44fb-a131-afb1e43d8ae5
27
- # principals_allowed:
28
- # edit:
29
- # - group.admin
30
- # - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
31
- # view:
32
- # - group.admin
33
- # - group.read-only-admin
34
- # - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
35
- # schema_version: '1'
36
- # status: current
37
- # user:
38
- # '@id': /users/74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68/
39
- # '@type':
40
- # - User
41
- # - Item
42
- # display_title: David Michaels
43
- # principals_allowed:
44
- # edit:
45
- # - group.admin
46
- # view:
47
- # - group.admin
48
- # - group.read-only-admin
49
- # status: current
50
- # uuid: 74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
51
- # uuid: 3968e38e-c11f-472e-8531-8650e2e296d4
52
- #
53
- # Note that instead of a uuid you can also actually use a path, for example:
54
- # view-portal-object /file-formats/vcf_gz_tbi
55
- #
56
- # --------------------------------------------------------------------------------------------------
57
-
58
- import argparse
59
- import json
60
- import pyperclip
61
- import sys
62
- from typing import Optional
63
- import yaml
64
- from dcicutils.captured_output import captured_output, uncaptured_output
65
- from dcicutils.misc_utils import get_error_message
66
- from dcicutils.portal_utils import Portal
67
- from dcicutils.structured_data import Schema
68
-
69
-
70
- def main():
71
-
72
- parser = argparse.ArgumentParser(description="View Portal object.")
73
- parser.add_argument("uuid", type=str,
74
- help=f"The uuid (or path) of the object to fetch and view. ")
75
- parser.add_argument("--ini", type=str, required=False, default=None,
76
- help=f"Name of the application .ini file.")
77
- parser.add_argument("--env", "-e", type=str, required=False, default=None,
78
- help=f"Environment name (key from ~/.smaht-keys.json).")
79
- parser.add_argument("--server", "-s", type=str, required=False, default=None,
80
- help=f"Environment server name (server from key in ~/.smaht-keys.json).")
81
- parser.add_argument("--app", type=str, required=False, default=None,
82
- help=f"Application name (one of: smaht, cgap, fourfront).")
83
- parser.add_argument("--schema", action="store_true", required=False, default=False,
84
- help="View named schema rather than object.")
85
- parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
86
- parser.add_argument("--database", action="store_true", required=False, default=False,
87
- help="Read from database output.")
88
- parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
89
- parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
90
- help="Copy object data to clipboard.")
91
- parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
92
- parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
93
- args = parser.parse_args()
94
-
95
- portal = _create_portal(ini=args.ini, env=args.env, server=args.server, app=args.app, debug=args.debug)
96
- if args.uuid == "schemas":
97
- _print_all_schema_names(portal=portal, verbose=args.verbose)
98
- return
99
- elif args.schema:
100
- data = _get_schema(portal=portal, schema_name=args.uuid)
101
- else:
102
- data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
103
- database=args.database, verbose=args.verbose)
104
-
105
- if args.copy:
106
- pyperclip.copy(json.dumps(data, indent=4))
107
- if args.yaml:
108
- _print(yaml.dump(data))
109
- else:
110
- _print(json.dumps(data, default=str, indent=4))
111
-
112
-
113
- def _create_portal(ini: str, env: Optional[str] = None,
114
- server: Optional[str] = None, app: Optional[str] = None, debug: bool = False) -> Portal:
115
- with captured_output(not debug):
116
- return Portal(env, server=server, app=app) if env or app else Portal(ini)
117
-
118
-
119
- def _get_portal_object(portal: Portal, uuid: str,
120
- raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
121
- if verbose:
122
- _print(f"Getting object from Portal: {uuid}")
123
- if portal.env:
124
- _print(f"Portal environment: {portal.env}")
125
- if portal.keys_file:
126
- _print(f"Portal keys file: {portal.keys_file}")
127
- if portal.key_id:
128
- _print(f"Portal key prefix: {portal.key_id[0:2]}******")
129
- if portal.ini_file:
130
- _print(f"Portal ini file: {portal.ini_file}")
131
- if portal.server:
132
- _print(f"Portal server: {portal.server}")
133
- response = None
134
- try:
135
- if not uuid.startswith("/"):
136
- path = f"/{uuid}"
137
- else:
138
- path = uuid
139
- response = portal.get(path, raw=raw, database=database)
140
- except Exception as e:
141
- if "404" in str(e) and "not found" in str(e).lower():
142
- _print(f"Portal object not found: {uuid}")
143
- _exit_without_action()
144
- _exit_without_action(f"Exception getting Portal object: {uuid}\n{get_error_message(e)}")
145
- if not response:
146
- _exit_without_action(f"Null response getting Portal object: {uuid}")
147
- if response.status_code not in [200, 307]:
148
- # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
149
- _exit_without_action(f"Invalid status code ({response.status_code}) getting Portal object: {uuid}")
150
- if not response.json:
151
- _exit_without_action(f"Invalid JSON getting Portal object: {uuid}")
152
- if verbose:
153
- _print("OK")
154
- return response.json()
155
-
156
-
157
- def _get_schema(portal: Portal, schema_name: str) -> Optional[dict]:
158
- def rummage_for_schema_name(portal: Portal, schema_name: str) -> Optional[str]: # noqa
159
- if schemas := portal.get_schemas():
160
- for schema in schemas:
161
- if schema.lower() == schema_name.lower():
162
- return schema
163
- schema = Schema.load_by_name(schema_name, portal)
164
- if not schema:
165
- if schema_name := rummage_for_schema_name(portal, schema_name):
166
- schema = Schema.load_by_name(schema_name, portal)
167
- return schema.data if schema else None
168
-
169
-
170
- def _print_all_schema_names(portal: Portal, verbose: bool = False) -> None:
171
- if schemas := portal.get_schemas():
172
- for schema in sorted(schemas.keys()):
173
- _print(schema)
174
- if verbose:
175
- if identifying_properties := schemas[schema].get("identifyingProperties"):
176
- _print("- identifying properties:")
177
- for identifying_property in sorted(identifying_properties):
178
- _print(f" - {identifying_property}")
179
- if required_properties := schemas[schema].get("required"):
180
- _print("- required properties:")
181
- for required_property in sorted(required_properties):
182
- _print(f" - {required_property}")
183
-
184
-
185
- def _print(*args, **kwargs):
186
- with uncaptured_output():
187
- print(*args, **kwargs)
188
- sys.stdout.flush()
189
-
190
-
191
- def _exit_without_action(message: Optional[str] = None) -> None:
192
- if message:
193
- _print(f"ERROR: {message}")
194
- exit(1)
195
-
196
-
197
- if __name__ == "__main__":
198
- main()
File without changes
File without changes