hotglue_singer_sdk-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
hotglue_singer_sdk/configuration/_dict_config.py
@@ -0,0 +1,101 @@
"""Helpers for parsing and wrangling configuration dictionaries."""

from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Any, Iterable

from dotenv import find_dotenv
from dotenv.main import DotEnv

from hotglue_singer_sdk.helpers._typing import is_string_array_type
from hotglue_singer_sdk.helpers._util import read_json_file

logger = logging.getLogger(__name__)


def parse_environment_config(
    config_schema: dict[str, Any],
    prefix: str,
    dotenv_path: str | None = None,
) -> dict[str, Any]:
    """Parse configuration from environment variables.

    Args:
        config_schema: A JSON Schema dictionary for the configuration.
        prefix: Prefix for environment variables.
        dotenv_path: Path to a .env file. If None, will try to find one in
            increasingly higher folders.

    Raises:
        ValueError: If an un-parsable setting is found.

    Returns:
        A configuration dictionary.
    """
    result: dict[str, Any] = {}

    if not dotenv_path:
        dotenv_path = find_dotenv()

    logger.debug("Loading configuration from %s", dotenv_path)
    DotEnv(dotenv_path).set_as_environment_variables()

    for config_key in config_schema["properties"].keys():
        env_var_name = prefix + config_key.upper().replace("-", "_")
        if env_var_name in os.environ:
            env_var_value = os.environ[env_var_name]
            logger.info(
                "Parsing '%s' config from env variable '%s'.",
                config_key,
                env_var_name,
            )
            if is_string_array_type(config_schema["properties"][config_key]):
                if env_var_value[0] == "[" and env_var_value[-1] == "]":
                    raise ValueError(
                        "A bracketed list was detected in the environment variable "
                        f"'{env_var_name}'. This syntax is no longer supported. "
                        "Please remove the brackets and try again."
                    )
                result[config_key] = env_var_value.split(",")
            else:
                result[config_key] = env_var_value
    return result


def merge_config_sources(
    inputs: Iterable[str],
    config_schema: dict[str, Any],
    env_prefix: str,
) -> dict[str, Any]:
    """Merge configuration from multiple sources into a single dictionary.

    Args:
        inputs: A sequence of configuration sources (file paths or "ENV").
        config_schema: A JSON Schema dictionary for the configuration.
        env_prefix: Prefix for environment variables.

    Raises:
        FileNotFoundError: If any of the config files does not exist.

    Returns:
        A single configuration dictionary.
    """
    config: dict[str, Any] = {}
    for config_path in inputs:
        if config_path == "ENV":
            env_config = parse_environment_config(config_schema, prefix=env_prefix)
            config.update(env_config)
            continue

        if not Path(config_path).is_file():
            raise FileNotFoundError(
                f"Could not locate config file at '{config_path}'. "
                "Please check that the file exists."
            )

        config.update(read_json_file(config_path))

    return config
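A minimal usage sketch for the two helpers above (not part of the package). The schema, file path, and `TAP_EXAMPLE_` prefix are illustrative placeholders; real taps derive the prefix from their plugin name:

```python
from hotglue_singer_sdk.configuration._dict_config import merge_config_sources

config_schema = {
    "properties": {
        "api_key": {"type": "string"},
        "tags": {"type": "array", "items": {"type": "string"}},
    }
}

# With TAP_EXAMPLE_API_KEY=abc123 and TAP_EXAMPLE_TAGS=a,b,c exported, the
# "ENV" source yields {"api_key": "abc123", "tags": ["a", "b", "c"]}
# (assuming is_string_array_type() recognizes the "tags" schema shape).
# Later sources win, so environment values override config.json here.
config = merge_config_sources(
    inputs=["config.json", "ENV"],
    config_schema=config_schema,
    env_prefix="TAP_EXAMPLE_",
)
```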
hotglue_singer_sdk/exceptions.py
@@ -0,0 +1,52 @@
"""Defines a common set of exceptions which developers can raise and/or catch."""
from typing import Optional

import requests


class ConfigValidationError(Exception):
    """Raised when a user's config settings fail validation."""


class FatalAPIError(Exception):
    """Exception raised when a failed request should not be considered retriable."""


class InvalidStreamSortException(Exception):
    """Exception to raise if sorting errors are found while syncing the records."""


class MapExpressionError(Exception):
    """Failed map expression evaluation."""


class MaxRecordsLimitException(Exception):
    """Exception to raise if the maximum number of allowable records is exceeded."""


# Note: "Witout" is the spelling published in the package; kept for API compatibility.
class RecordsWitoutSchemaException(Exception):
    """Raised if a target receives RECORD messages prior to a SCHEMA message."""


class RetriableAPIError(Exception):
    """Exception raised when a failed request can be safely retried."""

    def __init__(self, message: str, response: Optional[requests.Response] = None) -> None:
        """Extends the default with the failed response as an attribute.

        Args:
            message (str): The error message.
            response (requests.Response): The response object.
        """
        super().__init__(message)
        self.response = response


class StreamMapConfigError(Exception):
    """Raised when a stream map has an invalid configuration."""


class TapStreamConnectionFailure(Exception):
    """Exception to raise when stream connection fails or stream is disconnected."""


class TooManyRecordsException(Exception):
    """Exception to raise when query returns more records than max_records."""
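A hedged sketch of how request-handling code typically classifies responses with these exceptions; `check_response` is an illustrative name, not an API of this module:

```python
import requests

from hotglue_singer_sdk.exceptions import FatalAPIError, RetriableAPIError


def check_response(response: requests.Response) -> None:
    """Classify an HTTP response the way a REST stream's validator might."""
    if response.status_code == 429 or response.status_code >= 500:
        # Transient failure: retriable, with the response attached for later
        # inspection (e.g. reading a Retry-After header).
        raise RetriableAPIError(
            f"Retriable status: {response.status_code}", response=response
        )
    if response.status_code >= 400:
        # Client errors will not succeed on retry.
        raise FatalAPIError(f"Fatal status: {response.status_code}")
```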
hotglue_singer_sdk/helpers/__init__.py
@@ -0,0 +1 @@
"""Helper library for the SDK."""
hotglue_singer_sdk/helpers/_catalog.py
@@ -0,0 +1,122 @@
"""Private helper functions for catalog and selection logic."""

from copy import deepcopy
from logging import Logger
from typing import Any, Dict, Optional, Tuple

from memoization import cached

from hotglue_singer_sdk.helpers._singer import Catalog, SelectionMask
from hotglue_singer_sdk.helpers._typing import is_object_type

_MAX_LRU_CACHE = 500


@cached(max_size=_MAX_LRU_CACHE)
def get_selected_schema(
    stream_name: str, schema: dict, mask: SelectionMask, logger: Logger
) -> dict:
    """Return a copy of the provided JSON schema, dropping any fields not selected."""
    new_schema = deepcopy(schema)
    _pop_deselected_schema(new_schema, mask, stream_name, (), logger)
    return new_schema


def _pop_deselected_schema(
    schema: dict,
    mask: SelectionMask,
    stream_name: str,
    breadcrumb: Tuple[str, ...],
    logger: Logger,
) -> None:
    """Remove anything from schema that is not selected.

    Walk through schema, starting at the index in breadcrumb, recursively updating in
    place.
    """
    schema_at_breadcrumb = schema
    for crumb in breadcrumb:
        schema_at_breadcrumb = schema_at_breadcrumb.get(crumb, {})

    if not isinstance(schema_at_breadcrumb, dict):
        raise ValueError(
            f"Expected dictionary type instead of "
            f"'{type(schema_at_breadcrumb).__name__}' '{schema_at_breadcrumb}' "
            f"for '{stream_name}' bookmark '{str(breadcrumb)}' in '{schema}'"
        )

    if "properties" not in schema_at_breadcrumb:
        return

    for property_name, property_def in list(schema_at_breadcrumb["properties"].items()):
        property_breadcrumb: Tuple[str, ...] = tuple(
            list(breadcrumb) + ["properties", property_name]
        )
        selected = mask[property_breadcrumb]
        if not selected:
            schema_at_breadcrumb["properties"].pop(property_name, None)
            continue

        if is_object_type(property_def):
            # Call recursively in case any subproperties are deselected.
            _pop_deselected_schema(
                schema, mask, stream_name, property_breadcrumb, logger
            )


def pop_deselected_record_properties(
    record: Dict[str, Any],
    schema: dict,
    mask: SelectionMask,
    logger: Logger,
    breadcrumb: Tuple[str, ...] = (),
) -> None:
    """Remove anything from record properties that is not selected.

    Walk through properties, starting at the index in breadcrumb, recursively
    updating in place.
    """
    for property_name, val in list(record.items()):
        property_breadcrumb = breadcrumb + ("properties", property_name)
        selected = mask[property_breadcrumb]
        if not selected:
            record.pop(property_name)
            continue

        if isinstance(val, dict):
            # Call recursively in case any subproperties are deselected.
            pop_deselected_record_properties(
                val, schema, mask, logger, property_breadcrumb
            )


def deselect_all_streams(catalog: Catalog) -> None:
    """Deselect all streams in catalog dictionary."""
    for entry in catalog.streams:
        set_catalog_stream_selected(catalog, entry.tap_stream_id, selected=False)


def set_catalog_stream_selected(
    catalog: Catalog,
    stream_name: str,
    selected: bool,
    breadcrumb: Optional[Tuple[str, ...]] = None,
) -> None:
    """Set the `selected` metadata for a stream or one of its properties.

    A breadcrumb of `()` or `None` indicates the stream itself. Otherwise, the
    breadcrumb is the path to a property within the stream.
    """
    breadcrumb = breadcrumb or ()
    if not isinstance(breadcrumb, tuple):
        raise ValueError(
            f"Expected tuple value for breadcrumb '{breadcrumb}'. "
            f"Got {type(breadcrumb).__name__}"
        )

    catalog_entry = catalog.get_stream(stream_name)
    if not catalog_entry:
        raise ValueError(f"Catalog entry missing for '{stream_name}'.")

    md_entry = catalog_entry.metadata[breadcrumb]
    md_entry.selected = selected
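A sketch of driving stream selection with these helpers. It assumes `catalog_dict` is a standard Singer catalog containing a `users` stream, and that `Catalog.from_dict` is available on the class imported above (hedged; that module is not shown in this diff):

```python
from hotglue_singer_sdk.helpers._catalog import (
    deselect_all_streams,
    set_catalog_stream_selected,
)
from hotglue_singer_sdk.helpers._singer import Catalog

catalog = Catalog.from_dict(catalog_dict)  # catalog_dict: assumed Singer catalog

# Start from nothing selected, then opt back in stream by stream.
deselect_all_streams(catalog)
set_catalog_stream_selected(catalog, "users", selected=True)

# Deselect a single property via its metadata breadcrumb.
set_catalog_stream_selected(
    catalog, "users", selected=False, breadcrumb=("properties", "ssn")
)
```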
hotglue_singer_sdk/helpers/_classproperty.py
@@ -0,0 +1,18 @@
# flake8: noqa

"""Defines the `classproperty` decorator."""

# noqa


class classproperty(property):
    """Class property decorator."""

    def __get__(self, obj, objtype=None):
        return super().__get__(objtype)

    def __set__(self, obj, value):
        super().__set__(type(obj), value)

    def __delete__(self, obj):
        super().__delete__(type(obj))
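A small usage sketch for the decorator; `Plugin` and its attribute are illustrative, not part of the package:

```python
from hotglue_singer_sdk.helpers._classproperty import classproperty


class Plugin:
    _name = "example-plugin"

    @classproperty
    def name(cls) -> str:
        # Resolved against the class itself; no instance needed.
        return cls._name


print(Plugin.name)  # -> example-plugin
```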
hotglue_singer_sdk/helpers/_compat.py
@@ -0,0 +1,15 @@
"""Compatibility helpers."""

try:
    from typing import final
except ImportError:
    # typing.final is not available until Python 3.8
    final = lambda f: f  # noqa: E731

try:
    from importlib import metadata
except ImportError:
    # Running on pre-3.8 Python; use importlib-metadata package
    import importlib_metadata as metadata  # type: ignore

__all__ = ["metadata", "final"]
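A sketch of how downstream SDK modules would typically consume these shims; `installed_version` is an illustrative helper, not part of the package:

```python
from hotglue_singer_sdk.helpers._compat import final, metadata


@final  # enforced by type checkers on 3.8+; a harmless no-op before that
class Settings:
    pass


def installed_version(dist_name: str) -> str:
    # The shimmed `metadata` resolves to importlib.metadata when available,
    # falling back to the importlib-metadata backport on older Pythons.
    return metadata.version(dist_name)
```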
hotglue_singer_sdk/helpers/_flattening.py
@@ -0,0 +1,374 @@
"""Internal helper library for record flattening functions."""

import collections
import itertools
import json
import re
from copy import deepcopy
from typing import Any, List, Mapping, MutableMapping, NamedTuple, Optional, Tuple

import inflection

DEFAULT_FLATTENING_SEPARATOR = "__"


class FlatteningOptions(NamedTuple):
    """A stream map which performs the flattening role."""

    max_level: int
    flattening_enabled: bool = True
    separator: str = DEFAULT_FLATTENING_SEPARATOR


def get_flattening_options(
    plugin_config: Mapping,
) -> Optional[FlatteningOptions]:
    """Get flattening options, if flattening is enabled.

    Args:
        plugin_config: The tap or target config dictionary.

    Returns:
        A new FlatteningOptions object, or None if flattening is disabled.
    """
    if "flattening_enabled" in plugin_config and plugin_config["flattening_enabled"]:
        return FlatteningOptions(max_level=int(plugin_config["flattening_max_depth"]))

    return None


def flatten_key(key_name: str, parent_keys: List[str], separator: str = "__") -> str:
    """Concatenate `key_name` with its `parent_keys` using `separator`.

    Args:
        key_name: The node's key.
        parent_keys: A list of parent keys which are ancestors to this node.
        separator: The separator used during concatenation. Defaults to "__".

    Returns:
        The flattened key name as a string.

    >>> flatten_key("foo", ["bar", "baz"])
    'bar__baz__foo'

    >>> flatten_key("foo", ["bar", "baz"], separator=".")
    'bar.baz.foo'
    """
    full_key = parent_keys + [key_name]
    inflected_key = full_key.copy()
    reducer_index = 0
    # Shorten overly long keys (>= 255 chars) one segment at a time, replacing
    # each segment with its capital letters or, failing that, its first three
    # characters.
    while len(separator.join(inflected_key)) >= 255 and reducer_index < len(
        inflected_key
    ):
        reduced_key = re.sub(
            r"[a-z]", "", inflection.camelize(inflected_key[reducer_index])
        )
        inflected_key[reducer_index] = (
            reduced_key if len(reduced_key) > 1 else inflected_key[reducer_index][0:3]
        ).lower()
        reducer_index += 1

    return separator.join(inflected_key)


def flatten_schema(
    schema: dict,
    max_level: int,
    separator: str = "__",
) -> dict:
    """Flatten the provided schema up to a depth of max_level.

    Args:
        schema: The schema definition to flatten.
        max_level: The max recursion level (zero-based, exclusive).
        separator: The string to use when concatenating key names.

    Returns:
        A flattened version of the provided schema definition.

    >>> import json
    >>> schema = {
    ...     "type": "object",
    ...     "properties": {
    ...         "id": {
    ...             "type": "string"
    ...         },
    ...         "foo": {
    ...             "type": "object",
    ...             "properties": {
    ...                 "bar": {
    ...                     "type": "object",
    ...                     "properties": {
    ...                         "baz": {
    ...                             "type": "object",
    ...                             "properties": {
    ...                                 "qux": {
    ...                                     "type": "string"
    ...                                 }
    ...                             }
    ...                         }
    ...                     }
    ...                 }
    ...             }
    ...         }
    ...     }
    ... }
    >>> print(json.dumps(flatten_schema(schema, 0), indent=2))
    {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "foo": {
          "type": "object",
          "properties": {
            "bar": {
              "type": "object",
              "properties": {
                "baz": {
                  "type": "object",
                  "properties": {
                    "qux": {
                      "type": "string"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }

    >>> print(json.dumps(flatten_schema(schema, 1), indent=2))
    {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "foo__bar": {
          "type": "object",
          "properties": {
            "baz": {
              "type": "object",
              "properties": {
                "qux": {
                  "type": "string"
                }
              }
            }
          }
        }
      }
    }

    >>> print(json.dumps(flatten_schema(schema, 2), indent=2))
    {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "foo__bar__baz": {
          "type": "object",
          "properties": {
            "qux": {
              "type": "string"
            }
          }
        }
      }
    }

    >>> print(json.dumps(flatten_schema(schema, 3), indent=2))
    {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "foo__bar__baz__qux": {
          "type": "string"
        }
      }
    }
    """
    new_schema = deepcopy(schema)
    new_schema["properties"] = _flatten_schema(
        schema_node=new_schema,
        max_level=max_level,
        separator=separator,
    )
    return new_schema


def _flatten_schema(
    schema_node: dict,
    parent_keys: Optional[List[str]] = None,
    separator: str = "__",
    level: int = 0,
    max_level: int = 0,
) -> dict:
    """Flatten the provided schema node, recursively, up to a depth of `max_level`.

    Args:
        schema_node: The schema node to flatten.
        parent_keys: The parent's key, provided as a list of node names.
        separator: The string to use when concatenating key names.
        level: The current recursion level (zero-based).
        max_level: The max recursion level (zero-based, exclusive).

    Returns:
        A flattened version of the provided node.
    """
    if parent_keys is None:
        parent_keys = []

    items: List[Tuple[str, dict]] = []
    if "properties" not in schema_node:
        return {}

    for k, v in schema_node["properties"].items():
        new_key = flatten_key(k, parent_keys, separator)
        if "type" in v.keys():
            if "object" in v["type"] and "properties" in v and level < max_level:
                items.extend(
                    _flatten_schema(
                        v,
                        parent_keys + [k],
                        separator=separator,
                        level=level + 1,
                        max_level=max_level,
                    ).items()
                )
            else:
                items.append((new_key, v))
        else:
            # No "type" key: assume a composite node (e.g. "anyOf") whose first
            # value is a list of candidate schemas; make the first candidate
            # nullable and use it as the flattened definition.
            if len(v.values()) > 0:
                first_candidate = list(v.values())[0][0]
                if first_candidate["type"] == "string":
                    first_candidate["type"] = ["null", "string"]
                    items.append((new_key, first_candidate))
                elif first_candidate["type"] == "array":
                    first_candidate["type"] = ["null", "array"]
                    items.append((new_key, first_candidate))
                elif first_candidate["type"] == "object":
                    first_candidate["type"] = ["null", "object"]
                    items.append((new_key, first_candidate))

    # Sort and check for duplicates
    def _key_func(item):
        return item[0]  # first item in tuple is the key name.

    sorted_items = sorted(items, key=_key_func)
    for k, g in itertools.groupby(sorted_items, key=_key_func):
        if len(list(g)) > 1:
            raise ValueError(f"Duplicate column name produced in schema: {k}")

    # Return the (unsorted) result as a dict.
    return dict(items)


def flatten_record(
    record: dict,
    flattened_schema: dict,
    max_level: int,
    separator: str = "__",
) -> dict:
    """Flatten a record up to max_level.

    Args:
        record: The record to flatten.
        flattened_schema: The already flattened schema.
        max_level: The maximum depth of keys to flatten recursively.
        separator: The string used to separate concatenated key names. Defaults to "__".

    Returns:
        A flattened version of the record.
    """
    return _flatten_record(
        record_node=record,
        flattened_schema=flattened_schema,
        separator=separator,
        max_level=max_level,
    )


def _flatten_record(
    record_node: MutableMapping[Any, Any],
    flattened_schema: Optional[dict] = None,
    parent_key: Optional[List[str]] = None,
    separator: str = "__",
    level: int = 0,
    max_level: int = 0,
) -> dict:
    """Flatten the provided record node recursively.

    The current invocation is expected to be at `level` and will continue recursively
    until the provided `max_level` is reached.

    Args:
        record_node: The record node to flatten.
        flattened_schema: The already flattened full schema for the record.
        parent_key: The parent's key, provided as a list of node names.
        separator: The string to use when concatenating key names.
        level: The current recursion level (zero-based).
        max_level: The max recursion level (zero-based, exclusive).

    Returns:
        A flattened version of the provided node.
    """
    if parent_key is None:
        parent_key = []

    items: List[Tuple[str, Any]] = []
    for k, v in record_node.items():
        new_key = flatten_key(k, parent_key, separator)
        if isinstance(v, collections.abc.MutableMapping) and level < max_level:
            items.extend(
                _flatten_record(
                    v,
                    flattened_schema,
                    parent_key + [k],
                    separator=separator,
                    level=level + 1,
                    max_level=max_level,
                ).items()
            )
        else:
            items.append(
                (
                    new_key,
                    json.dumps(v)
                    if _should_jsondump_value(k, v, flattened_schema)
                    else v,
                )
            )

    return dict(items)


def _should_jsondump_value(key: str, value: Any, flattened_schema=None) -> bool:
    """Return True if json.dumps() should be used to serialize the value.

    Args:
        key: The property name to look up in the flattened schema.
        value: The value to evaluate.
        flattened_schema: The already flattened schema. Defaults to None.

    Returns:
        True if the value should be serialized with json.dumps(), False otherwise.
    """
    if isinstance(value, (dict, list)):
        return True

    if (
        flattened_schema
        and key in flattened_schema
        and "type" in flattened_schema[key]
        and set(flattened_schema[key]["type"]) == {"null", "object", "array"}
    ):
        return True

    return False
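A short sketch tying the two public helpers together, traced against the logic above; the schema and record are illustrative:

```python
from hotglue_singer_sdk.helpers._flattening import flatten_record, flatten_schema

schema = {
    "type": "object",
    "properties": {
        "id": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "geo": {"type": "object", "properties": {"lat": {"type": "number"}}},
            },
        },
    },
}

flat_schema = flatten_schema(schema, max_level=1)
# Properties become: "id", "address__city", and "address__geo"
# ("geo" stays an object because recursion stops at max_level=1).

record = {"id": "1", "address": {"city": "Paris", "geo": {"lat": 48.85}}}
flat = flatten_record(record, flat_schema, max_level=1)
# -> {"id": "1", "address__city": "Paris", "address__geo": '{"lat": 48.85}'}
# The un-flattened "geo" object is JSON-serialized via _should_jsondump_value().
```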