hotglue_singer_sdk-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
hotglue_singer_sdk/configuration/_dict_config.py
@@ -0,0 +1,101 @@
+ """Helpers for parsing and wrangling configuration dictionaries."""
+
+ from __future__ import annotations
+
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Any, Iterable
+
+ from dotenv import find_dotenv
+ from dotenv.main import DotEnv
+
+ from hotglue_singer_sdk.helpers._typing import is_string_array_type
+ from hotglue_singer_sdk.helpers._util import read_json_file
+
+ logger = logging.getLogger(__name__)
+
+
+ def parse_environment_config(
+     config_schema: dict[str, Any],
+     prefix: str,
+     dotenv_path: str | None = None,
+ ) -> dict[str, Any]:
+     """Parse configuration from environment variables.
+
+     Args:
+         config_schema: A JSON Schema dictionary for the configuration.
+         prefix: Prefix for environment variables.
+         dotenv_path: Path to a .env file. If None, will try to find one in
+             increasingly higher folders.
+
+     Raises:
+         ValueError: If an un-parsable setting is found.
+
+     Returns:
+         A configuration dictionary.
+     """
+     result: dict[str, Any] = {}
+
+     if not dotenv_path:
+         dotenv_path = find_dotenv()
+
+     logger.debug("Loading configuration from %s", dotenv_path)
+     DotEnv(dotenv_path).set_as_environment_variables()
+
+     for config_key in config_schema["properties"].keys():
+         env_var_name = prefix + config_key.upper().replace("-", "_")
+         if env_var_name in os.environ:
+             env_var_value = os.environ[env_var_name]
+             logger.info(
+                 "Parsing '%s' config from env variable '%s'.",
+                 config_key,
+                 env_var_name,
+             )
+             if is_string_array_type(config_schema["properties"][config_key]):
+                 if env_var_value[0] == "[" and env_var_value[-1] == "]":
+                     raise ValueError(
+                         "A bracketed list was detected in the environment variable "
+                         f"'{env_var_name}'. This syntax is no longer supported. "
+                         "Please remove the brackets and try again."
+                     )
+                 result[config_key] = env_var_value.split(",")
+             else:
+                 result[config_key] = env_var_value
+     return result
+
+
+ def merge_config_sources(
+     inputs: Iterable[str],
+     config_schema: dict[str, Any],
+     env_prefix: str,
+ ) -> dict[str, Any]:
+     """Merge configuration from multiple sources into a single dictionary.
+
+     Args:
+         inputs: A sequence of configuration sources (file paths or ENV).
+         config_schema: A JSON Schema dictionary for the configuration.
+         env_prefix: Prefix for environment variables.
+
+     Raises:
+         FileNotFoundError: If any of the config files does not exist.
+
+     Returns:
+         A single configuration dictionary.
+     """
+     config: dict[str, Any] = {}
+     for config_path in inputs:
+         if config_path == "ENV":
+             env_config = parse_environment_config(config_schema, prefix=env_prefix)
+             config.update(env_config)
+             continue
+
+         if not Path(config_path).is_file():
+             raise FileNotFoundError(
+                 f"Could not locate config file at '{config_path}'. "
+                 "Please check that the file exists."
+             )
+
+         config.update(read_json_file(config_path))
+
+     return config
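For orientation, here is a minimal usage sketch of the two helpers above. The MY_TAP_ prefix, the config.json path, and the schema are illustrative, and the list handling assumes is_string_array_type recognizes the "tags" schema shown:

import os

from hotglue_singer_sdk.configuration._dict_config import merge_config_sources

config_schema = {
    "properties": {
        "api_key": {"type": ["string"]},
        "tags": {"type": "array", "items": {"type": ["string"]}},
    }
}

# Comma-separated env values for string-array settings are split into lists.
os.environ["MY_TAP_API_KEY"] = "abc123"
os.environ["MY_TAP_TAGS"] = "alpha,beta"

# Later sources win: "ENV" here overrides keys read from config.json.
config = merge_config_sources(
    inputs=["config.json", "ENV"],  # config.json must exist on disk
    config_schema=config_schema,
    env_prefix="MY_TAP_",
)
assert config["tags"] == ["alpha", "beta"]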
hotglue_singer_sdk/exceptions.py
@@ -0,0 +1,52 @@
+ """Defines a common set of exceptions which developers can raise and/or catch."""
+ import requests
+
+
+ class ConfigValidationError(Exception):
+     """Raised when a user's config settings fail validation."""
+
+
+ class FatalAPIError(Exception):
+     """Exception raised when a failed request should not be considered retriable."""
+
+
+ class InvalidStreamSortException(Exception):
+     """Exception to raise if sorting errors are found while syncing the records."""
+
+
+ class MapExpressionError(Exception):
+     """Failed map expression evaluation."""
+
+
+ class MaxRecordsLimitException(Exception):
+     """Exception to raise if the maximum number of allowable records is exceeded."""
+
+
+ class RecordsWitoutSchemaException(Exception):
+     """Raised if a target receives RECORD messages prior to a SCHEMA message."""
+
+
+ class RetriableAPIError(Exception):
+     """Exception raised when a failed request can be safely retried."""
+
+     def __init__(self, message: str, response: requests.Response = None) -> None:
+         """Extend the default with the failed response as an attribute.
+
+         Args:
+             message (str): The error message.
+             response (requests.Response): The response object.
+         """
+         super().__init__(message)
+         self.response = response
+
+
+ class StreamMapConfigError(Exception):
+     """Raised when a stream map has an invalid configuration."""
+
+
+ class TapStreamConnectionFailure(Exception):
+     """Exception to raise when stream connection fails or stream is disconnected."""
+
+
+ class TooManyRecordsException(Exception):
+     """Exception to raise when query returns more records than max_records."""
hotglue_singer_sdk/helpers/__init__.py
@@ -0,0 +1 @@
+ """Helper library for the SDK."""
hotglue_singer_sdk/helpers/_catalog.py
@@ -0,0 +1,122 @@
+ """Private helper functions for catalog and selection logic."""
+
+ from copy import deepcopy
+ from logging import Logger
+ from typing import Any, Dict, Optional, Tuple
+
+ from memoization import cached
+
+ from hotglue_singer_sdk.helpers._singer import Catalog, SelectionMask
+ from hotglue_singer_sdk.helpers._typing import is_object_type
+
+ _MAX_LRU_CACHE = 500
+
+
+ @cached(max_size=_MAX_LRU_CACHE)
+ def get_selected_schema(
+     stream_name: str, schema: dict, mask: SelectionMask, logger: Logger
+ ) -> dict:
+     """Return a copy of the provided JSON schema, dropping any fields not selected."""
+     new_schema = deepcopy(schema)
+     _pop_deselected_schema(new_schema, mask, stream_name, (), logger)
+     return new_schema
+
+
+ def _pop_deselected_schema(
+     schema: dict,
+     mask: SelectionMask,
+     stream_name: str,
+     breadcrumb: Tuple[str, ...],
+     logger: Logger,
+ ) -> None:
+     """Remove anything from schema that is not selected.
+
+     Walk through schema, starting at the index in breadcrumb, recursively updating in
+     place.
+     """
+     schema_at_breadcrumb = schema
+     for crumb in breadcrumb:
+         schema_at_breadcrumb = schema_at_breadcrumb.get(crumb, {})
+
+     if not isinstance(schema_at_breadcrumb, dict):
+         raise ValueError(
+             f"Expected dictionary type instead of "
+             f"'{type(schema_at_breadcrumb).__name__}' '{schema_at_breadcrumb}' "
+             f"for '{stream_name}' bookmark '{str(breadcrumb)}' in '{schema}'"
+         )
+
+     if "properties" not in schema_at_breadcrumb:
+         return
+
+     for property_name, property_def in list(schema_at_breadcrumb["properties"].items()):
+         property_breadcrumb: Tuple[str, ...] = tuple(
+             list(breadcrumb) + ["properties", property_name]
+         )
+         selected = mask[property_breadcrumb]
+         if not selected:
+             schema_at_breadcrumb["properties"].pop(property_name, None)
+             continue
+
+         if is_object_type(property_def):
+             # Call recursively in case any subproperties are deselected.
+             _pop_deselected_schema(
+                 schema, mask, stream_name, property_breadcrumb, logger
+             )
+
+
+ def pop_deselected_record_properties(
+     record: Dict[str, Any],
+     schema: dict,
+     mask: SelectionMask,
+     logger: Logger,
+     breadcrumb: Tuple[str, ...] = (),
+ ) -> None:
+     """Remove anything from record properties that is not selected.
+
+     Walk through properties, starting at the index in breadcrumb, recursively
+     updating in place.
+     """
+     for property_name, val in list(record.items()):
+         property_breadcrumb = breadcrumb + ("properties", property_name)
+         selected = mask[property_breadcrumb]
+         if not selected:
+             record.pop(property_name)
+             continue
+
+         if isinstance(val, dict):
+             # Call recursively in case any subproperties are deselected.
+             pop_deselected_record_properties(
+                 val, schema, mask, logger, property_breadcrumb
+             )
+
+
+ def deselect_all_streams(catalog: Catalog) -> None:
+     """Deselect all streams in the catalog."""
+     for entry in catalog.streams:
+         set_catalog_stream_selected(catalog, entry.tap_stream_id, selected=False)
+
+
+ def set_catalog_stream_selected(
+     catalog: Catalog,
+     stream_name: str,
+     selected: bool,
+     breadcrumb: Optional[Tuple[str, ...]] = None,
+ ) -> None:
+     """Set the selected state of a stream, or of a property within a stream.
+
+     Breadcrumb of `()` or `None` indicates the stream itself. Otherwise, the
+     breadcrumb is the path to a property within the stream.
+     """
+     breadcrumb = breadcrumb or ()
+     if not isinstance(breadcrumb, tuple):
+         raise ValueError(
+             f"Expected tuple value for breadcrumb '{breadcrumb}'. "
+             f"Got {type(breadcrumb).__name__}"
+         )
+
+     catalog_entry = catalog.get_stream(stream_name)
+     if not catalog_entry:
+         raise ValueError(f"Catalog entry missing for '{stream_name}'.")
+
+     md_entry = catalog_entry.metadata[breadcrumb]
+     md_entry.selected = selected
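A hedged sketch of how these selection helpers compose, assuming the internal Catalog class exposes a from_dict constructor (as its upstream singer-sdk counterpart does) and using an illustrative single-stream catalog:

from hotglue_singer_sdk.helpers._catalog import (
    deselect_all_streams,
    set_catalog_stream_selected,
)
from hotglue_singer_sdk.helpers._singer import Catalog

catalog = Catalog.from_dict(  # assumed constructor, mirroring upstream
    {
        "streams": [
            {
                "tap_stream_id": "users",
                "stream": "users",
                "schema": {"type": "object", "properties": {"id": {"type": "string"}}},
                "metadata": [
                    {"breadcrumb": [], "metadata": {}},
                    {"breadcrumb": ["properties", "id"], "metadata": {}},
                ],
            }
        ]
    }
)

deselect_all_streams(catalog)                                 # start from nothing
set_catalog_stream_selected(catalog, "users", selected=True)  # re-select one stream
set_catalog_stream_selected(                                  # drop a single field
    catalog, "users", selected=False, breadcrumb=("properties", "id")
)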
hotglue_singer_sdk/helpers/_classproperty.py
@@ -0,0 +1,18 @@
+ # flake8: noqa
+
+ """Defines the `classproperty` decorator."""
+
+ # noqa
+
+
+ class classproperty(property):
+     """Class property decorator."""
+
+     def __get__(self, obj, objtype=None):
+         return super().__get__(objtype)
+
+     def __set__(self, obj, value):
+         super().__set__(type(obj), value)
+
+     def __delete__(self, obj):
+         super().__delete__(type(obj))
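Because __get__ forwards the class (objtype) rather than the instance, the wrapped function receives the class itself and the value resolves on class access. A minimal sketch (ExamplePlugin is illustrative):

from hotglue_singer_sdk.helpers._classproperty import classproperty

class ExamplePlugin:
    name = "tap-example"

    @classproperty
    def banner(cls):
        # `cls` is the class, not an instance.
        return f"Starting {cls.name}"

print(ExamplePlugin.banner)  # -> "Starting tap-example", no instance needed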
hotglue_singer_sdk/helpers/_compat.py
@@ -0,0 +1,15 @@
+ """Compatibility helpers."""
+
+ try:
+     from typing import final
+ except ImportError:
+     # `final` not available until Python 3.8
+     final = lambda f: f  # noqa: E731
+
+ try:
+     from importlib import metadata
+ except ImportError:
+     # Running on pre-3.8 Python; use importlib-metadata package
+     import importlib_metadata as metadata  # type: ignore
+
+ __all__ = ["metadata", "final"]
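These shims let the rest of the SDK import `final` and `metadata` from one place regardless of Python version. A small sketch ("requests" is just an example distribution and must be installed for the lookup to succeed):

from hotglue_singer_sdk.helpers._compat import final, metadata

class BaseStream:
    @final  # a no-op below Python 3.8, a typing marker on 3.8+
    def sync(self) -> None:
        """Subclasses override helper hooks, not sync() itself."""

print(metadata.version("requests"))  # version of the installed distribution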
hotglue_singer_sdk/helpers/_flattening.py
@@ -0,0 +1,374 @@
+ """Internal helper library for record flattening functions."""
+
+ import collections
+ import itertools
+ import json
+ import re
+ from copy import deepcopy
+ from typing import Any, List, Mapping, MutableMapping, NamedTuple, Optional, Tuple
+
+ import inflection
+
+ DEFAULT_FLATTENING_SEPARATOR = "__"
+
+
+ class FlatteningOptions(NamedTuple):
+     """Flattening options for a stream map."""
+
+     max_level: int
+     flattening_enabled: bool = True
+     separator: str = DEFAULT_FLATTENING_SEPARATOR
+
+
+ def get_flattening_options(
+     plugin_config: Mapping,
+ ) -> Optional[FlatteningOptions]:
+     """Get flattening options, if flattening is enabled.
+
+     Args:
+         plugin_config: The tap or target config dictionary.
+
+     Returns:
+         A new FlatteningOptions object or None if flattening is disabled.
+     """
+     if "flattening_enabled" in plugin_config and plugin_config["flattening_enabled"]:
+         return FlatteningOptions(max_level=int(plugin_config["flattening_max_depth"]))
+
+     return None
+
+
+ def flatten_key(key_name: str, parent_keys: List[str], separator: str = "__") -> str:
+     """Concatenate `key_name` with its `parent_keys` using `separator`.
+
+     Args:
+         key_name: The node's key.
+         parent_keys: A list of parent keys which are ancestors to this node.
+         separator: The separator used during concatenation. Defaults to "__".
+
+     Returns:
+         The flattened key name as a string.
+
+     >>> flatten_key("foo", ["bar", "baz"])
+     'bar__baz__foo'
+
+     >>> flatten_key("foo", ["bar", "baz"], separator=".")
+     'bar.baz.foo'
+     """
+     full_key = parent_keys + [key_name]
+     inflected_key = full_key.copy()
+     reducer_index = 0
+     while len(separator.join(inflected_key)) >= 255 and reducer_index < len(
+         inflected_key
+     ):
+         reduced_key = re.sub(
+             r"[a-z]", "", inflection.camelize(inflected_key[reducer_index])
+         )
+         inflected_key[reducer_index] = (
+             reduced_key if len(reduced_key) > 1 else inflected_key[reducer_index][0:3]
+         ).lower()
+         reducer_index += 1
+
+     return separator.join(inflected_key)
+
+
+ def flatten_schema(
+     schema: dict,
+     max_level: int,
+     separator: str = "__",
+ ) -> dict:
+     """Flatten the provided schema up to a depth of max_level.
+
+     Args:
+         schema: The schema definition to flatten.
+         separator: The string to use when concatenating key names.
+         max_level: The max recursion level (zero-based, exclusive).
+
+     Returns:
+         A flattened version of the provided schema definition.
+
+     >>> import json
+     >>> schema = {
+     ...     "type": "object",
+     ...     "properties": {
+     ...         "id": {
+     ...             "type": "string"
+     ...         },
+     ...         "foo": {
+     ...             "type": "object",
+     ...             "properties": {
+     ...                 "bar": {
+     ...                     "type": "object",
+     ...                     "properties": {
+     ...                         "baz": {
+     ...                             "type": "object",
+     ...                             "properties": {
+     ...                                 "qux": {
+     ...                                     "type": "string"
+     ...                                 }
+     ...                             }
+     ...                         }
+     ...                     }
+     ...                 }
+     ...             }
+     ...         }
+     ...     }
+     ... }
+     >>> print(json.dumps(flatten_schema(schema, 0), indent=2))
+     {
+       "type": "object",
+       "properties": {
+         "id": {
+           "type": "string"
+         },
+         "foo": {
+           "type": "object",
+           "properties": {
+             "bar": {
+               "type": "object",
+               "properties": {
+                 "baz": {
+                   "type": "object",
+                   "properties": {
+                     "qux": {
+                       "type": "string"
+                     }
+                   }
+                 }
+               }
+             }
+           }
+         }
+       }
+     }
+
+     >>> print(json.dumps(flatten_schema(schema, 1), indent=2))
+     {
+       "type": "object",
+       "properties": {
+         "id": {
+           "type": "string"
+         },
+         "foo__bar": {
+           "type": "object",
+           "properties": {
+             "baz": {
+               "type": "object",
+               "properties": {
+                 "qux": {
+                   "type": "string"
+                 }
+               }
+             }
+           }
+         }
+       }
+     }
+
+     >>> print(json.dumps(flatten_schema(schema, 2), indent=2))
+     {
+       "type": "object",
+       "properties": {
+         "id": {
+           "type": "string"
+         },
+         "foo__bar__baz": {
+           "type": "object",
+           "properties": {
+             "qux": {
+               "type": "string"
+             }
+           }
+         }
+       }
+     }
+
+     >>> print(json.dumps(flatten_schema(schema, 3), indent=2))
+     {
+       "type": "object",
+       "properties": {
+         "id": {
+           "type": "string"
+         },
+         "foo__bar__baz__qux": {
+           "type": "string"
+         }
+       }
+     }
+     """
+     new_schema = deepcopy(schema)
+     new_schema["properties"] = _flatten_schema(
+         schema_node=new_schema,
+         max_level=max_level,
+         separator=separator,
+     )
+     return new_schema
+
+
+ def _flatten_schema(
+     schema_node: dict,
+     parent_keys: List[str] = None,
+     separator: str = "__",
+     level: int = 0,
+     max_level: int = 0,
+ ) -> dict:
+     """Flatten the provided schema node, recursively up to depth of `max_level`.
+
+     Args:
+         schema_node: The schema node to flatten.
+         parent_keys: The parent's key, provided as a list of node names.
+         separator: The string to use when concatenating key names.
+         level: The current recursion level (zero-based).
+         max_level: The max recursion level (zero-based, exclusive).
+
+     Returns:
+         A flattened version of the provided node.
+     """
+     if parent_keys is None:
+         parent_keys = []
+
+     items: List[Tuple[str, dict]] = []
+     if "properties" not in schema_node:
+         return {}
+
+     for k, v in schema_node["properties"].items():
+         new_key = flatten_key(k, parent_keys, separator)
+         if "type" in v.keys():
+             if "object" in v["type"] and "properties" in v and level < max_level:
+                 items.extend(
+                     _flatten_schema(
+                         v,
+                         parent_keys + [k],
+                         separator=separator,
+                         level=level + 1,
+                         max_level=max_level,
+                     ).items()
+                 )
+             else:
+                 items.append((new_key, v))
+         else:
+             if len(v.values()) > 0:
+                 if list(v.values())[0][0]["type"] == "string":
+                     list(v.values())[0][0]["type"] = ["null", "string"]
+                     items.append((new_key, list(v.values())[0][0]))
+                 elif list(v.values())[0][0]["type"] == "array":
+                     list(v.values())[0][0]["type"] = ["null", "array"]
+                     items.append((new_key, list(v.values())[0][0]))
+                 elif list(v.values())[0][0]["type"] == "object":
+                     list(v.values())[0][0]["type"] = ["null", "object"]
+                     items.append((new_key, list(v.values())[0][0]))
+
+     # Sort and check for duplicates
+     def _key_func(item):
+         return item[0]  # first item in tuple is the key name.
+
+     sorted_items = sorted(items, key=_key_func)
+     for k, g in itertools.groupby(sorted_items, key=_key_func):
+         if len(list(g)) > 1:
+             raise ValueError(f"Duplicate column name produced in schema: {k}")
+
+     # Return the (unsorted) result as a dict.
+     return dict(items)
+
+
+ def flatten_record(
+     record: dict,
+     flattened_schema: dict,
+     max_level: int,
+     separator: str = "__",
+ ) -> dict:
+     """Flatten a record up to max_level.
+
+     Args:
+         record: The record to flatten.
+         flattened_schema: The already flattened schema.
+         separator: The string used to separate concatenated key names. Defaults to "__".
+         max_level: The maximum depth of keys to flatten recursively.
+
+     Returns:
+         A flattened version of the record.
+     """
+     return _flatten_record(
+         record_node=record,
+         flattened_schema=flattened_schema,
+         separator=separator,
+         max_level=max_level,
+     )
+
+
+ def _flatten_record(
+     record_node: MutableMapping[Any, Any],
+     flattened_schema: dict = None,
+     parent_key: List[str] = None,
+     separator: str = "__",
+     level: int = 0,
+     max_level: int = 0,
+ ) -> dict:
+     """This recursive function flattens the record node.
+
+     The current invocation is expected to be at `level` and will continue recursively
+     until the provided `max_level` is reached.
+
+     Args:
+         record_node: The record node to flatten.
+         flattened_schema: The already flattened full schema for the record.
+         parent_key: The parent's key, provided as a list of node names.
+         separator: The string to use when concatenating key names.
+         level: The current recursion level (zero-based).
+         max_level: The max recursion level (zero-based, exclusive).
+
+     Returns:
+         A flattened version of the provided node.
+     """
+     if parent_key is None:
+         parent_key = []
+
+     items: List[Tuple[str, Any]] = []
+     for k, v in record_node.items():
+         new_key = flatten_key(k, parent_key, separator)
+         if isinstance(v, collections.abc.MutableMapping) and level < max_level:
+             items.extend(
+                 _flatten_record(
+                     v,
+                     flattened_schema,
+                     parent_key + [k],
+                     separator=separator,
+                     level=level + 1,
+                     max_level=max_level,
+                 ).items()
+             )
+         else:
+             items.append(
+                 (
+                     new_key,
+                     json.dumps(v)
+                     if _should_jsondump_value(k, v, flattened_schema)
+                     else v,
+                 )
+             )
+
+     return dict(items)
+
+
+ def _should_jsondump_value(key: str, value: Any, flattened_schema=None) -> bool:
+     """Return True if json.dumps() should be used to serialize the value.
+
+     Args:
+         key: The property name.
+         value: The value to check.
+         flattened_schema: The flattened schema for the record. Defaults to None.
+
+     Returns:
+         True if the value should be serialized with json.dumps().
+     """
+     if isinstance(value, (dict, list)):
+         return True
+
+     if (
+         flattened_schema
+         and key in flattened_schema
+         and "type" in flattened_schema[key]
+         and set(flattened_schema[key]["type"]) == {"null", "object", "array"}
+     ):
+         return True
+
+     return False
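An end-to-end sketch tying flatten_schema and flatten_record together (the schema, record, and depths are illustrative); note that mappings nested deeper than max_level are serialized with json.dumps via _should_jsondump_value:

from hotglue_singer_sdk.helpers._flattening import flatten_record, flatten_schema

schema = {
    "type": "object",
    "properties": {
        "id": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}
record = {"id": "1", "address": {"city": "Berlin"}}

# Depth 1: the nested object is lifted into a separator-joined column.
flat_schema = flatten_schema(schema, max_level=1)
print(flatten_record(record, flat_schema, max_level=1))
# {'id': '1', 'address__city': 'Berlin'}

# Depth 0: nothing is lifted, so the nested mapping is JSON-dumped instead.
print(flatten_record(record, flatten_schema(schema, max_level=0), max_level=0))
# {'id': '1', 'address': '{"city": "Berlin"}'}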