omnata-plugin-runtime 0.11.4a320__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnata_plugin_runtime-0.12.0/PKG-INFO +56 -0
- omnata_plugin_runtime-0.12.0/pyproject.toml +57 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/json_schema.py +175 -78
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/logging.py +2 -1
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/omnata_plugin.py +136 -85
- omnata_plugin_runtime-0.11.4a320/PKG-INFO +0 -56
- omnata_plugin_runtime-0.11.4a320/pyproject.toml +0 -57
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/LICENSE +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/README.md +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/__init__.py +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/api.py +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/configuration.py +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/forms.py +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/plugin_entrypoints.py +0 -0
- {omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/rate_limiting.py +0 -0

omnata_plugin_runtime-0.12.0/PKG-INFO (new file)
@@ -0,0 +1,56 @@
+Metadata-Version: 2.4
+Name: omnata-plugin-runtime
+Version: 0.12.0
+Summary: Classes and common runtime components for building and running Omnata Plugins
+License-File: LICENSE
+Author: James Weakley
+Author-email: james.weakley@omnata.com
+Requires-Python: >=3.10,<=3.13
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: annotated-types (<=0.6.0)
+Requires-Dist: certifi (<=2025.11.12)
+Requires-Dist: cffi (<=2.0.0)
+Requires-Dist: charset-normalizer (<=3.4.4)
+Requires-Dist: cryptography (<=46.0.3)
+Requires-Dist: filelock (<=3.20.0)
+Requires-Dist: idna (<=3.11)
+Requires-Dist: jinja2 (>=3.1.2,<=3.1.6)
+Requires-Dist: markupsafe (<=3.0.2)
+Requires-Dist: numpy (<=2.3.5)
+Requires-Dist: opentelemetry-api (<=1.38.0)
+Requires-Dist: packaging (<=25.0)
+Requires-Dist: pandas (<=2.3.3)
+Requires-Dist: platformdirs (<=4.5.0)
+Requires-Dist: protobuf (<=6.33.0)
+Requires-Dist: pyarrow (<=21.0.0)
+Requires-Dist: pycparser (<=2.23)
+Requires-Dist: pydantic (>=2,<=2.12.4)
+Requires-Dist: pydantic-core (<=2.41.5)
+Requires-Dist: pyjwt (<=2.10.1)
+Requires-Dist: pyopenssl (<=225.3.0)
+Requires-Dist: pytz (<=2025.2)
+Requires-Dist: pyyaml (<=6.0.3)
+Requires-Dist: requests (>=2,<=2.32.5)
+Requires-Dist: setuptools (<=80.9.0)
+Requires-Dist: snowflake-connector-python (>=3,<4)
+Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.43.0)
+Requires-Dist: snowflake-telemetry-python (<=0.5.0)
+Requires-Dist: tenacity (>=8,<9)
+Requires-Dist: tomlkit (<=0.13.3)
+Requires-Dist: urllib3 (<=2.5.0)
+Requires-Dist: wheel (<=0.45.1)
+Requires-Dist: wrapt (<=2.0.1)
+Description-Content-Type: text/markdown
+
+# omnata-plugin-runtime
+This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
+
+It contains data classes, interfaces and application logic used to perform plugin operations.
+
+For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
+
+
omnata_plugin_runtime-0.12.0/pyproject.toml (new file)
@@ -0,0 +1,57 @@
+[tool.poetry]
+name = "omnata-plugin-runtime"
+version = "0.12.0"
+description = "Classes and common runtime components for building and running Omnata Plugins"
+authors = ["James Weakley <james.weakley@omnata.com>"]
+readme = "README.md"
+packages = [{include = "omnata_plugin_runtime", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.10, <=3.13"
+snowflake-snowpark-python = ">=1.20.0,<=1.43.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
+snowflake-connector-python = "^3, <=4.1.0" # latest version available on Snowflake Anaconda
+cryptography = "<=46.0.3"
+annotated-types = "<=0.6.0"
+pycparser = "<=2.23"
+filelock = "<=3.20.0"
+pydantic-core = "<=2.41.5"
+# had to relax some of these thanks to snowcli pinning newer versions
+certifi = "<=2025.11.12" # latest version available on Snowflake Anaconda
+charset-normalizer = "<=3.4.4" # latest version available on Snowflake Anaconda
+idna = "<=3.11" # latest version available on Snowflake Anaconda
+jinja2 = ">=3.1.2,<=3.1.6" # 3.1.6 was latest version available on Snowflake Anaconda
+markupsafe = "<=3.0.2" # latest version available on Snowflake Anaconda
+numpy = "<=2.3.5" # latest version available on Snowflake Anaconda
+packaging = "<=25.0" # latest version available on Snowflake Anaconda
+pandas = "<=2.3.3" # latest version available on Snowflake Anaconda
+platformdirs = "<=4.5.0" # latest version available on Snowflake Anaconda
+pydantic = "^2, <=2.12.4" # latest version available on Snowflake Anaconda
+pyjwt = "<=2.10.1" # latest version available on Snowflake Anaconda
+pyopenssl = "<=225.3.0" # latest version available on Snowflake Anaconda
+pytz = "<=2025.2" # latest version available on Snowflake Anaconda
+requests = "^2, <=2.32.5" # latest version available on Snowflake Anaconda
+setuptools = "<=80.9.0" # latest version available on Snowflake Anaconda
+tomlkit = "<=0.13.3" # latest version available on Snowflake Anaconda
+tenacity = "^8, <=9.1.2" # latest version available on Snowflake Anaconda
+urllib3 = "<=2.5.0" # latest version available on Snowflake Anaconda
+wheel = "<=0.45.1" # latest version available on Snowflake Anaconda
+pyyaml = "<=6.0.3" # latest version available on Snowflake Anaconda
+cffi = "<=2.0.0" # latest version available on Snowflake Anaconda
+pyarrow = "<=21.0.0" # latest version available on Snowflake Anaconda
+wrapt = "<=2.0.1" # latest version available on Snowflake Anaconda
+opentelemetry-api = "<=1.38.0" # latest version available on Snowflake Anaconda
+snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
+protobuf = "<=6.33.0" # latest version available on Snowflake Anaconda
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.2.4"
+deepdiff = "^6"
+requests-mock = ">=1.9.3"
+
+[tool.pytest.ini_options]
+addopts = ["--import-mode=importlib"]
+testpaths = ["tests"]
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
{omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/json_schema.py
@@ -664,9 +664,12 @@ class SnowflakeViewParts(BaseModel):
         )
         joined_parts:List[SnowflakeViewPart] = []
         # remove the joins from the main part if they are not in the raw stream locations
+        original_join_count = len(main_stream_view_part.joins)
         main_stream_view_part.joins = [join for join in main_stream_view_part.joins
                                        if join.join_stream_name in raw_stream_locations
                                        and join.join_stream_name in stream_schemas]
+        if len(main_stream_view_part.joins) < original_join_count:
+            logger.debug(f"Removed {original_join_count - len(main_stream_view_part.joins)} joins from stream: {stream_name} due to missing raw stream locations or schemas")

         for join in main_stream_view_part.joins:
             logger.debug(f"Generating view parts for join stream: {join.join_stream_name}")
@@ -679,6 +682,8 @@ class SnowflakeViewParts(BaseModel):
                 column_name_expression=column_name_expression,
                 plugin_app_database=plugin_app_database
             ))
+        if len(main_stream_view_part.joins) == 0:
+            logger.debug(f"No joins found for stream: {stream_name}")
         # For each column, the plugin can advise which fields (of the same stream or joined) are required for the join, which comes through as referenced_columns
         # on the SnowflakeViewColumn object.
         # Until this generate function is called with the raw stream names, we don't know which streams the user has actually selected, nor which
@@ -697,7 +702,8 @@ class SnowflakeViewParts(BaseModel):

         # Process all joins to build the mappings
         for part in [main_stream_view_part] + joined_parts:
-
+            joined_parts_names = [j.join_stream_name for j in part.joins]
+            logger.debug(f"Processing joins for stream: {part.stream_name} (joined streams: {joined_parts_names})")
             # Make sure the part's stream name is in the mappings
             if part.stream_name not in stream_to_aliases:
                 stream_to_aliases[part.stream_name] = [part.stream_name]
@@ -807,19 +813,8 @@ class SnowflakeViewParts(BaseModel):
         # If we get here, no circular references were found
         logger.debug("No circular references found")

-        #
-
-        # then, we can do a final pass and remove columns that reference fields that are not available in the current stream
-
-        # Now proceed with the actual pruning process
-        # First, removing unavailable columns from other streams
-        # then, we can do a final pass and remove columns that reference fields that are not available in the current stream
-
-        prune_count = 0
-        while prune(main_stream_view_part, joined_parts):
-            prune_count += 1
-            if prune_count > 10:
-                raise ValueError("Pruning of columns from the view has entered an infinite loop")
+        # Prune columns using graph-based dependency resolution (single pass)
+        prune(main_stream_view_part, joined_parts)

         return cls(main_part=main_stream_view_part, joined_parts=joined_parts)

@@ -844,81 +839,183 @@ def find_part(view_part: SnowflakeViewPart, joined_parts: List[SnowflakeViewPart

 def prune(view_part: SnowflakeViewPart, joined_parts: List[SnowflakeViewPart]) -> bool:
     """
-    Prunes columns from view parts
+    Prunes columns from view parts using graph-based dependency resolution.

-
-    1.
-    2.
+    Uses TopologicalSorter to:
+    1. Build a complete dependency graph of all columns across all parts
+    2. Identify "root" columns that must be kept (in main part or used in joins)
+    3. Traverse dependencies to find all transitively required columns
+    4. Remove columns that aren't needed

     Returns True if any columns were removed, False otherwise.
-    Raises ValueError if a cyclic dependency is detected.
     """
-
-
-
-
-
-
-
-
-
-
-
+
+    all_parts = [view_part] + joined_parts
+
+    # Build column registry: (stream_name, column_name) -> column object
+    all_columns: Dict[Tuple[str, str], SnowflakeViewColumn] = {}
+    for part in all_parts:
+        for column in part.columns:
+            all_columns[(part.stream_name, column.original_name)] = column
+
+    # Build dependency graph for topological analysis
+    # Key: (stream, column), Value: list of (stream, column) dependencies
+    # Also track columns with invalid dependencies (reference non-existent columns)
+    dependency_graph: Dict[Tuple[str, str], List[Tuple[str, str]]] = {}
+    columns_with_invalid_deps: set[Tuple[str, str]] = set()
+
+    # First pass: build dependency graph and detect direct invalid references
+    for part in all_parts:
+        for column in part.columns:
+            key = (part.stream_name, column.original_name)
+            deps = []
+            has_invalid_dep = False

-
-
-
-
+            if column.referenced_columns:
+                for ref_stream_name, ref_fields in column.referenced_columns.items():
+                    # Resolve stream alias to actual stream name
+                    resolved_stream = ref_stream_name
+                    for join in view_part.joins:
+                        if join.join_stream_alias == ref_stream_name:
+                            resolved_stream = join.join_stream_name
+                            break
+
+                    for ref_field in ref_fields:
+                        dep_key = (resolved_stream, ref_field)
+                        if dep_key in all_columns:
+                            deps.append(dep_key)
+                        else:
+                            logger.warning(
+                                f"Column {column.original_name} in {part.stream_name} references "
+                                f"{ref_field} in {resolved_stream}, which doesn't exist"
+                            )
+                            has_invalid_dep = True

-
-    if
-
-
-
-
-
-
-
-
-
-
+            dependency_graph[key] = deps
+            if has_invalid_dep:
+                columns_with_invalid_deps.add(key)
+
+    # Second pass: propagate invalidity to columns that depend on invalid columns
+    # Keep iterating until no new invalid columns are found
+    changed = True
+    while changed:
+        changed = False
+        for col_key, deps in dependency_graph.items():
+            if col_key not in columns_with_invalid_deps:
+                # Check if any dependency is invalid
+                for dep_key in deps:
+                    if dep_key in columns_with_invalid_deps:
+                        logger.warning(
+                            f"Column {col_key[1]} in {col_key[0]} depends on "
+                            f"{dep_key[1]} in {dep_key[0]}, which has invalid dependencies"
+                        )
+                        columns_with_invalid_deps.add(col_key)
+                        changed = True
+                        break
+
+    # Build alias to stream mapping
+    alias_to_stream: Dict[str, str] = {}
+    for part in all_parts:
+        alias_to_stream[part.stream_name] = part.stream_name
+        for join in part.joins:
+            alias_to_stream[join.join_stream_alias] = join.join_stream_name
+            # left_alias might be an alias for a joined stream, resolve it
+            if join.left_alias not in alias_to_stream:
+                # Try to find the stream for this alias
+                for other_part in all_parts:
+                    if other_part.stream_name == join.left_alias:
+                        alias_to_stream[join.left_alias] = other_part.stream_name
+                        break
+
+    # Identify root columns that must be kept
+    needed_columns: set[Tuple[str, str]] = set()
+
+    # 1. All columns in the main part are needed (except those with invalid dependencies)
+    for column in view_part.columns:
+        col_key = (view_part.stream_name, column.original_name)
+        if col_key not in columns_with_invalid_deps:
+            needed_columns.add(col_key)
+
+    # 2. All columns used in join conditions are needed (except those with invalid dependencies)
+    for part in all_parts:
+        for join in part.joins:
+            # Resolve left_alias to actual stream name
+            left_stream = alias_to_stream.get(join.left_alias, join.left_alias)
+            left_key = (left_stream, join.left_column)
+            right_key = (join.join_stream_name, join.join_stream_column)
+            if left_key not in columns_with_invalid_deps:
+                needed_columns.add(left_key)
+            if right_key not in columns_with_invalid_deps:
+                needed_columns.add(right_key)
+
+    logger.debug(f"Identified {len(needed_columns)} root columns to keep (excluding {len(columns_with_invalid_deps)} with invalid deps)")
+
+    # 3. Find all transitive dependencies using recursive traversal
+    # Skip columns with invalid dependencies and their dependents
+    def collect_dependencies(col_key: Tuple[str, str], visited: set[Tuple[str, str]]) -> None:
+        """Recursively collect all columns that col_key depends on"""
+        if col_key in visited or col_key not in dependency_graph:
+            return
+        if col_key in columns_with_invalid_deps:
+            return # Don't traverse dependencies of invalid columns
+        visited.add(col_key)
+
+        for dep_key in dependency_graph[col_key]:
+            if dep_key in all_columns and dep_key not in columns_with_invalid_deps:
+                needed_columns.add(dep_key)
+                collect_dependencies(dep_key, visited)
+
+    visited_global: set[Tuple[str, str]] = set()
+    for root_col in list(needed_columns):
+        collect_dependencies(root_col, visited_global)
+
+    # Remove columns that are not needed
+    columns_removed = False
+    for part in all_parts:
+        original_count = len(part.columns)
+        removed_cols = [col for col in part.columns
+                        if (part.stream_name, col.original_name) not in needed_columns]
+
+        # Log warnings for each removed column with the reason
+        for col in removed_cols:
+            # Determine why the column is being removed
+            col_key = (part.stream_name, col.original_name)
+            if col.referenced_columns:
+                # Check if any referenced columns don't exist
+                missing_refs = []
+                for ref_stream_name, ref_fields in col.referenced_columns.items():
+                    resolved_stream = ref_stream_name
+                    for join in view_part.joins:
+                        if join.join_stream_alias == ref_stream_name:
+                            resolved_stream = join.join_stream_name
+                            break
+                    for ref_field in ref_fields:
+                        if (resolved_stream, ref_field) not in all_columns:
+                            missing_refs.append(f"{ref_field} in {resolved_stream}")

-
-            if ref_column is None:
+                if missing_refs:
                     logger.warning(
-                        f"
-                        f"
+                        f"Removing column {col.original_name} from {part.stream_name} because it references "
+                        f"non-existent column(s): {', '.join(missing_refs)}"
                     )
-
-
-
-
-
-
-
-
-
+                else:
+                    # Column is not needed (not referenced by main part)
+                    logger.debug(
+                        f"Removing column {col.original_name} from {part.stream_name} because it is not "
+                        f"referenced by the main part or any join conditions"
+                    )
+            else:
+                logger.debug(
+                    f"Removing column {col.original_name} from {part.stream_name} because it is not "
+                    f"referenced by the main part or any join conditions"
+                )
+
+        part.columns = [col for col in part.columns
+                        if (part.stream_name, col.original_name) in needed_columns]
+
+        if removed_cols:
             columns_removed = True

-    # Process joined parts
-    for joined_part in joined_parts:
-        # We have to avoid pruning columns that are referenced by joins to this stream.
-        # first, we determine all aliases for this stream (multiple join paths back to the same stream are allowed)
-        aliases_for_stream = [j.join_stream_alias for j in view_part.joins if j.join_stream_name == joined_part.stream_name]
-        # now find all joins using this stream as the join stream
-        columns_used_in_joins = [
-            j.left_column for j in view_part.joins if j.left_alias in aliases_for_stream
-        ]
-        for column in joined_part.columns[:]: # Use a copy to allow safe removal
-            # First check if the column is a join column
-            if column.original_name in columns_used_in_joins:
-                # If it's a join column, we need to keep it
-                continue
-
-            if not should_keep_column(column, joined_part):
-                joined_part.columns.remove(column)
-                columns_removed = True
-
     return columns_removed

 class JsonSchemaTopLevel(BaseModel):
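
The rewritten prune above swaps the old fixed-point loop (re-run until nothing changes, bail out after 10 passes) for a single pass over an explicit dependency graph keyed by (stream, column): roots are seeded from the main part and the join keys, and everything they transitively reference is kept. A minimal standalone sketch of that idea, using hypothetical stream and column names and plain dicts in place of the runtime's SnowflakeViewPart/SnowflakeViewColumn objects:

```python
from typing import Dict, List, Set, Tuple

Col = Tuple[str, str]  # (stream_name, column_name)

# Hypothetical dependency graph: each column lists the columns it references
dependency_graph: Dict[Col, List[Col]] = {
    ("orders", "order_id"): [],
    ("orders", "customer_label"): [("customers", "name")],
    ("customers", "customer_id"): [],
    ("customers", "name"): [("customers", "first_name")],
    ("customers", "first_name"): [],
    ("customers", "unused_blob"): [],
}

# Roots: every column of the main stream, plus the column used in the join condition
needed: Set[Col] = {c for c in dependency_graph if c[0] == "orders"}
needed.add(("customers", "customer_id"))

def collect(col: Col, visited: Set[Col]) -> None:
    """Walk the graph once, adding every transitively referenced column."""
    if col in visited or col not in dependency_graph:
        return
    visited.add(col)
    for dep in dependency_graph[col]:
        needed.add(dep)
        collect(dep, visited)

visited: Set[Col] = set()
for root in list(needed):
    collect(root, visited)

pruned = [c for c in dependency_graph if c not in needed]
print(pruned)  # [('customers', 'unused_blob')] is dropped in a single pass
```

Because each edge is followed at most once, there is no repeated pruning pass and therefore no need for the old infinite-loop guard.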

{omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/logging.py
@@ -9,9 +9,10 @@ from typing import Dict, List, Optional
 from snowflake.snowpark import Session
 from pydantic import ValidationError
 from snowflake import telemetry
-from opentelemetry import trace
+from opentelemetry import trace, metrics

 tracer = trace.get_tracer('omnata_plugin_runtime')
+meter = metrics.get_meter('omnata_plugin_runtime')

 class CustomLoggerAdapter(logging.LoggerAdapter):
     """
{omnata_plugin_runtime-0.11.4a320 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/omnata_plugin.py
@@ -15,7 +15,7 @@ if tuple(sys.version_info[:2]) >= (3, 9):
 else:
     # Python 3.8 and below
     from typing_extensions import Annotated
-
+from dataclasses import dataclass
 import zipfile
 import datetime
 import http
@@ -48,7 +48,12 @@ from snowflake.snowpark import Session
 from snowflake.snowpark.functions import col
 from tenacity import Retrying, stop_after_attempt, wait_fixed, retry_if_exception_message

-from .logging import OmnataPluginLogHandler, logger, tracer
+from .logging import OmnataPluginLogHandler, logger, tracer, meter
+stream_duration_gauge = meter.create_gauge(
+    name="omnata.sync_run.stream_duration",
+    description="The duration of stream processing",
+    unit="s",
+)
 from opentelemetry import context
 import math
 import numpy as np
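
For context on the metrics wiring above: metrics.get_meter resolves against the globally configured meter provider (a no-op provider if no SDK is set up), and the gauge created here is set later in mark_stream_complete. A minimal self-contained sketch of the same pattern, assuming a recent opentelemetry-api that provides the synchronous Gauge; the stream name and timing below are made up:

```python
import time
from opentelemetry import metrics

# Without a configured MeterProvider this resolves to a no-op meter,
# which is safe to call from library code.
meter = metrics.get_meter("omnata_plugin_runtime")
stream_duration_gauge = meter.create_gauge(
    name="omnata.sync_run.stream_duration",
    description="The duration of stream processing",
    unit="s",
)

start = time.time()
# ... fetch records for the stream here ...
stream_duration_gauge.set(
    amount=time.time() - start,
    attributes={"stream_name": "orders", "sync_direction": "inbound"},
)
```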
@@ -265,6 +270,29 @@ def jinja_filter(func):
     func.is_jinja_filter = True
     return func

+@dataclass
+class StateResult:
+    """
+    Represents the current cursor state of a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    new_state: Any
+
+@dataclass
+class RecordsToUploadResult:
+    """
+    Represents the records to upload for a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    records: pandas.DataFrame
+
+@dataclass
+class CriteriaDeleteResult:
+    """
+    Represents the result of processing criteria deletes for a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    criteria_deletes: pandas.DataFrame

 class SyncRequest(ABC):
     """
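
These three wrappers exist so that a single ordered apply_results list per stream can hold record batches, state checkpoints and criteria deletes, with later code dispatching on isinstance. A small illustrative sketch of that dispatch, using standalone dataclasses and dummy data rather than the runtime's own classes:

```python
from dataclasses import dataclass
from typing import Any, List, Union

import pandas

@dataclass
class StateResult:
    new_state: Any

@dataclass
class RecordsToUploadResult:
    records: pandas.DataFrame

@dataclass
class CriteriaDeleteResult:
    criteria_deletes: pandas.DataFrame

ApplyResult = Union[RecordsToUploadResult, StateResult, CriteriaDeleteResult]

# An ordered, append-only queue for one stream: records, a delete request, then a state checkpoint
queue: List[ApplyResult] = [
    RecordsToUploadResult(records=pandas.DataFrame({"id": [1, 2]})),
    CriteriaDeleteResult(criteria_deletes=pandas.DataFrame([{"DELETE_CRITERIA": {"id": 1}}])),
    StateResult(new_state={"cursor": "2024-01-01"}),
]

records = [r.records for r in queue if isinstance(r, RecordsToUploadResult)]
deletes = [r.criteria_deletes for r in queue if isinstance(r, CriteriaDeleteResult)]
latest_state = next(
    (r.new_state for r in reversed(queue) if isinstance(r, StateResult)), None
)
print(len(records), len(deletes), latest_state)  # 1 1 {'cursor': '2024-01-01'}
```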
@@ -1057,7 +1085,6 @@ class InboundSyncRequest(SyncRequest):
         }

         # These are similar to the results, but represent requests to delete records by some criteria
-        self._apply_results_criteria_deletes: Dict[str, List[pandas.DataFrame]] = {}
         self._temp_tables = {}
         self._temp_table_lock = threading.Lock()
         self._results_exist: Dict[
@@ -1096,7 +1123,9 @@ class InboundSyncRequest(SyncRequest):
         self._criteria_deletes_table_name = results_table.get_fully_qualified_criteria_deletes_table_name()
         self.state_register_table_name = results_table.get_fully_qualified_state_register_table_name()
         # this is keyed on stream name, each containing a list of dataframes and state updates mixed
-        self._apply_results: Dict[str, List[
+        self._apply_results: Dict[str, List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = {}
+        # track the start times of each stream, so we can calculate durations. The int is a epoch (time.time()) value
+        self._stream_start_times: Dict[str, int] = {}

     def apply_results_queue(self):
         """
@@ -1105,7 +1134,8 @@ class InboundSyncRequest(SyncRequest):
         logger.debug("InboundSyncRequest apply_results_queue")
         if self._apply_results is not None:
             with self._apply_results_lock:
-
+                records_to_upload:List[pandas.DataFrame] = []
+                criteria_deletes_to_upload:List[pandas.DataFrame] = []
                 stream_states_for_upload:Dict[str, Dict[str, Any]] = {}
                 for stream_name, stream_results in self._apply_results.items():
                     # the stream results contains an ordered sequence of dataframes and state updates (append only)
@@ -1113,9 +1143,9 @@ class InboundSyncRequest(SyncRequest):
                     # so first, we iterate backwards to find the last state update
                     last_state_index = -1
                     for i in range(len(stream_results) - 1, -1, -1):
-                        if isinstance(stream_results[i],
+                        if isinstance(stream_results[i], StateResult):
                             last_state_index = i
-                            stream_states_for_upload[stream_name] = stream_results[i]
+                            stream_states_for_upload[stream_name] = stream_results[i].new_state
                             break
                     # if there are no state updates, we can't do anything with this stream
                     if last_state_index == -1:
@@ -1124,56 +1154,54 @@ class InboundSyncRequest(SyncRequest):
                     )
                     continue
                 assert isinstance(stream_states_for_upload[stream_name], dict), "Latest state must be a dictionary"
-                # now we can take the dataframes up to the last state update
-
-
-                    x for x in
+                # now we can take the record dataframes up to the last state update
+                results_subset = stream_results[:last_state_index]
+                non_empty_record_dfs:List[pandas.DataFrame] = [
+                    x.records for x in results_subset
+                    if x is not None and isinstance(x, RecordsToUploadResult) and len(x.records) > 0
                 ]
                 # get the total length of all the dataframes
-                total_length = sum([len(x) for x in
+                total_length = sum([len(x) for x in non_empty_record_dfs])
                 # add the count of this batch to the total for this stream
                 self._stream_record_counts[
                     stream_name
                 ] = self._stream_record_counts[stream_name] + total_length
-
+                records_to_upload.extend(non_empty_record_dfs)
+                # also handle any criteria deletes
+                criteria_deletes_to_upload.extend([
+                    x.criteria_deletes for x in results_subset
+                    if x is not None and isinstance(x, CriteriaDeleteResult) and len(x.criteria_deletes) > 0
+                ])
                 # now remove everything up to the last state update
                 # we do this so that we don't apply the same state update multiple times
+                # keep everything after the last state update
                 self._apply_results[stream_name] = stream_results[
                     last_state_index + 1 :
-                ]
-
-
-
-
-
-
-
-
-
+                ]
+
+            if len(records_to_upload) > 0 or len(criteria_deletes_to_upload) > 0:
+                if len(records_to_upload) > 0:
+                    logger.debug(
+                        f"Applying {len(records_to_upload)} batches of queued results"
+                    )
+                    # upload all cached apply results
+                    records_to_upload_combined = pandas.concat(records_to_upload)
+                    self._apply_results_dataframe(list(stream_states_for_upload.keys()), records_to_upload_combined)
+                # now that the results have been updated, we need to insert records into the state register table
+                # we do this by inserting the latest state for each stream
+                if len(criteria_deletes_to_upload) > 0:
+                    logger.debug(
+                        f"Applying {len(criteria_deletes_to_upload)} batches of queued criteria deletes"
+                    )
+                    # upload all cached apply results
+                    all_criteria_deletes = pandas.concat(criteria_deletes_to_upload)
+                    self._apply_criteria_deletes_dataframe(all_criteria_deletes)
+
+                query_id = self._get_query_id_for_now()
                 self._directly_insert_to_state_register(
                     stream_states_for_upload, query_id=query_id
                 )

-        # also take care of uploading delete requests
-        # technically these should be managed along with the state, however there aren't any scenarios where checkpointing is done
-        # and deletes have an impact. This is because we only checkpoint in scenarios where the target table is empty first
-        if hasattr(self,'_apply_results_criteria_deletes') and self._apply_results_criteria_deletes is not None:
-            with self._apply_results_lock:
-                results:List[pandas.DataFrame] = []
-                for stream_name, stream_results in self._apply_results_criteria_deletes.items():
-                    results.extend([
-                        x for x in stream_results if x is not None and len(x) > 0
-                    ])
-                if len(results) > 0:
-                    logger.debug(
-                        f"Applying {len(results)} batches of queued criteria deletes"
-                    )
-                    # upload all cached apply results
-                    all_dfs = pandas.concat(results)
-                    self._apply_criteria_deletes_dataframe(all_dfs)
-                    # clear the delete requests
-                    self._apply_results_criteria_deletes = {}
-

         # update the inbound stream record counts, so we can see progress
         # we do this last, because marking a stream as completed will cause the sync engine to process it
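
The reworked apply_results_queue above walks each stream's queue backwards to the most recent StateResult, flushes everything before it, and keeps everything after it so the same checkpoint is never applied twice. A small sketch of just that slicing logic, with plain dicts standing in for StateResult and strings standing in for record batches:

```python
from typing import Any, List, Tuple

def split_at_last_checkpoint(queue: List[Any]) -> Tuple[List[Any], Any, List[Any]]:
    """Return (items to upload now, latest checkpoint, items to keep queued)."""
    last_state_index = -1
    latest_state = None
    for i in range(len(queue) - 1, -1, -1):
        if isinstance(queue[i], dict):  # dicts stand in for StateResult here
            last_state_index = i
            latest_state = queue[i]
            break
    if last_state_index == -1:
        return [], None, queue  # nothing can be flushed without a checkpoint
    return queue[:last_state_index], latest_state, queue[last_state_index + 1:]

queue = ["batch-1", "batch-2", {"cursor": 10}, "batch-3"]
to_upload, state, remaining = split_at_last_checkpoint(queue)
print(to_upload, state, remaining)
# ['batch-1', 'batch-2'] {'cursor': 10} ['batch-3']
```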
@@ -1281,29 +1309,40 @@ class InboundSyncRequest(SyncRequest):
         if stream_name is None or len(stream_name) == 0:
             raise ValueError("Stream name cannot be empty")
         with self._apply_results_lock:
-            existing_results: List[
+            existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
             if stream_name in self._apply_results:
                 existing_results = self._apply_results[stream_name]
-            existing_results.append(
+            existing_results.append(RecordsToUploadResult(
+                records=self._preprocess_results_list(stream_name, results, is_delete)
+            ))
             if new_state is not None:
-                existing_results.append(
+                existing_results.append(
+                    StateResult(new_state=new_state)
+                ) # append the new state at the end
             self._apply_results[stream_name] = existing_results
-        # if the total size of all the dataframes exceeds 200MB, apply the results immediately
-        # we'll use df.memory_usage(index=True) for this
         if self.development_mode is False:
             # note: we want to do it for all values in self._apply_results, not just the new one
-
-
-
-
-
-
-
-
-
-
-
-
+            self._apply_results_if_size_exceeded()
+
+    def _apply_results_if_size_exceeded(self,):
+        # so first we need to get the list of lists from the dictionary values and flatten it
+        # then we can sum the memory usage of each dataframe
+        # if the total exceeds 200MB, we apply the results immediately
+        all_df_lists:List[List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = list(self._apply_results.values())
+        # flatten
+        all_dfs:List[pandas.DataFrame] = []
+        for sublist in all_df_lists:
+            for x in sublist:
+                if isinstance(x, RecordsToUploadResult):
+                    all_dfs.append(x.records)
+                if isinstance(x, CriteriaDeleteResult):
+                    all_dfs.append(x.criteria_deletes)
+        combined_length = sum([len(x) for x in all_dfs])
+        # first, don't bother if the count is less than 10000, since it's unlikely to be even close
+        if combined_length > 10000:
+            if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
+                logger.debug(f"Applying results queue immediately due to combined dataframe size")
+                self.apply_results_queue()

     def delete_by_criteria(self, stream_name: str, criteria: Dict[str, Any]):
         """
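
The new _apply_results_if_size_exceeded helper flushes the queue once the buffered DataFrames pass a row-count pre-check and roughly 200 MB of memory, measured with pandas' own accounting. A tiny standalone illustration of that measurement, with toy data that stays well under the thresholds:

```python
import pandas

# Two small buffered batches standing in for queued result DataFrames
buffered = [
    pandas.DataFrame({"id": range(1000), "name": ["x"] * 1000}),
    pandas.DataFrame({"id": range(500)}),
]

combined_rows = sum(len(df) for df in buffered)
combined_bytes = sum(df.memory_usage(index=True).sum() for df in buffered)
print(f"{combined_rows} rows, {combined_bytes} bytes buffered")

# The runtime only bothers measuring above 10,000 rows, then flushes past ~200 MB
if combined_rows > 10000 and combined_bytes > 200_000_000:
    print("Would flush the queue now")
```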
@@ -1329,27 +1368,22 @@ class InboundSyncRequest(SyncRequest):
         logger.debug(
             f"Enqueuing {len(criteria)} delete criteria for stream {stream_name} for upload"
         )
-        existing_results: List[
-        if stream_name in self.
-            existing_results = self.
-        existing_results.append(
-
-
+        existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
+        if stream_name in self._apply_results:
+            existing_results = self._apply_results[stream_name]
+        existing_results.append(
+            CriteriaDeleteResult(
+                criteria_deletes=pandas.DataFrame([{"STREAM_NAME":stream_name,"DELETE_CRITERIA": criteria}])))
+        self._apply_results[stream_name] = existing_results
         if self.development_mode is False:
-
-
-
-
-
-
-
-
-            # first, don't both if the count is less than 10000, since it's unlikely to be even close
-            if combined_length > 10000:
-                if sum([x.memory_usage(index=True).sum() for x in all_dfs if isinstance(x, pandas.DataFrame)]) > 200000000:
-                    logger.debug(f"Applying criteria deletes queue immediately due to combined dataframe size")
-                    self.apply_results_queue()
+            self._apply_results_if_size_exceeded()
+
+    def mark_stream_started(self, stream_name: str):
+        """
+        Marks a stream as started, this is called automatically per stream when using @managed_inbound_processing.
+        """
+        logger.debug(f"Marking stream {stream_name} as started locally")
+        self._stream_start_times[stream_name] = time.time()

     def mark_stream_complete(self, stream_name: str):
         """
@@ -1357,6 +1391,20 @@ class InboundSyncRequest(SyncRequest):
         If @managed_inbound_processing is not used, call this whenever a stream has finished recieving records.
         """
         logger.debug(f"Marking stream {stream_name} as completed locally")
+        if stream_name in self._stream_start_times:
+            start_time = self._stream_start_times[stream_name]
+            duration = time.time() - start_time
+            stream_duration_gauge.set(
+                amount=duration,
+                attributes={
+                    "stream_name": stream_name,
+                    "sync_run_id": str(self._run_id),
+                    "sync_id": str(self._sync_id),
+                    "branch_name": str(self._branch_name) if self._branch_name is not None else 'main',
+                    "sync_direction": "inbound",
+                    "plugin_id": self.plugin_instance.get_manifest().plugin_id,
+                },
+            )
         with self._apply_results_lock:
             self._completed_streams.append(stream_name)
             # dedup just in case it's called twice
@@ -1463,7 +1511,7 @@ class InboundSyncRequest(SyncRequest):
             logger.debug(f"Failure to convert inbound data: {str(exception)}")
             return data

-    def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]):
+    def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]) -> pandas.DataFrame:
         """
         Creates a dataframe from the enqueued list, ready to upload.
         The result is a dataframe contain all (and only):
@@ -1608,7 +1656,7 @@ class InboundSyncRequest(SyncRequest):
         hash_object = hashlib.sha256(key_string.encode())
         return hash_object.hexdigest()

-    def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame)
+    def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame):
         """
         Applies results for an inbound sync. The results are staged into a temporary
         table in Snowflake, so that we can make an atomic commit at the end.
@@ -1635,7 +1683,6 @@ class InboundSyncRequest(SyncRequest):
                 raise ValueError(
                     f"Failed to write results to table {self._full_results_table_name}"
                 )
-            query_id = self._get_query_id_for_now()
             logger.debug(
                 f"Wrote {nrows} rows and {nchunks} chunks to table {self._full_results_table_name}"
             )
@@ -1648,7 +1695,6 @@ class InboundSyncRequest(SyncRequest):
             # )
             for stream_name in stream_names:
                 self._results_exist[stream_name] = True
-            return query_id
         else:
             logger.debug("Results dataframe is empty, not applying")

@@ -2330,6 +2376,11 @@ def __managed_inbound_processing_worker(
         try:
             stream: StoredStreamConfiguration = streams_queue.get_nowait()
             logger.debug(f"stream returned from queue: {stream}")
+            sync_request: InboundSyncRequest = cast(
+                InboundSyncRequest, plugin_class_obj._sync_request
+            ) # pylint: disable=protected-access
+            if stream.stream_name not in sync_request._stream_start_times:
+                sync_request.mark_stream_started(stream.stream_name)
             # restore the first argument, was originally the dataframe/generator but now it's the appropriately sized dataframe
             try:
                 with tracer.start_as_current_span("managed_inbound_processing") as managed_inbound_processing_span:
@@ -2341,7 +2392,7 @@ def __managed_inbound_processing_worker(
                         logger.info(f"worker {worker_index} requested that {stream.stream_name} be not marked as complete")
                     else:
                         logger.info(f"worker {worker_index} marking stream {stream.stream_name} as complete")
-
+                        sync_request.mark_stream_complete(stream.stream_name)
         except InterruptedWhileWaitingException:
             # If an inbound run is cancelled while waiting for rate limiting, this should mean that
             # the cancellation is handled elsewhere, so we don't need to do anything special here other than stop waiting

omnata_plugin_runtime-0.11.4a320/PKG-INFO (deleted)
@@ -1,56 +0,0 @@
-Metadata-Version: 2.4
-Name: omnata-plugin-runtime
-Version: 0.11.4a320
-Summary: Classes and common runtime components for building and running Omnata Plugins
-License-File: LICENSE
-Author: James Weakley
-Author-email: james.weakley@omnata.com
-Requires-Python: >=3.8,<=3.11
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: annotated-types (<=0.6.0)
-Requires-Dist: certifi (<=2024.8.30)
-Requires-Dist: cffi (<=1.16.0)
-Requires-Dist: charset-normalizer (<=3.3.2)
-Requires-Dist: cryptography (<=43.0.0)
-Requires-Dist: filelock (<=3.13.1)
-Requires-Dist: idna (<=3.7)
-Requires-Dist: jinja2 (>=3.1.2,<=3.1.4)
-Requires-Dist: markupsafe (<=2.1.3)
-Requires-Dist: numpy (<=2.1.3)
-Requires-Dist: opentelemetry-api (<=1.23.0)
-Requires-Dist: packaging (<=24.1)
-Requires-Dist: pandas (<=2.2.3)
-Requires-Dist: platformdirs (<=3.10.0)
-Requires-Dist: protobuf (<=4.25.3)
-Requires-Dist: pyarrow (<=16.1.0)
-Requires-Dist: pycparser (<=2.21)
-Requires-Dist: pydantic (>=2,<=2.8.2)
-Requires-Dist: pydantic-core (<=2.21.0)
-Requires-Dist: pyjwt (<=2.8.0)
-Requires-Dist: pyopenssl (<=24.2.1)
-Requires-Dist: pytz (<=2024.1)
-Requires-Dist: pyyaml (<=6.0.1)
-Requires-Dist: requests (>=2,<=2.32.3)
-Requires-Dist: setuptools (<=72.1.0)
-Requires-Dist: snowflake-connector-python (>=3,<=3.12.0)
-Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.24.0)
-Requires-Dist: snowflake-telemetry-python (<=0.5.0)
-Requires-Dist: tenacity (>=8,<=8.2.3)
-Requires-Dist: tomlkit (<=0.11.1)
-Requires-Dist: urllib3 (<=2.2.2)
-Requires-Dist: wheel (<=0.43.0)
-Requires-Dist: wrapt (<=1.14.1)
-Description-Content-Type: text/markdown
-
-# omnata-plugin-runtime
-This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
-
-It contains data classes, interfaces and application logic used to perform plugin operations.
-
-For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
-
-
omnata_plugin_runtime-0.11.4a320/pyproject.toml (deleted)
@@ -1,57 +0,0 @@
-[tool.poetry]
-name = "omnata-plugin-runtime"
-version = "0.11.4-a320"
-description = "Classes and common runtime components for building and running Omnata Plugins"
-authors = ["James Weakley <james.weakley@omnata.com>"]
-readme = "README.md"
-packages = [{include = "omnata_plugin_runtime", from = "src"}]
-
-[tool.poetry.dependencies]
-python = ">=3.8, <=3.11"
-snowflake-snowpark-python = ">=1.20.0,<=1.24.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
-snowflake-connector-python = "^3, <=3.12.0" # latest version available on Snowflake Anaconda
-cryptography = "<=43.0.0"
-annotated-types = "<=0.6.0"
-pycparser = "<=2.21"
-filelock = "<=3.13.1"
-pydantic-core = "<=2.21.0"
-# had to relax some of these thanks to snowcli pinning newer versions
-certifi = "<=2024.8.30" # latest version available on Snowflake Anaconda
-charset-normalizer = "<=3.3.2" # latest version available on Snowflake Anaconda
-idna = "<=3.7" # latest version available on Snowflake Anaconda
-jinja2 = ">=3.1.2,<=3.1.4" # 3.1.4 was latest version available on Snowflake Anaconda
-markupsafe = "<=2.1.3" # latest version available on Snowflake Anaconda
-numpy = "<=2.1.3" # latest version available on Snowflake Anaconda
-packaging = "<=24.1" # latest version available on Snowflake Anaconda
-pandas = "<=2.2.3" # latest version available on Snowflake Anaconda
-platformdirs = "<=3.10.0" # latest version available on Snowflake Anaconda
-pydantic = "^2, <=2.8.2" # latest version available on Snowflake Anaconda
-pyjwt = "<=2.8.0" # latest version available on Snowflake Anaconda
-pyopenssl = "<=24.2.1" # latest version available on Snowflake Anaconda
-pytz = "<=2024.1" # latest version available on Snowflake Anaconda
-requests = "^2, <=2.32.3" # latest version available on Snowflake Anaconda
-setuptools = "<=72.1.0" # latest version available on Snowflake Anaconda
-tomlkit = "<=0.11.1" # latest version available on Snowflake Anaconda
-tenacity = "^8, <=8.2.3" # latest version available on Snowflake Anaconda
-urllib3 = "<=2.2.2" # latest version available on Snowflake Anaconda
-wheel = "<=0.43.0" # latest version available on Snowflake Anaconda
-pyyaml = "<=6.0.1" # latest version available on Snowflake Anaconda
-cffi = "<=1.16.0" # latest version available on Snowflake Anaconda
-pyarrow = "<=16.1.0" # latest version available on Snowflake Anaconda
-wrapt = "<=1.14.1" # latest version available on Snowflake Anaconda
-opentelemetry-api = "<=1.23.0" # latest version available on Snowflake Anaconda
-snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
-protobuf = "<=4.25.3" # latest version available on Snowflake Anaconda
-
-[tool.poetry.dev-dependencies]
-pytest = "^6.2.4"
-deepdiff = "^6"
-requests-mock = ">=1.9.3"
-
-[tool.pytest.ini_options]
-addopts = ["--import-mode=importlib"]
-testpaths = ["tests"]
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"