PyPI - omnata-plugin-runtime - Versions diffs - 0.10.7a254__tar.gz → 0.10.8a256__tar.gz - Mend

@@ -52,6 +52,9 @@ class JsonSchemaProperty(BaseModel):
     requiredStreamNames: Optional[List[str]] = Field(
         None, description="The names of the streams that are depended upon by this column, via joins. If these streams are not selected, the column will be omitted."
     )
+    referencedFields: Optional[Dict[str,List[str]]] = Field(
+        None, description="The names of fields that are referenced by this field, keyed on the stream name (or None if it's the current stream). This is used to order the fields, and also to cascade the removal of unsupported fields (e.g. in formulas)."
+    )
     @model_validator(mode='after')
     def validate(self) -> Self:
@@ -165,6 +168,9 @@ class SnowflakeViewColumn(BaseModel):
     required_stream_names: Optional[List[str]] = Field(
         default=None, description="The names of the streams that are depended upon by this column, via joins. If these streams are not selected, the column will be omitted"
     )
+    referenced_columns: Optional[Dict[str,List[str]]] = Field(
+        default=None, description="The names of columns that are referenced by this column, keyed on the stream name (or None if it's the current stream). This is used to order the columns, and also to cascade the removal of unsupported columns (e.g. in formulas)."
+    )
     def __repr__(self) -> str:
         return "SnowflakeViewColumn(name=%r, definition=%r, comment=%r)" % (
@@ -230,19 +236,23 @@ class SnowflakeViewColumn(BaseModel):
             if not json_schema_property.snowflakeColumnExpression:
                 expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
         required_stream_names = None
+        referenced_columns = None
         if json_schema_property.requiredStreamNames:
             required_stream_names = json_schema_property.requiredStreamNames
+        if json_schema_property.referencedFields:
+            referenced_columns = json_schema_property.referencedFields
         return cls(
             name=final_column_name,
             original_name=column_name,
             expression=expression,
             comment=comment,
             is_join_column=json_schema_property.isJoinColumn,
-            required_stream_names=required_stream_names
+            required_stream_names=required_stream_names,
+            referenced_columns=referenced_columns
         )
     @classmethod
-    def order_by_reference(cls,columns:List[Self]) -> List[Self]:
+    def order_by_reference(cls,current_stream_name:str,columns:List[Self]) -> List[Self]:
         """
         In some situations, column expressions may reference the alias of another column
         This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
@@ -262,7 +272,7 @@ class SnowflakeViewColumn(BaseModel):
             for other_column in columns:
                 if column==other_column:
                     continue
-                if f'"{column.original_name}"' in other_column.expression:
+                if column.original_name in (other_column.referenced_columns or {}).get(current_stream_name,[]):
                     if column not in columns_to_move:
                         columns_to_move.append(column)
@@ -413,6 +423,12 @@ class SnowflakeViewPart(BaseModel):
     select {', '.join([c.definition(original_name=original_name) for c in self.direct_columns()])}
     from {self.raw_table_location.get_fully_qualified_name()}
 ) """
+    def columns_missing(self,columns_to_check:List[str]) -> List[str]:
+        """
+        Returns a list of columns that are missing from the view part.
+        """
+        return [c for c in columns_to_check if c not in [c.original_name for c in self.columns]]
 class SnowflakeViewParts(BaseModel):
     """
@@ -484,7 +500,7 @@ class SnowflakeViewParts(BaseModel):
             column_name_environment=column_name_environment,
             column_name_expression=column_name_expression
         )
-        joined_parts = []
+        joined_parts:List[SnowflakeViewPart] = []
         # remove the joins from the main part if they are not in the raw stream locations
         main_stream_view_part.joins = [join for join in main_stream_view_part.joins
                                        if join.join_stream_name in raw_stream_locations
@@ -499,21 +515,49 @@ class SnowflakeViewParts(BaseModel):
                 column_name_environment=column_name_environment,
                 column_name_expression=column_name_expression
             ))
-        # For each column, the plugin can advise which streams are required for the join, which comes through as required_stream_names
+        # For each column, the plugin can advise which fields (of the same stream or joined) are required for the join, which comes through as referenced_columns
         # on the SnowflakeViewColumn object.
-        # Until this generate function is called with the raw stream names, we don't know which streams the user has actually selected.
-        # So now there's a pruning process where we remove columns from the main view part that depend on streams that are not selected
-        for column in main_stream_view_part.columns:
-            if column.required_stream_names:
-                for required_stream_name in column.required_stream_names:
-                    if required_stream_name not in raw_stream_locations:
-                        logger.warning(f"Column {column.name} in stream {stream_name} requires stream {required_stream_name} to be selected, but it was not provided")
-                        main_stream_view_part.columns.remove(column)
-                        break
+        # Until this generate function is called with the raw stream names, we don't know which streams the user has actually selected, nor which
+        # fields are actually available (some may be dropped due to something like an unsupported formula).
+        # So now there's a pruning process where we remove columns that reference fields that are not available.
+        # We'll start by doing a first pass and removing unavailable columns from other streams
+        # then, we can do a final pass and remove columns that reference fields that are not available in the current stream
+        prune_count = 0
+        while prune(main_stream_view_part,joined_parts):
+            prune_count += 1
+            if prune_count > 10000:
+                raise ValueError("Pruning of columns from the view has entered an infinite loop")
         return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
+def prune(view_part:SnowflakeViewPart,joined_parts:List[SnowflakeViewPart]) -> bool:
+    """
+    Prunes columns from the main view part that reference fields that are not available in the joined parts.
+    Returns True if columns were removed, False otherwise.
+    """
+    for column in view_part.columns:
+        if column.referenced_columns:
+            for referenced_stream_name, referenced_fields in column.referenced_columns.items():
+                if referenced_stream_name == view_part.stream_name:
+                    part = view_part
+                else:
+                    part = next((part for part in joined_parts if part.stream_name==referenced_stream_name),None)
+                    if part is None:
+                        logger.warning(f"Column {column.name} in stream {view_part.stream_name} references stream {referenced_stream_name}, but it was not provided")
+                        view_part.columns.remove(column)
+                        return True
+                columns_missing_from_join = part.columns_missing(referenced_fields)
+                if len(columns_missing_from_join) > 0:
+                    logger.warning(f"Column {column.name} in stream {view_part.stream_name} references fields {columns_missing_from_join} in stream {referenced_stream_name}, but they were not provided")
+                    view_part.columns.remove(column)
+                    return True
+                else:
+                    # no columns were removed, but we need to check if the columns that are referenced are not themselves referencing other missing columns
+                    return prune(part,joined_parts)
+    return False
 class JsonSchemaTopLevel(BaseModel):
     """
@@ -679,7 +723,7 @@ def normalized_view_part(
     #- APP_IDENTIFIER
     #- Direct and joined columns, ordered so that columns that reference other columns are defined after the columns they reference
     #- OMNATA_RETRIEVE_DATE, OMNATA_RAW_RECORD, OMNATA_IS_DELETED, OMNATA_RUN_ID
-    view_columns = SnowflakeViewColumn.order_by_reference(direct_view_columns +
+    view_columns = SnowflakeViewColumn.order_by_reference(stream_name,direct_view_columns +
                                                               join_view_columns)
     return SnowflakeViewPart(
         stream_name=stream_name,

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: omnata-plugin-runtime
-Version: 0.10.7a254
+Version: 0.10.8a256
 Summary: Classes and common runtime components for building and running Omnata Plugins
 Author: James Weakley
 Author-email: james.weakley@omnata.com

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "omnata-plugin-runtime"
-version = "0.10.7-a254"
+version = "0.10.8-a256"
 description = "Classes and common runtime components for building and running Omnata Plugins"
 authors = ["James Weakley <james.weakley@omnata.com>"]
 readme = "README.md"

omnata-plugin-runtime 0.10.7a254tar.gz → 0.10.8a256tar.gz

Potentially problematic release.

omnata-plugin-runtime 0.10.7a254__tar.gz → 0.10.8a256__tar.gz

Potentially problematic release.

omnata-plugin-runtime 0.10.7a254tar.gz → 0.10.8a256tar.gz