PyPI - omnata-plugin-runtime - Versions diffs - 0.9.1a210__tar.gz → 0.9.1a211__tar.gz - Mend

omnata-plugin-runtime 0.9.1a210.tar.gz → 0.9.1a211.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{omnata_plugin_runtime-0.9.1a210 → omnata_plugin_runtime-0.9.1a211}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: omnata-plugin-runtime
-Version: 0.9.1a210
+Version: 0.9.1a211
 Summary: Classes and common runtime components for building and running Omnata Plugins
 Author: James Weakley
 Author-email: james.weakley@omnata.com

{omnata_plugin_runtime-0.9.1a210 → omnata_plugin_runtime-0.9.1a211}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "omnata-plugin-runtime"
-version = "0.9.1-a210"
+version = "0.9.1-a211"
 description = "Classes and common runtime components for building and running Omnata Plugins"
 authors = ["James Weakley <james.weakley@omnata.com>"]
 readme = "README.md"

{omnata_plugin_runtime-0.9.1a210 → omnata_plugin_runtime-0.9.1a211}/src/omnata_plugin_runtime/json_schema.py RENAMED Viewed

@@ -207,13 +207,35 @@ class SnowflakeViewColumn(BaseModel):
             expression=f"""TO_{timestamp_type}({expression}::varchar,'{timestamp_format}')"""
         else:
             if not json_schema_property.snowflakeColumnExpression:
-                expression=f"""{expression}::{json_schema_property.type}"""
+                expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
         return cls(
             name=final_column_name,
             expression=expression,
             comment=comment,
             is_join_column=json_schema_property.isJoinColumn,
         )
+    @classmethod
+    def order_by_reference(cls,join_columns:List[Self]) -> List[Self]:
+        """
+        In some situations, column expressions may reference the alias of another column
+        This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
+        So we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
+        """
+        # Collect columns to be moved
+        columns_to_move:List[Self] = []
+        for column in join_columns:
+            for other_column in join_columns:
+                if f'"{column.name}"' in other_column.expression:
+                    if column not in columns_to_move:
+                        columns_to_move.append(column)
+        # Move collected columns to the front
+        for column in columns_to_move:
+            join_columns.remove(column)
+            join_columns.insert(0, column)
+        return join_columns
 class SnowflakeViewJoin(BaseModel):
@@ -259,11 +281,41 @@ class SnowflakeViewJoin(BaseModel):
 ON "{self.left_alias}"."{self.left_column}" = "{self.join_stream_alias}"."{self.join_stream_column}" """
-class SnowflakeViewParts(BaseModel):
+class FullyQualifiedTable(BaseModel):
     """
-    Represents the definition of a Snowflake normalized view.
+    Represents a fully qualified table name in Snowflake, including database, schema, and table name.
+    This is not a template, it's a fully specified object.
     """
+    database_name: Optional[str] = Field(default=None, description="The database name")
+    schema_name: str = Field(..., description="The schema name")
+    table_name: str = Field(..., description="The table name")
+    def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
+        """
+        If table_override is provided, it will be used instead of the table name
+        """
+        actual_table_name = (
+            self.table_name if table_override is None else table_override
+        )
+        # We try to make this resilient to quoting
+        schema_name = self.schema_name.replace('"', "")
+        table_name = actual_table_name.replace('"', "")
+        if self.database_name is None or self.database_name == "":
+            return f'"{schema_name}"."{table_name}"'
+        database_name = self.database_name.replace('"', "")
+        return f'"{database_name}"."{schema_name}"."{table_name}"'
+class SnowflakeViewPart(BaseModel):
+    """
+    Represents a stream within a normalized view.
+    Because a normalized view can be built from multiple streams, this is potentially only part of the view.
+    """
+    stream_name: str = Field(..., description="The name of the stream")
+    raw_table_location: FullyQualifiedTable = Field(
+        ..., description="The location of the raw table that the stream is sourced from"
+    )
     comment: Optional[str] = Field(
         None, description="The comment to assign to the view"
     )
@@ -284,7 +336,7 @@ class SnowflakeViewParts(BaseModel):
         """
         Returns the columns that are sourced from joins.
         """
-        return [c for c in self.columns if c.is_join_column]
+        return SnowflakeViewColumn.order_by_reference([c for c in self.columns if c.is_join_column])
     def comment_clause(self) -> str:
         """
@@ -298,31 +350,86 @@ class SnowflakeViewParts(BaseModel):
         return [
             c.name_with_comment() for c in (self.direct_columns() + self.join_columns())
         ]
+    def cte_text(self) -> str:
+        """
+        Returns the CTE text for this view part.
+        """
+        return f""" "{self.stream_name}" as (
+    select {', '.join([c.definition() for c in self.direct_columns()])}
+    from {self.raw_table_location.get_fully_qualified_name()}
+) """
-class FullyQualifiedTable(BaseModel):
+class SnowflakeViewParts(BaseModel):
     """
-    Represents a fully qualified table name in Snowflake, including database, schema, and table name.
-    This is not a template, it's a fully specified object.
+    Represents a set of streams within a normalized view.
+    This is the top level object that represents the whole view.
     """
-    database_name: Optional[str] = Field(default=None, description="The database name")
-    schema_name: str = Field(..., description="The schema name")
-    table_name: str = Field(..., description="The table name")
+    main_part: SnowflakeViewPart = Field(
+        ..., description="The main part of the view, which is the stream that the view is named after"
+    )
+    joined_parts: List[SnowflakeViewPart] = Field(
+        ..., description="The other streams that are joined to the main stream"
+    )
-    def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
+    def view_body(self):
         """
-        If table_override is provided, it will be used instead of the table name
+        Creates a view definition from the parts
         """
-        actual_table_name = (
-            self.table_name if table_override is None else table_override
+        ctes = [self.main_part.cte_text()] + [part.cte_text() for part in self.joined_parts]
+        all_ctes = "\n,".join(ctes)
+        join_columns = self.main_part.join_columns()
+        join_column_clauses = [c.definition() for c in join_columns]
+        # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
+        final_column_clauses = [f'"{self.main_part.stream_name}".*'] + join_column_clauses
+        view_body = f"""with {all_ctes}
+    select {', '.join(final_column_clauses)}
+    from "{self.main_part.stream_name}" """
+        if len(self.main_part.joins) > 0:
+            join_clauses = [join.definition() for join in self.main_part.joins]
+            view_body += "\n" + ("\n".join(join_clauses))
+        return view_body
+    @classmethod
+    def generate(cls,
+        raw_stream_locations: Dict[str,FullyQualifiedTable],
+        stream_schemas: Dict[str,Dict],
+        stream_name: str,
+        include_default_columns: bool = True,
+        column_name_environment: Environment = Environment(),
+        column_name_expression: str = "{{column_name}}"
+    ) -> Self:
+        """
+        Returns the building blocks required to create a normalized view from a stream.
+        This includes any joins that are required, via CTEs.
+        """
+        # we start with the view parts for the view we are building
+        main_stream_view_part = normalized_view_part(
+            stream_name=stream_name,
+            raw_table_location=raw_stream_locations[stream_name],
+            include_default_columns=include_default_columns,
+            stream_schema=stream_schemas.get(stream_name),
+            column_name_environment=column_name_environment,
+            column_name_expression=column_name_expression
         )
-        # We try to make this resilient to quoting
-        schema_name = self.schema_name.replace('"', "")
-        table_name = actual_table_name.replace('"', "")
-        if self.database_name is None or self.database_name == "":
-            return f'"{schema_name}"."{table_name}"'
-        database_name = self.database_name.replace('"', "")
-        return f'"{database_name}"."{schema_name}"."{table_name}"'
+        joined_parts = []
+        for join in main_stream_view_part.joins:
+            if join.join_stream_name not in raw_stream_locations:
+                raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its location was not provided")
+            if join.join_stream_name not in stream_schemas:
+                raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its schema was not provided")
+            joined_parts.append(normalized_view_part(
+                stream_name=join.join_stream_name,
+                raw_table_location=raw_stream_locations[join.join_stream_name],
+                include_default_columns=include_default_columns,
+                stream_schema=stream_schemas[join.join_stream_name],
+                column_name_environment=column_name_environment,
+                column_name_expression=column_name_expression
+            ))
+        return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
 class JsonSchemaTopLevel(BaseModel):
     """
@@ -341,9 +448,9 @@ class JsonSchemaTopLevel(BaseModel):
     )
     def build_view_columns(self,
-                            column_name_environment: Environment = Environment(),
-                            column_name_expression: str = "{{column_name}}"
-                           ) -> List[SnowflakeViewColumn]:
+            column_name_environment: Environment,
+            column_name_expression: str
+        ) -> List[SnowflakeViewColumn]:
         """
         Returns a list of column definitions from a json schema
         """
@@ -413,10 +520,14 @@ class JsonSchemaTopLevel(BaseModel):
         )]
-def normalized_view_parts(
+def normalized_view_part(
+    stream_name:str,
+    raw_table_location:FullyQualifiedTable,
     include_default_columns: bool,
+    column_name_environment: Environment,
+    column_name_expression: str,
     stream_schema: Optional[Dict] = None,
-) -> SnowflakeViewParts:
+) -> SnowflakeViewPart:
     """
     Returns an object containing:
     - A top level comment for the view
@@ -461,78 +572,14 @@ def normalized_view_parts(
             )
         )
     json_schema = JsonSchemaTopLevel.model_validate(stream_schema)
-    return SnowflakeViewParts(
-        columns=snowflake_columns + json_schema.build_view_columns(),
+    return SnowflakeViewPart(
+        stream_name=stream_name,
+        raw_table_location=raw_table_location,
+        columns=snowflake_columns + json_schema.build_view_columns(
+            column_name_environment=column_name_environment,
+            column_name_expression=column_name_expression
+        ),
         joins=json_schema.joins or [],
         comment=json_schema.description
     )
-def normalized_view_body(
-    stream_locations: Dict[str,FullyQualifiedTable],
-    stream_schemas: Dict[str,Dict],
-    stream_name: str,
-    include_default_columns: bool = True,
-) -> str:
-    """
-    Returns the SQL for the body of a normalized view.
-    Because views are created over raw data (potentially several joined raw tables), we have
-    to pass in the locations of those raw tables, keyed by stream name.
-    The stream schema is also passed in, keyed by stream name, and used to build the columns and joins.
-    """
-    main_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
-    # we start with the view parts for the view we are building
-    main_stream_view_part = normalized_view_parts(
-        include_default_columns=include_default_columns,
-        stream_schema=stream_schemas.get(stream_name)
-    )
-    # we use a CTE because we may need to use aliases in the joins
-    main_stream_cte = f""" "{stream_name}" as (
-    select {', '.join([c.definition() for c in main_stream_view_part.direct_columns()])}
-    from {main_stream_raw_table_name_quoted}
-) """
-    ctes = [main_stream_cte]
-    # we also use CTEs that recreate the views that the joins reference.
-    # the reason for this is that we can't rely on the view being there,
-    # and it's also possible that they reference each other
-    for join in main_stream_view_part.joins:
-        join_view_part = normalized_view_parts(
-            include_default_columns=include_default_columns,
-            stream_schema=stream_schemas.get(join.join_stream_name)
-        )
-        join_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
-        join_view_cte = f""" "{join.join_stream_name}" as (
-        select {', '.join([c.definition() for c in join_view_part.direct_columns()])}
-        from {join_stream_raw_table_name_quoted}
-        ) """
-        ctes.append(join_view_cte)
-    join_columns = main_stream_view_part.join_columns()
-    # in some situations, column expressions may reference the alias of another column
-    # this is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
-    # so we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
-    # Collect columns to be moved
-    columns_to_move = []
-    for column in join_columns:
-        for other_column in join_columns:
-            if f'"{column.name}"' in other_column.expression:
-                if column not in columns_to_move:
-                    columns_to_move.append(column)
-    # Move collected columns to the front
-    for column in columns_to_move:
-        join_columns.remove(column)
-        join_columns.insert(0, column)
-    join_column_clauses = [c.definition() for c in join_columns]
-    # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
-    final_column_clauses = [f'"{stream_name}".*'] + join_column_clauses
-    all_ctes = "\n,".join(ctes)
-    view_body = f"""with {all_ctes}
-select {', '.join(final_column_clauses)} from "{stream_name}" """
-    if len(main_stream_view_part.joins) > 0:
-        join_clauses = [join.definition() for join in main_stream_view_part.joins]
-        view_body += "\n" + ("\n".join(join_clauses))
-    return view_body