omnata-plugin-runtime 0.9.1a210__tar.gz → 0.9.1a211__tar.gz

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: omnata-plugin-runtime
3
- Version: 0.9.1a210
3
+ Version: 0.9.1a211
4
4
  Summary: Classes and common runtime components for building and running Omnata Plugins
5
5
  Author: James Weakley
6
6
  Author-email: james.weakley@omnata.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "omnata-plugin-runtime"
3
- version = "0.9.1-a210"
3
+ version = "0.9.1-a211"
4
4
  description = "Classes and common runtime components for building and running Omnata Plugins"
5
5
  authors = ["James Weakley <james.weakley@omnata.com>"]
6
6
  readme = "README.md"
@@ -207,13 +207,35 @@ class SnowflakeViewColumn(BaseModel):
207
207
  expression=f"""TO_{timestamp_type}({expression}::varchar,'{timestamp_format}')"""
208
208
  else:
209
209
  if not json_schema_property.snowflakeColumnExpression:
210
- expression=f"""{expression}::{json_schema_property.type}"""
210
+ expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
211
211
  return cls(
212
212
  name=final_column_name,
213
213
  expression=expression,
214
214
  comment=comment,
215
215
  is_join_column=json_schema_property.isJoinColumn,
216
216
  )
217
+
218
+ @classmethod
219
+ def order_by_reference(cls,join_columns:List[Self]) -> List[Self]:
220
+ """
221
+ In some situations, column expressions may reference the alias of another column
222
+ This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
223
+ So we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
224
+ """
225
+
226
+ # Collect columns to be moved
227
+ columns_to_move:List[Self] = []
228
+ for column in join_columns:
229
+ for other_column in join_columns:
230
+ if f'"{column.name}"' in other_column.expression:
231
+ if column not in columns_to_move:
232
+ columns_to_move.append(column)
233
+
234
+ # Move collected columns to the front
235
+ for column in columns_to_move:
236
+ join_columns.remove(column)
237
+ join_columns.insert(0, column)
238
+ return join_columns
217
239
 
218
240
 
219
241
  class SnowflakeViewJoin(BaseModel):
@@ -259,11 +281,41 @@ class SnowflakeViewJoin(BaseModel):
259
281
  ON "{self.left_alias}"."{self.left_column}" = "{self.join_stream_alias}"."{self.join_stream_column}" """
260
282
 
261
283
 
262
- class SnowflakeViewParts(BaseModel):
284
+ class FullyQualifiedTable(BaseModel):
263
285
  """
264
- Represents the definition of a Snowflake normalized view.
286
+ Represents a fully qualified table name in Snowflake, including database, schema, and table name.
287
+ This is not a template, it's a fully specified object.
265
288
  """
266
289
 
290
+ database_name: Optional[str] = Field(default=None, description="The database name")
291
+ schema_name: str = Field(..., description="The schema name")
292
+ table_name: str = Field(..., description="The table name")
293
+
294
+ def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
295
+ """
296
+ If table_override is provided, it will be used instead of the table name
297
+ """
298
+ actual_table_name = (
299
+ self.table_name if table_override is None else table_override
300
+ )
301
+ # We try to make this resilient to quoting
302
+ schema_name = self.schema_name.replace('"', "")
303
+ table_name = actual_table_name.replace('"', "")
304
+ if self.database_name is None or self.database_name == "":
305
+ return f'"{schema_name}"."{table_name}"'
306
+ database_name = self.database_name.replace('"', "")
307
+ return f'"{database_name}"."{schema_name}"."{table_name}"'
308
+
309
+
310
+ class SnowflakeViewPart(BaseModel):
311
+ """
312
+ Represents a stream within a normalized view.
313
+ Because a normalized view can be built from multiple streams, this is potentially only part of the view.
314
+ """
315
+ stream_name: str = Field(..., description="The name of the stream")
316
+ raw_table_location: FullyQualifiedTable = Field(
317
+ ..., description="The location of the raw table that the stream is sourced from"
318
+ )
267
319
  comment: Optional[str] = Field(
268
320
  None, description="The comment to assign to the view"
269
321
  )
@@ -284,7 +336,7 @@ class SnowflakeViewParts(BaseModel):
284
336
  """
285
337
  Returns the columns that are sourced from joins.
286
338
  """
287
- return [c for c in self.columns if c.is_join_column]
339
+ return SnowflakeViewColumn.order_by_reference([c for c in self.columns if c.is_join_column])
288
340
 
289
341
  def comment_clause(self) -> str:
290
342
  """
@@ -298,31 +350,86 @@ class SnowflakeViewParts(BaseModel):
298
350
  return [
299
351
  c.name_with_comment() for c in (self.direct_columns() + self.join_columns())
300
352
  ]
353
+
354
+ def cte_text(self) -> str:
355
+ """
356
+ Returns the CTE text for this view part.
357
+ """
358
+ return f""" "{self.stream_name}" as (
359
+ select {', '.join([c.definition() for c in self.direct_columns()])}
360
+ from {self.raw_table_location.get_fully_qualified_name()}
361
+ ) """
301
362
 
302
- class FullyQualifiedTable(BaseModel):
363
+ class SnowflakeViewParts(BaseModel):
303
364
  """
304
- Represents a fully qualified table name in Snowflake, including database, schema, and table name.
305
- This is not a template, it's a fully specified object.
365
+ Represents a set of streams within a normalized view.
366
+ This is the top level object that represents the whole view.
306
367
  """
307
368
 
308
- database_name: Optional[str] = Field(default=None, description="The database name")
309
- schema_name: str = Field(..., description="The schema name")
310
- table_name: str = Field(..., description="The table name")
369
+ main_part: SnowflakeViewPart = Field(
370
+ ..., description="The main part of the view, which is the stream that the view is named after"
371
+ )
372
+ joined_parts: List[SnowflakeViewPart] = Field(
373
+ ..., description="The other streams that are joined to the main stream"
374
+ )
311
375
 
312
- def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
376
+ def view_body(self):
313
377
  """
314
- If table_override is provided, it will be used instead of the table name
378
+ Creates a view definition from the parts
315
379
  """
316
- actual_table_name = (
317
- self.table_name if table_override is None else table_override
380
+ ctes = [self.main_part.cte_text()] + [part.cte_text() for part in self.joined_parts]
381
+ all_ctes = "\n,".join(ctes)
382
+ join_columns = self.main_part.join_columns()
383
+ join_column_clauses = [c.definition() for c in join_columns]
384
+ # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
385
+ final_column_clauses = [f'"{self.main_part.stream_name}".*'] + join_column_clauses
386
+ view_body = f"""with {all_ctes}
387
+ select {', '.join(final_column_clauses)}
388
+ from "{self.main_part.stream_name}" """
389
+ if len(self.main_part.joins) > 0:
390
+ join_clauses = [join.definition() for join in self.main_part.joins]
391
+ view_body += "\n" + ("\n".join(join_clauses))
392
+ return view_body
393
+
394
+ @classmethod
395
+ def generate(cls,
396
+ raw_stream_locations: Dict[str,FullyQualifiedTable],
397
+ stream_schemas: Dict[str,Dict],
398
+ stream_name: str,
399
+ include_default_columns: bool = True,
400
+ column_name_environment: Environment = Environment(),
401
+ column_name_expression: str = "{{column_name}}"
402
+ ) -> Self:
403
+ """
404
+ Returns the building blocks required to create a normalized view from a stream.
405
+ This includes any joins that are required, via CTEs.
406
+ """
407
+ # we start with the view parts for the view we are building
408
+ main_stream_view_part = normalized_view_part(
409
+ stream_name=stream_name,
410
+ raw_table_location=raw_stream_locations[stream_name],
411
+ include_default_columns=include_default_columns,
412
+ stream_schema=stream_schemas.get(stream_name),
413
+ column_name_environment=column_name_environment,
414
+ column_name_expression=column_name_expression
318
415
  )
319
- # We try to make this resilient to quoting
320
- schema_name = self.schema_name.replace('"', "")
321
- table_name = actual_table_name.replace('"', "")
322
- if self.database_name is None or self.database_name == "":
323
- return f'"{schema_name}"."{table_name}"'
324
- database_name = self.database_name.replace('"', "")
325
- return f'"{database_name}"."{schema_name}"."{table_name}"'
416
+ joined_parts = []
417
+ for join in main_stream_view_part.joins:
418
+ if join.join_stream_name not in raw_stream_locations:
419
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its location was not provided")
420
+ if join.join_stream_name not in stream_schemas:
421
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its schema was not provided")
422
+ joined_parts.append(normalized_view_part(
423
+ stream_name=join.join_stream_name,
424
+ raw_table_location=raw_stream_locations[join.join_stream_name],
425
+ include_default_columns=include_default_columns,
426
+ stream_schema=stream_schemas[join.join_stream_name],
427
+ column_name_environment=column_name_environment,
428
+ column_name_expression=column_name_expression
429
+ ))
430
+ return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
431
+
432
+
326
433
 
327
434
  class JsonSchemaTopLevel(BaseModel):
328
435
  """
@@ -341,9 +448,9 @@ class JsonSchemaTopLevel(BaseModel):
341
448
  )
342
449
 
343
450
  def build_view_columns(self,
344
- column_name_environment: Environment = Environment(),
345
- column_name_expression: str = "{{column_name}}"
346
- ) -> List[SnowflakeViewColumn]:
451
+ column_name_environment: Environment,
452
+ column_name_expression: str
453
+ ) -> List[SnowflakeViewColumn]:
347
454
  """
348
455
  Returns a list of column definitions from a json schema
349
456
  """
@@ -413,10 +520,14 @@ class JsonSchemaTopLevel(BaseModel):
413
520
  )]
414
521
 
415
522
 
416
- def normalized_view_parts(
523
+ def normalized_view_part(
524
+ stream_name:str,
525
+ raw_table_location:FullyQualifiedTable,
417
526
  include_default_columns: bool,
527
+ column_name_environment: Environment,
528
+ column_name_expression: str,
418
529
  stream_schema: Optional[Dict] = None,
419
- ) -> SnowflakeViewParts:
530
+ ) -> SnowflakeViewPart:
420
531
  """
421
532
  Returns an object containing:
422
533
  - A top level comment for the view
@@ -461,78 +572,14 @@ def normalized_view_parts(
461
572
  )
462
573
  )
463
574
  json_schema = JsonSchemaTopLevel.model_validate(stream_schema)
464
- return SnowflakeViewParts(
465
- columns=snowflake_columns + json_schema.build_view_columns(),
575
+
576
+ return SnowflakeViewPart(
577
+ stream_name=stream_name,
578
+ raw_table_location=raw_table_location,
579
+ columns=snowflake_columns + json_schema.build_view_columns(
580
+ column_name_environment=column_name_environment,
581
+ column_name_expression=column_name_expression
582
+ ),
466
583
  joins=json_schema.joins or [],
467
584
  comment=json_schema.description
468
585
  )
469
-
470
- def normalized_view_body(
471
- stream_locations: Dict[str,FullyQualifiedTable],
472
- stream_schemas: Dict[str,Dict],
473
- stream_name: str,
474
- include_default_columns: bool = True,
475
- ) -> str:
476
- """
477
- Returns the SQL for the body of a normalized view.
478
- Because views are created over raw data (potentially several joined raw tables), we have
479
- to pass in the locations of those raw tables, keyed by stream name.
480
- The stream schema is also passed in, keyed by stream name, and used to build the columns and joins.
481
- """
482
- main_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
483
- # we start with the view parts for the view we are building
484
- main_stream_view_part = normalized_view_parts(
485
- include_default_columns=include_default_columns,
486
- stream_schema=stream_schemas.get(stream_name)
487
- )
488
- # we use a CTE because we may need to use aliases in the joins
489
- main_stream_cte = f""" "{stream_name}" as (
490
- select {', '.join([c.definition() for c in main_stream_view_part.direct_columns()])}
491
- from {main_stream_raw_table_name_quoted}
492
- ) """
493
- ctes = [main_stream_cte]
494
- # we also use CTEs that recreate the views that the joins reference.
495
- # the reason for this is that we can't rely on the view being there,
496
- # and it's also possible that they reference each other
497
- for join in main_stream_view_part.joins:
498
- join_view_part = normalized_view_parts(
499
- include_default_columns=include_default_columns,
500
- stream_schema=stream_schemas.get(join.join_stream_name)
501
- )
502
- join_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
503
- join_view_cte = f""" "{join.join_stream_name}" as (
504
- select {', '.join([c.definition() for c in join_view_part.direct_columns()])}
505
- from {join_stream_raw_table_name_quoted}
506
- ) """
507
- ctes.append(join_view_cte)
508
-
509
- join_columns = main_stream_view_part.join_columns()
510
- # in some situations, column expressions may reference the alias of another column
511
- # this is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
512
- # so we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
513
-
514
- # Collect columns to be moved
515
- columns_to_move = []
516
-
517
- for column in join_columns:
518
- for other_column in join_columns:
519
- if f'"{column.name}"' in other_column.expression:
520
- if column not in columns_to_move:
521
- columns_to_move.append(column)
522
-
523
- # Move collected columns to the front
524
- for column in columns_to_move:
525
- join_columns.remove(column)
526
- join_columns.insert(0, column)
527
-
528
- join_column_clauses = [c.definition() for c in join_columns]
529
- # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
530
- final_column_clauses = [f'"{stream_name}".*'] + join_column_clauses
531
- all_ctes = "\n,".join(ctes)
532
- view_body = f"""with {all_ctes}
533
- select {', '.join(final_column_clauses)} from "{stream_name}" """
534
-
535
- if len(main_stream_view_part.joins) > 0:
536
- join_clauses = [join.definition() for join in main_stream_view_part.joins]
537
- view_body += "\n" + ("\n".join(join_clauses))
538
- return view_body