omnata-plugin-runtime 0.9.1a210__tar.gz → 0.9.1a212__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: omnata-plugin-runtime
3
- Version: 0.9.1a210
3
+ Version: 0.9.1a212
4
4
  Summary: Classes and common runtime components for building and running Omnata Plugins
5
5
  Author: James Weakley
6
6
  Author-email: james.weakley@omnata.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "omnata-plugin-runtime"
3
- version = "0.9.1-a210"
3
+ version = "0.9.1-a212"
4
4
  description = "Classes and common runtime components for building and running Omnata Plugins"
5
5
  authors = ["James Weakley <james.weakley@omnata.com>"]
6
6
  readme = "README.md"
@@ -207,13 +207,35 @@ class SnowflakeViewColumn(BaseModel):
207
207
  expression=f"""TO_{timestamp_type}({expression}::varchar,'{timestamp_format}')"""
208
208
  else:
209
209
  if not json_schema_property.snowflakeColumnExpression:
210
- expression=f"""{expression}::{json_schema_property.type}"""
210
+ expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
211
211
  return cls(
212
212
  name=final_column_name,
213
213
  expression=expression,
214
214
  comment=comment,
215
215
  is_join_column=json_schema_property.isJoinColumn,
216
216
  )
217
+
218
+ @classmethod
219
+ def order_by_reference(cls,join_columns:List[Self]) -> List[Self]:
220
+ """
221
+ In some situations, column expressions may reference the alias of another column
222
+ This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
223
+ So we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
224
+ """
225
+
226
+ # Collect columns to be moved
227
+ columns_to_move:List[Self] = []
228
+ for column in join_columns:
229
+ for other_column in join_columns:
230
+ if f'"{column.name}"' in other_column.expression:
231
+ if column not in columns_to_move:
232
+ columns_to_move.append(column)
233
+
234
+ # Move collected columns to the front
235
+ for column in columns_to_move:
236
+ join_columns.remove(column)
237
+ join_columns.insert(0, column)
238
+ return join_columns
217
239
 
218
240
 
219
241
  class SnowflakeViewJoin(BaseModel):
@@ -259,11 +281,54 @@ class SnowflakeViewJoin(BaseModel):
259
281
  ON "{self.left_alias}"."{self.left_column}" = "{self.join_stream_alias}"."{self.join_stream_column}" """
260
282
 
261
283
 
262
- class SnowflakeViewParts(BaseModel):
284
+ class FullyQualifiedTable(BaseModel):
263
285
  """
264
- Represents the definition of a Snowflake normalized view.
286
+ Represents a fully qualified table name in Snowflake, including database, schema, and table name.
287
+ This is not a template, it's a fully specified object.
265
288
  """
266
289
 
290
+ database_name: Optional[str] = Field(default=None, description="The database name")
291
+ schema_name: str = Field(..., description="The schema name")
292
+ table_name: str = Field(..., description="The table name")
293
+
294
+ def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
295
+ """
296
+ If table_override is provided, it will be used instead of the table name
297
+ """
298
+ actual_table_name = (
299
+ self.table_name if table_override is None else table_override
300
+ )
301
+ # We try to make this resilient to quoting
302
+ schema_name = self.schema_name.replace('"', "")
303
+ table_name = actual_table_name.replace('"', "")
304
+ if self.database_name is None or self.database_name == "":
305
+ return f'"{schema_name}"."{table_name}"'
306
+ database_name = self.database_name.replace('"', "")
307
+ return f'"{database_name}"."{schema_name}"."{table_name}"'
308
+
309
+ def get_fully_qualified_stage_name(self) -> str:
310
+ """
311
+ Stage name is derived from the table name
312
+ """
313
+ return self.get_fully_qualified_name(table_override=f"{self.table_name}_STAGE")
314
+
315
+ def get_fully_qualified_criteria_deletes_table_name(self) -> str:
316
+ """
317
+ Deletes table name is derived from the table name
318
+ """
319
+ return self.get_fully_qualified_name(
320
+ table_override=f"{self.table_name}_CRITERIA_DELETES"
321
+ )
322
+
323
+ class SnowflakeViewPart(BaseModel):
324
+ """
325
+ Represents a stream within a normalized view.
326
+ Because a normalized view can be built from multiple streams, this is potentially only part of the view.
327
+ """
328
+ stream_name: str = Field(..., description="The name of the stream")
329
+ raw_table_location: FullyQualifiedTable = Field(
330
+ ..., description="The location of the raw table that the stream is sourced from"
331
+ )
267
332
  comment: Optional[str] = Field(
268
333
  None, description="The comment to assign to the view"
269
334
  )
@@ -284,7 +349,7 @@ class SnowflakeViewParts(BaseModel):
284
349
  """
285
350
  Returns the columns that are sourced from joins.
286
351
  """
287
- return [c for c in self.columns if c.is_join_column]
352
+ return SnowflakeViewColumn.order_by_reference([c for c in self.columns if c.is_join_column])
288
353
 
289
354
  def comment_clause(self) -> str:
290
355
  """
@@ -298,31 +363,86 @@ class SnowflakeViewParts(BaseModel):
298
363
  return [
299
364
  c.name_with_comment() for c in (self.direct_columns() + self.join_columns())
300
365
  ]
366
+
367
+ def cte_text(self) -> str:
368
+ """
369
+ Returns the CTE text for this view part.
370
+ """
371
+ return f""" "{self.stream_name}" as (
372
+ select {', '.join([c.definition() for c in self.direct_columns()])}
373
+ from {self.raw_table_location.get_fully_qualified_name()}
374
+ ) """
301
375
 
302
- class FullyQualifiedTable(BaseModel):
376
+ class SnowflakeViewParts(BaseModel):
303
377
  """
304
- Represents a fully qualified table name in Snowflake, including database, schema, and table name.
305
- This is not a template, it's a fully specified object.
378
+ Represents a set of streams within a normalized view.
379
+ This is the top level object that represents the whole view.
306
380
  """
307
381
 
308
- database_name: Optional[str] = Field(default=None, description="The database name")
309
- schema_name: str = Field(..., description="The schema name")
310
- table_name: str = Field(..., description="The table name")
382
+ main_part: SnowflakeViewPart = Field(
383
+ ..., description="The main part of the view, which is the stream that the view is named after"
384
+ )
385
+ joined_parts: List[SnowflakeViewPart] = Field(
386
+ ..., description="The other streams that are joined to the main stream"
387
+ )
311
388
 
312
- def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
389
+ def view_body(self):
313
390
  """
314
- If table_override is provided, it will be used instead of the table name
391
+ Creates a view definition from the parts
315
392
  """
316
- actual_table_name = (
317
- self.table_name if table_override is None else table_override
393
+ ctes = [self.main_part.cte_text()] + [part.cte_text() for part in self.joined_parts]
394
+ all_ctes = "\n,".join(ctes)
395
+ join_columns = self.main_part.join_columns()
396
+ join_column_clauses = [c.definition() for c in join_columns]
397
+ # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
398
+ final_column_clauses = [f'"{self.main_part.stream_name}".*'] + join_column_clauses
399
+ view_body = f"""with {all_ctes}
400
+ select {', '.join(final_column_clauses)}
401
+ from "{self.main_part.stream_name}" """
402
+ if len(self.main_part.joins) > 0:
403
+ join_clauses = [join.definition() for join in self.main_part.joins]
404
+ view_body += "\n" + ("\n".join(join_clauses))
405
+ return view_body
406
+
407
+ @classmethod
408
+ def generate(cls,
409
+ raw_stream_locations: Dict[str,FullyQualifiedTable],
410
+ stream_schemas: Dict[str,Dict],
411
+ stream_name: str,
412
+ include_default_columns: bool = True,
413
+ column_name_environment: Environment = Environment(),
414
+ column_name_expression: str = "{{column_name}}"
415
+ ) -> Self:
416
+ """
417
+ Returns the building blocks required to create a normalized view from a stream.
418
+ This includes any joins that are required, via CTEs.
419
+ """
420
+ # we start with the view parts for the view we are building
421
+ main_stream_view_part = normalized_view_part(
422
+ stream_name=stream_name,
423
+ raw_table_location=raw_stream_locations[stream_name],
424
+ include_default_columns=include_default_columns,
425
+ stream_schema=stream_schemas.get(stream_name),
426
+ column_name_environment=column_name_environment,
427
+ column_name_expression=column_name_expression
318
428
  )
319
- # We try to make this resilient to quoting
320
- schema_name = self.schema_name.replace('"', "")
321
- table_name = actual_table_name.replace('"', "")
322
- if self.database_name is None or self.database_name == "":
323
- return f'"{schema_name}"."{table_name}"'
324
- database_name = self.database_name.replace('"', "")
325
- return f'"{database_name}"."{schema_name}"."{table_name}"'
429
+ joined_parts = []
430
+ for join in main_stream_view_part.joins:
431
+ if join.join_stream_name not in raw_stream_locations:
432
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its location was not provided")
433
+ if join.join_stream_name not in stream_schemas:
434
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its schema was not provided")
435
+ joined_parts.append(normalized_view_part(
436
+ stream_name=join.join_stream_name,
437
+ raw_table_location=raw_stream_locations[join.join_stream_name],
438
+ include_default_columns=include_default_columns,
439
+ stream_schema=stream_schemas[join.join_stream_name],
440
+ column_name_environment=column_name_environment,
441
+ column_name_expression=column_name_expression
442
+ ))
443
+ return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
444
+
445
+
326
446
 
327
447
  class JsonSchemaTopLevel(BaseModel):
328
448
  """
@@ -341,9 +461,9 @@ class JsonSchemaTopLevel(BaseModel):
341
461
  )
342
462
 
343
463
  def build_view_columns(self,
344
- column_name_environment: Environment = Environment(),
345
- column_name_expression: str = "{{column_name}}"
346
- ) -> List[SnowflakeViewColumn]:
464
+ column_name_environment: Environment,
465
+ column_name_expression: str
466
+ ) -> List[SnowflakeViewColumn]:
347
467
  """
348
468
  Returns a list of column definitions from a json schema
349
469
  """
@@ -413,10 +533,14 @@ class JsonSchemaTopLevel(BaseModel):
413
533
  )]
414
534
 
415
535
 
416
- def normalized_view_parts(
536
+ def normalized_view_part(
537
+ stream_name:str,
538
+ raw_table_location:FullyQualifiedTable,
417
539
  include_default_columns: bool,
540
+ column_name_environment: Environment,
541
+ column_name_expression: str,
418
542
  stream_schema: Optional[Dict] = None,
419
- ) -> SnowflakeViewParts:
543
+ ) -> SnowflakeViewPart:
420
544
  """
421
545
  Returns an object containing:
422
546
  - A top level comment for the view
@@ -461,78 +585,14 @@ def normalized_view_parts(
461
585
  )
462
586
  )
463
587
  json_schema = JsonSchemaTopLevel.model_validate(stream_schema)
464
- return SnowflakeViewParts(
465
- columns=snowflake_columns + json_schema.build_view_columns(),
588
+
589
+ return SnowflakeViewPart(
590
+ stream_name=stream_name,
591
+ raw_table_location=raw_table_location,
592
+ columns=snowflake_columns + json_schema.build_view_columns(
593
+ column_name_environment=column_name_environment,
594
+ column_name_expression=column_name_expression
595
+ ),
466
596
  joins=json_schema.joins or [],
467
597
  comment=json_schema.description
468
598
  )
469
-
470
- def normalized_view_body(
471
- stream_locations: Dict[str,FullyQualifiedTable],
472
- stream_schemas: Dict[str,Dict],
473
- stream_name: str,
474
- include_default_columns: bool = True,
475
- ) -> str:
476
- """
477
- Returns the SQL for the body of a normalized view.
478
- Because views are created over raw data (potentially several joined raw tables), we have
479
- to pass in the locations of those raw tables, keyed by stream name.
480
- The stream schema is also passed in, keyed by stream name, and used to build the columns and joins.
481
- """
482
- main_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
483
- # we start with the view parts for the view we are building
484
- main_stream_view_part = normalized_view_parts(
485
- include_default_columns=include_default_columns,
486
- stream_schema=stream_schemas.get(stream_name)
487
- )
488
- # we use a CTE because we may need to use aliases in the joins
489
- main_stream_cte = f""" "{stream_name}" as (
490
- select {', '.join([c.definition() for c in main_stream_view_part.direct_columns()])}
491
- from {main_stream_raw_table_name_quoted}
492
- ) """
493
- ctes = [main_stream_cte]
494
- # we also use CTEs that recreate the views that the joins reference.
495
- # the reason for this is that we can't rely on the view being there,
496
- # and it's also possible that they reference each other
497
- for join in main_stream_view_part.joins:
498
- join_view_part = normalized_view_parts(
499
- include_default_columns=include_default_columns,
500
- stream_schema=stream_schemas.get(join.join_stream_name)
501
- )
502
- join_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
503
- join_view_cte = f""" "{join.join_stream_name}" as (
504
- select {', '.join([c.definition() for c in join_view_part.direct_columns()])}
505
- from {join_stream_raw_table_name_quoted}
506
- ) """
507
- ctes.append(join_view_cte)
508
-
509
- join_columns = main_stream_view_part.join_columns()
510
- # in some situations, column expressions may reference the alias of another column
511
- # this is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
512
- # so we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
513
-
514
- # Collect columns to be moved
515
- columns_to_move = []
516
-
517
- for column in join_columns:
518
- for other_column in join_columns:
519
- if f'"{column.name}"' in other_column.expression:
520
- if column not in columns_to_move:
521
- columns_to_move.append(column)
522
-
523
- # Move collected columns to the front
524
- for column in columns_to_move:
525
- join_columns.remove(column)
526
- join_columns.insert(0, column)
527
-
528
- join_column_clauses = [c.definition() for c in join_columns]
529
- # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
530
- final_column_clauses = [f'"{stream_name}".*'] + join_column_clauses
531
- all_ctes = "\n,".join(ctes)
532
- view_body = f"""with {all_ctes}
533
- select {', '.join(final_column_clauses)} from "{stream_name}" """
534
-
535
- if len(main_stream_view_part.joins) > 0:
536
- join_clauses = [join.definition() for join in main_stream_view_part.joins]
537
- view_body += "\n" + ("\n".join(join_clauses))
538
- return view_body