omnata-plugin-runtime 0.9.1a210__tar.gz → 0.9.1a211__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: omnata-plugin-runtime
3
- Version: 0.9.1a210
3
+ Version: 0.9.1a211
4
4
  Summary: Classes and common runtime components for building and running Omnata Plugins
5
5
  Author: James Weakley
6
6
  Author-email: james.weakley@omnata.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "omnata-plugin-runtime"
3
- version = "0.9.1-a210"
3
+ version = "0.9.1-a211"
4
4
  description = "Classes and common runtime components for building and running Omnata Plugins"
5
5
  authors = ["James Weakley <james.weakley@omnata.com>"]
6
6
  readme = "README.md"
@@ -207,13 +207,35 @@ class SnowflakeViewColumn(BaseModel):
207
207
  expression=f"""TO_{timestamp_type}({expression}::varchar,'{timestamp_format}')"""
208
208
  else:
209
209
  if not json_schema_property.snowflakeColumnExpression:
210
- expression=f"""{expression}::{json_schema_property.type}"""
210
+ expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
211
211
  return cls(
212
212
  name=final_column_name,
213
213
  expression=expression,
214
214
  comment=comment,
215
215
  is_join_column=json_schema_property.isJoinColumn,
216
216
  )
217
+
218
+ @classmethod
219
+ def order_by_reference(cls,join_columns:List[Self]) -> List[Self]:
220
+ """
221
+ In some situations, column expressions may reference the alias of another column
222
+ This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
223
+ So we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
224
+ """
225
+
226
+ # Collect columns to be moved
227
+ columns_to_move:List[Self] = []
228
+ for column in join_columns:
229
+ for other_column in join_columns:
230
+ if f'"{column.name}"' in other_column.expression:
231
+ if column not in columns_to_move:
232
+ columns_to_move.append(column)
233
+
234
+ # Move collected columns to the front
235
+ for column in columns_to_move:
236
+ join_columns.remove(column)
237
+ join_columns.insert(0, column)
238
+ return join_columns
217
239
 
218
240
 
219
241
  class SnowflakeViewJoin(BaseModel):
@@ -259,11 +281,41 @@ class SnowflakeViewJoin(BaseModel):
259
281
  ON "{self.left_alias}"."{self.left_column}" = "{self.join_stream_alias}"."{self.join_stream_column}" """
260
282
 
261
283
 
262
- class SnowflakeViewParts(BaseModel):
284
+ class FullyQualifiedTable(BaseModel):
263
285
  """
264
- Represents the definition of a Snowflake normalized view.
286
+ Represents a fully qualified table name in Snowflake, including database, schema, and table name.
287
+ This is not a template, it's a fully specified object.
265
288
  """
266
289
 
290
+ database_name: Optional[str] = Field(default=None, description="The database name")
291
+ schema_name: str = Field(..., description="The schema name")
292
+ table_name: str = Field(..., description="The table name")
293
+
294
+ def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
295
+ """
296
+ If table_override is provided, it will be used instead of the table name
297
+ """
298
+ actual_table_name = (
299
+ self.table_name if table_override is None else table_override
300
+ )
301
+ # We try to make this resilient to quoting
302
+ schema_name = self.schema_name.replace('"', "")
303
+ table_name = actual_table_name.replace('"', "")
304
+ if self.database_name is None or self.database_name == "":
305
+ return f'"{schema_name}"."{table_name}"'
306
+ database_name = self.database_name.replace('"', "")
307
+ return f'"{database_name}"."{schema_name}"."{table_name}"'
308
+
309
+
310
+ class SnowflakeViewPart(BaseModel):
311
+ """
312
+ Represents a stream within a normalized view.
313
+ Because a normalized view can be built from multiple streams, this is potentially only part of the view.
314
+ """
315
+ stream_name: str = Field(..., description="The name of the stream")
316
+ raw_table_location: FullyQualifiedTable = Field(
317
+ ..., description="The location of the raw table that the stream is sourced from"
318
+ )
267
319
  comment: Optional[str] = Field(
268
320
  None, description="The comment to assign to the view"
269
321
  )
@@ -284,7 +336,7 @@ class SnowflakeViewParts(BaseModel):
284
336
  """
285
337
  Returns the columns that are sourced from joins.
286
338
  """
287
- return [c for c in self.columns if c.is_join_column]
339
+ return SnowflakeViewColumn.order_by_reference([c for c in self.columns if c.is_join_column])
288
340
 
289
341
  def comment_clause(self) -> str:
290
342
  """
@@ -298,31 +350,86 @@ class SnowflakeViewParts(BaseModel):
298
350
  return [
299
351
  c.name_with_comment() for c in (self.direct_columns() + self.join_columns())
300
352
  ]
353
+
354
+ def cte_text(self) -> str:
355
+ """
356
+ Returns the CTE text for this view part.
357
+ """
358
+ return f""" "{self.stream_name}" as (
359
+ select {', '.join([c.definition() for c in self.direct_columns()])}
360
+ from {self.raw_table_location.get_fully_qualified_name()}
361
+ ) """
301
362
 
302
- class FullyQualifiedTable(BaseModel):
363
+ class SnowflakeViewParts(BaseModel):
303
364
  """
304
- Represents a fully qualified table name in Snowflake, including database, schema, and table name.
305
- This is not a template, it's a fully specified object.
365
+ Represents a set of streams within a normalized view.
366
+ This is the top level object that represents the whole view.
306
367
  """
307
368
 
308
- database_name: Optional[str] = Field(default=None, description="The database name")
309
- schema_name: str = Field(..., description="The schema name")
310
- table_name: str = Field(..., description="The table name")
369
+ main_part: SnowflakeViewPart = Field(
370
+ ..., description="The main part of the view, which is the stream that the view is named after"
371
+ )
372
+ joined_parts: List[SnowflakeViewPart] = Field(
373
+ ..., description="The other streams that are joined to the main stream"
374
+ )
311
375
 
312
- def get_fully_qualified_name(self, table_override: Optional[str] = None) -> str:
376
+ def view_body(self):
313
377
  """
314
- If table_override is provided, it will be used instead of the table name
378
+ Creates a view definition from the parts
315
379
  """
316
- actual_table_name = (
317
- self.table_name if table_override is None else table_override
380
+ ctes = [self.main_part.cte_text()] + [part.cte_text() for part in self.joined_parts]
381
+ all_ctes = "\n,".join(ctes)
382
+ join_columns = self.main_part.join_columns()
383
+ join_column_clauses = [c.definition() for c in join_columns]
384
+ # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
385
+ final_column_clauses = [f'"{self.main_part.stream_name}".*'] + join_column_clauses
386
+ view_body = f"""with {all_ctes}
387
+ select {', '.join(final_column_clauses)}
388
+ from "{self.main_part.stream_name}" """
389
+ if len(self.main_part.joins) > 0:
390
+ join_clauses = [join.definition() for join in self.main_part.joins]
391
+ view_body += "\n" + ("\n".join(join_clauses))
392
+ return view_body
393
+
394
+ @classmethod
395
+ def generate(cls,
396
+ raw_stream_locations: Dict[str,FullyQualifiedTable],
397
+ stream_schemas: Dict[str,Dict],
398
+ stream_name: str,
399
+ include_default_columns: bool = True,
400
+ column_name_environment: Environment = Environment(),
401
+ column_name_expression: str = "{{column_name}}"
402
+ ) -> Self:
403
+ """
404
+ Returns the building blocks required to create a normalized view from a stream.
405
+ This includes any joins that are required, via CTEs.
406
+ """
407
+ # we start with the view parts for the view we are building
408
+ main_stream_view_part = normalized_view_part(
409
+ stream_name=stream_name,
410
+ raw_table_location=raw_stream_locations[stream_name],
411
+ include_default_columns=include_default_columns,
412
+ stream_schema=stream_schemas.get(stream_name),
413
+ column_name_environment=column_name_environment,
414
+ column_name_expression=column_name_expression
318
415
  )
319
- # We try to make this resilient to quoting
320
- schema_name = self.schema_name.replace('"', "")
321
- table_name = actual_table_name.replace('"', "")
322
- if self.database_name is None or self.database_name == "":
323
- return f'"{schema_name}"."{table_name}"'
324
- database_name = self.database_name.replace('"', "")
325
- return f'"{database_name}"."{schema_name}"."{table_name}"'
416
+ joined_parts = []
417
+ for join in main_stream_view_part.joins:
418
+ if join.join_stream_name not in raw_stream_locations:
419
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its location was not provided")
420
+ if join.join_stream_name not in stream_schemas:
421
+ raise ValueError(f"Stream {join.join_stream_name} is required as a join for stream {stream_name}, but its schema was not provided")
422
+ joined_parts.append(normalized_view_part(
423
+ stream_name=join.join_stream_name,
424
+ raw_table_location=raw_stream_locations[join.join_stream_name],
425
+ include_default_columns=include_default_columns,
426
+ stream_schema=stream_schemas[join.join_stream_name],
427
+ column_name_environment=column_name_environment,
428
+ column_name_expression=column_name_expression
429
+ ))
430
+ return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
431
+
432
+
326
433
 
327
434
  class JsonSchemaTopLevel(BaseModel):
328
435
  """
@@ -341,9 +448,9 @@ class JsonSchemaTopLevel(BaseModel):
341
448
  )
342
449
 
343
450
  def build_view_columns(self,
344
- column_name_environment: Environment = Environment(),
345
- column_name_expression: str = "{{column_name}}"
346
- ) -> List[SnowflakeViewColumn]:
451
+ column_name_environment: Environment,
452
+ column_name_expression: str
453
+ ) -> List[SnowflakeViewColumn]:
347
454
  """
348
455
  Returns a list of column definitions from a json schema
349
456
  """
@@ -413,10 +520,14 @@ class JsonSchemaTopLevel(BaseModel):
413
520
  )]
414
521
 
415
522
 
416
- def normalized_view_parts(
523
+ def normalized_view_part(
524
+ stream_name:str,
525
+ raw_table_location:FullyQualifiedTable,
417
526
  include_default_columns: bool,
527
+ column_name_environment: Environment,
528
+ column_name_expression: str,
418
529
  stream_schema: Optional[Dict] = None,
419
- ) -> SnowflakeViewParts:
530
+ ) -> SnowflakeViewPart:
420
531
  """
421
532
  Returns an object containing:
422
533
  - A top level comment for the view
@@ -461,78 +572,14 @@ def normalized_view_parts(
461
572
  )
462
573
  )
463
574
  json_schema = JsonSchemaTopLevel.model_validate(stream_schema)
464
- return SnowflakeViewParts(
465
- columns=snowflake_columns + json_schema.build_view_columns(),
575
+
576
+ return SnowflakeViewPart(
577
+ stream_name=stream_name,
578
+ raw_table_location=raw_table_location,
579
+ columns=snowflake_columns + json_schema.build_view_columns(
580
+ column_name_environment=column_name_environment,
581
+ column_name_expression=column_name_expression
582
+ ),
466
583
  joins=json_schema.joins or [],
467
584
  comment=json_schema.description
468
585
  )
469
-
470
- def normalized_view_body(
471
- stream_locations: Dict[str,FullyQualifiedTable],
472
- stream_schemas: Dict[str,Dict],
473
- stream_name: str,
474
- include_default_columns: bool = True,
475
- ) -> str:
476
- """
477
- Returns the SQL for the body of a normalized view.
478
- Because views are created over raw data (potentially several joined raw tables), we have
479
- to pass in the locations of those raw tables, keyed by stream name.
480
- The stream schema is also passed in, keyed by stream name, and used to build the columns and joins.
481
- """
482
- main_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
483
- # we start with the view parts for the view we are building
484
- main_stream_view_part = normalized_view_parts(
485
- include_default_columns=include_default_columns,
486
- stream_schema=stream_schemas.get(stream_name)
487
- )
488
- # we use a CTE because we may need to use aliases in the joins
489
- main_stream_cte = f""" "{stream_name}" as (
490
- select {', '.join([c.definition() for c in main_stream_view_part.direct_columns()])}
491
- from {main_stream_raw_table_name_quoted}
492
- ) """
493
- ctes = [main_stream_cte]
494
- # we also use CTEs that recreate the views that the joins reference.
495
- # the reason for this is that we can't rely on the view being there,
496
- # and it's also possible that they reference each other
497
- for join in main_stream_view_part.joins:
498
- join_view_part = normalized_view_parts(
499
- include_default_columns=include_default_columns,
500
- stream_schema=stream_schemas.get(join.join_stream_name)
501
- )
502
- join_stream_raw_table_name_quoted = stream_locations[stream_name].get_fully_qualified_name()
503
- join_view_cte = f""" "{join.join_stream_name}" as (
504
- select {', '.join([c.definition() for c in join_view_part.direct_columns()])}
505
- from {join_stream_raw_table_name_quoted}
506
- ) """
507
- ctes.append(join_view_cte)
508
-
509
- join_columns = main_stream_view_part.join_columns()
510
- # in some situations, column expressions may reference the alias of another column
511
- # this is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
512
- # so we need to sort the columns so that if the name of the column appears (in quotes) in the expression of another column, it is ordered first
513
-
514
- # Collect columns to be moved
515
- columns_to_move = []
516
-
517
- for column in join_columns:
518
- for other_column in join_columns:
519
- if f'"{column.name}"' in other_column.expression:
520
- if column not in columns_to_move:
521
- columns_to_move.append(column)
522
-
523
- # Move collected columns to the front
524
- for column in columns_to_move:
525
- join_columns.remove(column)
526
- join_columns.insert(0, column)
527
-
528
- join_column_clauses = [c.definition() for c in join_columns]
529
- # we select * from the original view (in the CTE) and then add any expressions that come from the join columns
530
- final_column_clauses = [f'"{stream_name}".*'] + join_column_clauses
531
- all_ctes = "\n,".join(ctes)
532
- view_body = f"""with {all_ctes}
533
- select {', '.join(final_column_clauses)} from "{stream_name}" """
534
-
535
- if len(main_stream_view_part.joins) > 0:
536
- join_clauses = [join.definition() for join in main_stream_view_part.joins]
537
- view_body += "\n" + ("\n".join(join_clauses))
538
- return view_body