omnata-plugin-runtime 0.10.7a253__tar.gz → 0.10.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: omnata-plugin-runtime
3
- Version: 0.10.7a253
3
+ Version: 0.10.8
4
4
  Summary: Classes and common runtime components for building and running Omnata Plugins
5
5
  Author: James Weakley
6
6
  Author-email: james.weakley@omnata.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "omnata-plugin-runtime"
3
- version = "0.10.7-a253"
3
+ version = "0.10.8"
4
4
  description = "Classes and common runtime components for building and running Omnata Plugins"
5
5
  authors = ["James Weakley <james.weakley@omnata.com>"]
6
6
  readme = "README.md"
@@ -52,6 +52,9 @@ class JsonSchemaProperty(BaseModel):
52
52
  requiredStreamNames: Optional[List[str]] = Field(
53
53
  None, description="The names of the streams that are depended upon by this column, via joins. If these streams are not selected, the column will be omitted."
54
54
  )
55
+ referencedFields: Optional[Dict[str,List[str]]] = Field(
56
+ None, description="The names of fields that are referenced by this field, keyed on the stream name (or None if it's the current stream). This is used to order the fields, and also to cascade the removal of unsupported fields (e.g. in formulas)."
57
+ )
55
58
 
56
59
  @model_validator(mode='after')
57
60
  def validate(self) -> Self:
@@ -165,6 +168,9 @@ class SnowflakeViewColumn(BaseModel):
165
168
  required_stream_names: Optional[List[str]] = Field(
166
169
  default=None, description="The names of the streams that are depended upon by this column, via joins. If these streams are not selected, the column will be omitted"
167
170
  )
171
+ referenced_columns: Optional[Dict[str,List[str]]] = Field(
172
+ default=None, description="The names of columns that are referenced by this column, keyed on the stream name (or None if it's the current stream). This is used to order the columns, and also to cascade the removal of unsupported columns (e.g. in formulas)."
173
+ )
168
174
 
169
175
  def __repr__(self) -> str:
170
176
  return "SnowflakeViewColumn(name=%r, definition=%r, comment=%r)" % (
@@ -230,19 +236,23 @@ class SnowflakeViewColumn(BaseModel):
230
236
  if not json_schema_property.snowflakeColumnExpression:
231
237
  expression=f"""{expression}::{json_schema_property.snowflake_data_type}"""
232
238
  required_stream_names = None
239
+ referenced_columns = None
233
240
  if json_schema_property.requiredStreamNames:
234
241
  required_stream_names = json_schema_property.requiredStreamNames
242
+ if json_schema_property.referencedFields:
243
+ referenced_columns = json_schema_property.referencedFields
235
244
  return cls(
236
245
  name=final_column_name,
237
246
  original_name=column_name,
238
247
  expression=expression,
239
248
  comment=comment,
240
249
  is_join_column=json_schema_property.isJoinColumn,
241
- required_stream_names=required_stream_names
250
+ required_stream_names=required_stream_names,
251
+ referenced_columns=referenced_columns
242
252
  )
243
253
 
244
254
  @classmethod
245
- def order_by_reference(cls,join_columns:List[Self]) -> List[Self]:
255
+ def order_by_reference(cls,current_stream_name:str,columns:List[Self]) -> List[Self]:
246
256
  """
247
257
  In some situations, column expressions may reference the alias of another column
248
258
  This is allowed in Snowflake, as long as the aliased column is defined before it's used in a later column
@@ -253,28 +263,24 @@ class SnowflakeViewColumn(BaseModel):
253
263
  columns_to_move:List[Self] = []
254
264
  # Collect Omnata System columns and keep them at the front
255
265
  omnata_system_columns_start = []
256
- omnata_system_columns_end = []
257
- for column in join_columns[:]:
258
- if column.original_name=="OMNATA_APP_IDENTIFIER":
259
- join_columns.remove(column)
266
+ for column in columns[:]:
267
+ if column.original_name.startswith("OMNATA_"):
268
+ columns.remove(column)
260
269
  omnata_system_columns_start.append(column)
261
- elif column.original_name.startswith("OMNATA_"):
262
- join_columns.remove(column)
263
- omnata_system_columns_end.append(column)
264
270
 
265
- for column in join_columns:
266
- for other_column in join_columns:
271
+ for column in columns:
272
+ for other_column in columns:
267
273
  if column==other_column:
268
274
  continue
269
- if f'"{column.original_name}"' in other_column.expression:
275
+ if column.original_name in (other_column.referenced_columns or {}).get(current_stream_name,[]):
270
276
  if column not in columns_to_move:
271
277
  columns_to_move.append(column)
272
278
 
273
279
  # Move collected columns to the front
274
280
  for column in columns_to_move:
275
- join_columns.remove(column)
276
- join_columns.insert(0, column)
277
- return omnata_system_columns_start + join_columns + omnata_system_columns_end
281
+ columns.remove(column)
282
+ columns.insert(0, column)
283
+ return omnata_system_columns_start + columns
278
284
 
279
285
 
280
286
  class SnowflakeViewJoin(BaseModel):
@@ -417,6 +423,12 @@ class SnowflakeViewPart(BaseModel):
417
423
  select {', '.join([c.definition(original_name=original_name) for c in self.direct_columns()])}
418
424
  from {self.raw_table_location.get_fully_qualified_name()}
419
425
  ) """
426
+
427
def columns_missing(self, columns_to_check: List[str]) -> List[str]:
    """
    Return the entries of ``columns_to_check`` that this view part lacks.

    A requested name counts as present when it equals the ``original_name``
    of one of ``self.columns``. The result keeps the order (and any
    duplicates) of ``columns_to_check``.
    """
    # Build the lookup once instead of once per requested name.
    present_names = {column.original_name for column in self.columns}
    return [name for name in columns_to_check if name not in present_names]
420
432
 
421
433
  class SnowflakeViewParts(BaseModel):
422
434
  """
@@ -488,7 +500,7 @@ class SnowflakeViewParts(BaseModel):
488
500
  column_name_environment=column_name_environment,
489
501
  column_name_expression=column_name_expression
490
502
  )
491
- joined_parts = []
503
+ joined_parts:List[SnowflakeViewPart] = []
492
504
  # remove the joins from the main part if they are not in the raw stream locations
493
505
  main_stream_view_part.joins = [join for join in main_stream_view_part.joins
494
506
  if join.join_stream_name in raw_stream_locations
@@ -503,21 +515,50 @@ class SnowflakeViewParts(BaseModel):
503
515
  column_name_environment=column_name_environment,
504
516
  column_name_expression=column_name_expression
505
517
  ))
506
- # For each column, the plugin can advise which streams are required for the join, which comes through as required_stream_names
518
+ # For each column, the plugin can advise which fields (of the same stream or joined) are required for the join, which comes through as referenced_columns
507
519
  # on the SnowflakeViewColumn object.
508
- # Until this generate function is called with the raw stream names, we don't know which streams the user has actually selected.
509
- # So now there's a pruning process where we remove columns from the main view part that depend on streams that are not selected
510
- for column in main_stream_view_part.columns:
511
- if column.required_stream_names:
512
- for required_stream_name in column.required_stream_names:
513
- if required_stream_name not in raw_stream_locations:
514
- logger.warning(f"Column {column.name} in stream {stream_name} requires stream {required_stream_name} to be selected, but it was not provided")
515
- main_stream_view_part.columns.remove(column)
516
- break
520
+ # Until this generate function is called with the raw stream names, we don't know which streams the user has actually selected, nor which
521
+ # fields are actually available (some may be dropped due to something like an unsupported formula).
522
+ # So now there's a pruning process where we remove columns that reference fields that are not available.
523
+ # We'll start by doing a first pass and removing unavailable columns from other streams
524
+ # then, we can do a final pass and remove columns that reference fields that are not available in the current stream
525
+ prune_count = 0
526
+ while prune(main_stream_view_part,joined_parts):
527
+ prune_count += 1
528
+ if prune_count > 10000:
529
+ raise ValueError("Pruning of columns from the view has entered an infinite loop")
517
530
 
518
531
  return cls(main_part=main_stream_view_part, joined_parts=joined_parts)
519
532
 
520
-
533
def prune(view_part: "SnowflakeViewPart", joined_parts: "List[SnowflakeViewPart]", _visiting: Optional[set] = None) -> bool:
    """
    Remove at most one column whose references cannot be satisfied.

    Each column of ``view_part`` may declare ``referenced_columns``: a mapping
    of stream name to the field names it depends on. A column is removed when
    a referenced stream is neither ``view_part`` itself nor one of
    ``joined_parts``, or when the referenced part reports (via
    ``columns_missing``) that some of those fields are absent. When all the
    references resolve, the referenced joined part is pruned recursively, since
    dropping one of its columns may invalidate a column examined earlier.

    The function returns as soon as a single column has been removed, so the
    caller is expected to invoke it repeatedly until it returns False.

    Args:
        view_part: The part whose columns are examined (and possibly pruned).
        joined_parts: The other parts that columns may reference by stream name.
        _visiting: Internal. Stream names currently being pruned further up the
            call chain; callers should leave this unset.

    Returns:
        True if a column was removed from ``view_part`` or from a part it
        references, False if nothing needed removing.
    """
    # Tracks the recursion path so that two streams referencing each other
    # cannot bounce between one another until the interpreter's stack runs out.
    visiting = set() if _visiting is None else _visiting
    visiting.add(view_part.stream_name)
    try:
        for column in view_part.columns:
            if not column.referenced_columns:
                continue
            for referenced_stream_name, referenced_fields in column.referenced_columns.items():
                if referenced_stream_name == view_part.stream_name:
                    part = view_part
                else:
                    part = next((candidate for candidate in joined_parts if candidate.stream_name==referenced_stream_name),None)
                if part is None:
                    logger.warning(f"Column {column.name} in stream {view_part.stream_name} references stream {referenced_stream_name}, but it was not provided")
                    # Mutating the list mid-iteration is safe only because we return immediately.
                    view_part.columns.remove(column)
                    return True

                columns_missing_from_join = part.columns_missing(referenced_fields)
                if columns_missing_from_join:
                    logger.warning(f"Column {column.name} in stream {view_part.stream_name} references fields {columns_missing_from_join} in stream {referenced_stream_name}, but they were not provided")
                    view_part.columns.remove(column)
                    return True

                # Nothing was removed here, but the referenced part may itself hold
                # columns with unsatisfied references. Only stop when that pass
                # actually removed something; otherwise keep checking the remaining
                # references and columns of this part.
                if (
                    part is not view_part
                    and part.stream_name not in visiting
                    and prune(part, joined_parts, visiting)
                ):
                    return True
        return False
    finally:
        visiting.discard(view_part.stream_name)
521
562
 
522
563
  class JsonSchemaTopLevel(BaseModel):
523
564
  """
@@ -683,7 +724,7 @@ def normalized_view_part(
683
724
  #- APP_IDENTIFIER
684
725
  #- Direct and joined columns, ordered so that columns that reference other columns are defined after the columns they reference
685
726
  #- OMNATA_RETRIEVE_DATE, OMNATA_RAW_RECORD, OMNATA_IS_DELETED, OMNATA_RUN_ID
686
- view_columns = SnowflakeViewColumn.order_by_reference(direct_view_columns +
727
+ view_columns = SnowflakeViewColumn.order_by_reference(stream_name,direct_view_columns +
687
728
  join_view_columns)
688
729
  return SnowflakeViewPart(
689
730
  stream_name=stream_name,