omnata-plugin-runtime 0.3.11__py3-none-any.whl → 0.3.18a59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
omnata_plugin_runtime/forms.py
@@ -104,6 +104,7 @@ class InformationField(SubscriptableBaseModel):
     depends_on: Optional[str] = None
     type: Literal["information"] = "information"
     reload_on_change: bool = False
+    secret: bool = False
 
 class InformationBoxField(SubscriptableBaseModel):
     """
@@ -123,6 +124,7 @@ class InformationBoxField(SubscriptableBaseModel):
     reload_on_change: bool = False
     box_type: Literal["info", "warning", "error"] = "info"
     box_icon: Optional[str] = None
+    secret: bool = False
 
 class FormSshKeypair(SubscriptableBaseModel):
     """
omnata_plugin_runtime/omnata_plugin.py
@@ -868,6 +868,12 @@ class InboundSyncRequest(SyncRequest):
             s.stream_name: s for s in streams
         }
         self._apply_results: Dict[str, List[pandas.DataFrame]] = {}
+        # named by convention, see SyncRunProcessor.enqueue
+        self._criteria_deletes_table_name = (
+            f"{self._source_app_name}.{self._results_schema_name}.{self._results_table_name}_CRITERIA_DELETES"
+        )
+        # These are similar to the results, but represent requests to delete records by some criteria
+        self._apply_results_criteria_deletes: Dict[str, List[pandas.DataFrame]] = {}
         self._latest_states: Dict[str, Any] = {}
         self._temp_tables = {}
         self._temp_table_lock = threading.Lock()
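For illustration, the naming convention yields a fully-qualified table name like the one below. The attribute values are hypothetical; in practice they are assigned by the Omnata engine when the sync request is constructed:

    source_app_name = "MY_PLUGIN_APP"      # hypothetical
    results_schema_name = "SYNC_RESULTS"   # hypothetical
    results_table_name = "INBOUND_000123"  # hypothetical
    table_name = f"{source_app_name}.{results_schema_name}.{results_table_name}_CRITERIA_DELETES"
    # -> MY_PLUGIN_APP.SYNC_RESULTS.INBOUND_000123_CRITERIA_DELETES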
@@ -890,24 +896,31 @@ class InboundSyncRequest(SyncRequest):
         logger.info("InboundSyncRequest apply_results_queue ")
         if self._apply_results is not None:
             with self._apply_results_lock:
+                results: List[pandas.DataFrame] = []
+                stream_names: List[str] = []
                 for stream_name, stream_results in self._apply_results.items():
-                    results = [
+                    non_empty_dfs = [
                         x for x in stream_results if x is not None and len(x) > 0
-                    ] # remove any None/empty dataframes
-                    if len(results) > 0:
-                        logger.info(
-                            f"Applying {len(results)} batches of queued results"
-                        )
-                        # upload all cached apply results
-                        all_dfs = pandas.concat(results)
-                        #logger.info(f"applying: {all_dfs}")
-                        self._apply_results_dataframe(stream_name, all_dfs)
-                        # add the count of this batch to the total for this stream
-                        self._stream_record_counts[
-                            stream_name
-                        ] = self._stream_record_counts[stream_name] + len(all_dfs)
-                        # update the stream state object too
-                        self._apply_latest_states()
+                    ]
+                    # get the total length of all the dataframes
+                    total_length = sum([len(x) for x in non_empty_dfs])
+                    # add the count of this batch to the total for this stream
+                    self._stream_record_counts[
+                        stream_name
+                    ] = self._stream_record_counts[stream_name] + total_length
+                    results.extend(non_empty_dfs) # remove any None/empty dataframes
+                    stream_names.append(stream_name)
+                if len(results) > 0:
+                    logger.info(
+                        f"Applying {len(results)} batches of queued results"
+                    )
+                    # upload all cached apply results
+                    all_dfs = pandas.concat(results)
+                    #logger.info(f"applying: {all_dfs}")
+                    self._apply_results_dataframe(stream_names, all_dfs)
+                    # update the stream state object too
+                    self._apply_latest_states()
+                for stream_name in stream_names:
                     self._apply_results[stream_name] = None
                 self._apply_results = {}
         # update the inbound stream record counts, so we can see progress
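To make the reworked batching concrete, here is a standalone sketch with made-up stream data: per-stream record counts are tallied first, then all non-empty dataframes across all streams are concatenated and uploaded in a single call, rather than one upload per stream:

    import pandas

    apply_results = {
        "contacts": [pandas.DataFrame({"APP_IDENTIFIER": ["1", "2"]}), None],
        "orders": [pandas.DataFrame({"APP_IDENTIFIER": ["9"]})],
    }
    stream_record_counts = {name: 0 for name in apply_results}

    results = []
    stream_names = []
    for stream_name, stream_results in apply_results.items():
        non_empty_dfs = [x for x in stream_results if x is not None and len(x) > 0]
        stream_record_counts[stream_name] += sum(len(x) for x in non_empty_dfs)
        results.extend(non_empty_dfs)
        stream_names.append(stream_name)

    if len(results) > 0:
        all_dfs = pandas.concat(results)  # one combined upload for all streams

    assert stream_record_counts == {"contacts": 2, "orders": 1}
    assert len(all_dfs) == 3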
@@ -918,6 +931,22 @@ class InboundSyncRequest(SyncRequest):
                     stream_errors=self._omnata_log_handler.stream_global_errors
                 )
             )
+        # also take care of uploading delete requests
+        if hasattr(self, '_apply_results_criteria_deletes') and self._apply_results_criteria_deletes is not None:
+            with self._apply_results_lock:
+                results: List[pandas.DataFrame] = []
+                for stream_name, stream_results in self._apply_results_criteria_deletes.items():
+                    results.extend([
+                        x for x in stream_results if x is not None and len(x) > 0
+                    ])
+                if len(results) > 0:
+                    logger.info(
+                        f"Applying {len(results)} batches of queued criteria deletes"
+                    )
+                    # upload all cached apply results
+                    all_dfs = pandas.concat(results)
+                    #logger.info(f"applying: {all_dfs}")
+                    self._apply_criteria_deletes_dataframe(all_dfs)
 
     def apply_cancellation(self):
         """
@@ -953,9 +982,15 @@ class InboundSyncRequest(SyncRequest):
             message=PluginMessageAbandonedStreams(abandoned_streams=abandoned_streams)
         )
 
-    def enqueue_results(self, stream_name: str, results: List[Dict], new_state: Any):
+    def enqueue_results(self, stream_name: str, results: List[Dict], new_state: Any, is_delete: Union[bool, List[bool]] = False):
         """
-        Adds some results to the queue for applying asynchronously
+        Adds some results to the queue for applying asynchronously.
+        stream_name: str, the name of the stream
+        results: List[Dict], the results to enqueue
+        new_state: Any, the new state which applies to the stream, given the new results
+        is_delete: Union[bool, List[bool]], whether the results are deletes or not.
+        is_delete can be a single boolean which applies to all results, or a list of booleans, one per result.
+        For records where is_delete is True, you can provide the current record value if it is known, or just the identifier.
         """
         logger.info(f"Enqueueing {len(results)} results for upload")
         if stream_name is None or len(stream_name) == 0:
@@ -964,7 +999,7 @@ class InboundSyncRequest(SyncRequest):
             existing_results: List[pandas.DataFrame] = []
             if stream_name in self._apply_results:
                 existing_results = self._apply_results[stream_name]
-            existing_results.append(self._preprocess_results_list(stream_name, results))
+            existing_results.append(self._preprocess_results_list(stream_name, results, is_delete))
             self._apply_results[stream_name] = existing_results
             current_latest = self._latest_states or {}
             self._latest_states = {**current_latest, **{stream_name: new_state}}
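A hedged usage sketch of the extended signature from a plugin's perspective. Here sync_request stands for the InboundSyncRequest the engine passes to the plugin, and the stream name, primary key field and record fields are made up:

    # upsert one record and delete another in the same batch;
    # the delete provides only the identifier, so its RECORD_DATA will be nullified
    sync_request.enqueue_results(
        stream_name="contacts",
        results=[
            {"id": "100", "email": "jane@example.com"},  # normal upsert
            {"id": "101"},                               # delete, identifier only
        ],
        new_state={"last_modified": "2024-01-01T00:00:00Z"},
        is_delete=[False, True],
    )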
@@ -984,7 +1019,52 @@ class InboundSyncRequest(SyncRequest):
                 if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
                     logger.info(f"Applying results queue immediately due to combined dataframe size")
                     self.apply_results_queue()
-
+
+    def delete_by_criteria(self, stream_name: str, criteria: Dict[str, Any]):
+        """
+        Submits criteria (a field→value dict) which will cause matching records to be marked as deleted at the end of the run.
+        This feature was created primarily for array fields that become child streams.
+        The parent record is updated, which means there is a set of new children, but we need to delete the previously synced records and we don't know their identifiers.
+
+        The criteria are applied before the new records for the current run are applied. In other words, it will not delete any records from the current run.
+
+        For a record to be deleted, it must match all of the criteria fields supplied. At least one field value must be provided.
+        """
+        if len(criteria) == 0:
+            raise ValueError("At least one field value must be provided for deletion criteria")
+        if stream_name not in self._streams_dict:
+            raise ValueError(
+                f"Cannot delete records for stream {stream_name} as its configuration doesn't exist"
+            )
+        # append the new criteria to the self._criteria_deletes_table_name table
+        # this table has two columns:
+        # STREAM_NAME: string
+        # DELETE_CRITERIA: object
+        with self._apply_results_lock:
+            logger.info(
+                f"Enqueuing {len(criteria)} delete criteria for stream {stream_name} for upload"
+            )
+            existing_results: List[pandas.DataFrame] = []
+            if stream_name in self._apply_results_criteria_deletes:
+                existing_results = self._apply_results_criteria_deletes[stream_name]
+            existing_results.append(pandas.DataFrame([{"STREAM_NAME": stream_name, "DELETE_CRITERIA": criteria}]))
+            self._apply_results_criteria_deletes[stream_name] = existing_results
+        # if the total size of all the dataframes exceeds 200MB, apply the results immediately
+        # we'll use df.memory_usage(index=True) for this
+        if self.development_mode is False:
+            # note: we want to do this for all values in self._apply_results_criteria_deletes, not just the new one,
+            # so first we get the list of lists from the dictionary values and flatten it,
+            # then we can sum the memory usage of each dataframe;
+            # if the total exceeds 200MB, we apply the results immediately
+            all_df_lists: List[List[pandas.DataFrame]] = list(self._apply_results_criteria_deletes.values())
+            # flatten
+            all_dfs: List[pandas.DataFrame] = [x for sublist in all_df_lists for x in sublist]
+            combined_length = sum([len(x) for x in all_dfs])
+            # first, don't bother if the count is less than 10000, since it's unlikely to be even close
+            if combined_length > 10000:
+                if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
+                    logger.info(f"Applying criteria deletes queue immediately due to combined dataframe size")
+                    self.apply_results_queue()
 
     def mark_stream_complete(self, stream_name: str):
         """
@@ -1045,14 +1125,15 @@ class InboundSyncRequest(SyncRequest):
             logger.debug(f"Failure to convert inbound data: {str(exception)}")
             return data
 
-    def _preprocess_results_list(self, stream_name: str, results: List[Dict]):
+    def _preprocess_results_list(self, stream_name: str, results: List[Dict], is_delete: Union[bool, List[bool]]):
         """
         Creates a dataframe from the enqueued list, ready to upload.
         The result is a dataframe containing all (and only):
         'APP_IDENTIFIER' string
         'STREAM_NAME' string
         'RETRIEVE_DATE' datetime (UTC)
-        'RECORD_DATA' object
+        'RECORD_DATA' object
+        'IS_DELETED' boolean
         """
         # for required_column in ['RECORD_DATA']:
         #     if required_column not in results_df.columns:
@@ -1063,6 +1144,9 @@ class InboundSyncRequest(SyncRequest):
             )
         logger.info(f"preprocessing for stream: {self._streams_dict[stream_name]}")
         if len(results) > 0:
+            if isinstance(is_delete, list):
+                if len(results) != len(is_delete):
+                    raise ValueError("results and is_delete lists must be the same length")
             # We need to remove any values (including nesting) which are empty dicts. This is to prevent the arrow error:
             # Cannot write struct type '<field_name>' with no child field to Parquet. Consider adding a dummy child field.
             results = [remove_empty_dict_values(result) for result in results]
@@ -1108,11 +1192,22 @@ class InboundSyncRequest(SyncRequest):
             )
 
             results_df["APP_IDENTIFIER"] = results_df["RECORD_DATA"].apply(lambda x: get_nested_value(dict(x), primary_key_field))
-            # we jump the record data to a json string to make uploading to Snowflake less error prone
-            results_df["RECORD_DATA"] = results_df["RECORD_DATA"].apply(json.dumps)
+            # ensure APP_IDENTIFIER is a string
+            results_df["APP_IDENTIFIER"] = results_df["APP_IDENTIFIER"].apply(str)
             # the timestamps in Snowflake are TIMESTAMP_LTZ, so we upload in string format to ensure the
             # timezone information is present.
             results_df["RETRIEVE_DATE"] = str(datetime.datetime.now().astimezone())
+            # create the IS_DELETED column from the is_delete value(s)
+            results_df["IS_DELETED"] = is_delete
+            # for each record, if IS_DELETED is true and RECORD_DATA only contains a single key, we assume that's the identifier;
+            # in this case, we nullify the RECORD_DATA column to indicate that the delete operation does not contain the full record
+            for index, row in results_df.iterrows():
+                if row["IS_DELETED"] and len(row["RECORD_DATA"]) == 1:
+                    results_df.at[index, "RECORD_DATA"] = None
+            # we dump the record data to a json string to make uploading to Snowflake less error prone, but only if it's not None
+            results_df["RECORD_DATA"] = results_df["RECORD_DATA"].apply(
+                lambda x: json.dumps(x) if x is not None else None
+            )
             results_df["STREAM_NAME"] = stream_name
         else:
             results_df = pandas.DataFrame(
@@ -1122,16 +1217,17 @@ class InboundSyncRequest(SyncRequest):
                     "STREAM_NAME",
                     "RECORD_DATA",
                     "RETRIEVE_DATE",
+                    "IS_DELETED"
                 ],
             )
         # trim out the columns we don't need to return
         return results_df[
             results_df.columns.intersection(
-                ["APP_IDENTIFIER", "STREAM_NAME", "RECORD_DATA", "RETRIEVE_DATE"]
+                ["APP_IDENTIFIER", "STREAM_NAME", "RECORD_DATA", "RETRIEVE_DATE", "IS_DELETED"]
             )
         ]
 
-    def _apply_results_dataframe(self, stream_name: str, results_df: pandas.DataFrame):
+    def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame):
         """
         Applies results for an inbound sync. The results are staged into a temporary
         table in Snowflake, so that we can make an atomic commit at the end.
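For a concrete picture of the new output shape, take the enqueue_results call sketched earlier (stream "contacts", primary key field "id", one upsert and one identifier-only delete). _preprocess_results_list would then produce roughly the following, with RETRIEVE_DATE shown as a placeholder and all values illustrative:

    APP_IDENTIFIER | STREAM_NAME | RECORD_DATA                                  | RETRIEVE_DATE   | IS_DELETED
    "100"          | "contacts"  | '{"id": "100", "email": "jane@example.com"}' | <run timestamp> | False
    "101"          | "contacts"  | None                                         | <run timestamp> | True

The second row's RECORD_DATA is nullified because it is a delete whose payload contained only the identifier key.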
@@ -1164,7 +1260,8 @@ class InboundSyncRequest(SyncRequest):
             #     column_order='index',
             #     #create_temp_table=True
             # )
-            self._results_exist[stream_name] = True
+            for stream_name in stream_names:
+                self._results_exist[stream_name] = True
         else:
             logger.info("Results dataframe is empty, not applying")
@@ -1175,6 +1272,34 @@ class InboundSyncRequest(SyncRequest):
         """
         self._plugin_message(PluginMessageStreamState(stream_state=self._latest_states))
 
+    def _apply_criteria_deletes_dataframe(self, results_df: pandas.DataFrame):
+        """
+        Applies criteria deletes for an inbound sync. The delete requests are staged into a
+        table in Snowflake, so that we can make an atomic commit at the end.
+        """
+        if len(results_df) > 0:
+            with self._snowflake_query_lock:
+                logger.info(
+                    f"Applying {len(results_df)} criteria deletes to {self._criteria_deletes_table_name}"
+                )
+                # try setting parquet engine here, since the engine parameter does not seem to make it through to the write_pandas function
+                success, nchunks, nrows, _ = write_pandas(
+                    conn=self._session._conn._cursor.connection,  # pylint: disable=protected-access
+                    df=results_df,
+                    table_name=self._criteria_deletes_table_name,
+                    quote_identifiers=False,  # already done in get_temp_table_name
+                    table_type="transient"
+                )
+                if not success:
+                    raise ValueError(
+                        f"Failed to write results to table {self._criteria_deletes_table_name}"
+                    )
+                logger.info(
+                    f"Wrote {nrows} rows and {nchunks} chunks to table {self._criteria_deletes_table_name}"
+                )
+        else:
+            logger.info("Results dataframe is empty, not applying")
+
 
 class ConnectResponse(SubscriptableBaseModel):
     """
@@ -1464,14 +1589,15 @@ class OmnataPlugin(ABC):
                     )
                     sent_initial = True
                 else:
-                    events.append(
-                        SnowflakeBillingEvent(
-                            billing_class="DAILY_ACTIVE_ADDITIONAL",
-                            billing_subclass="",
-                            timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
-                            base_charge=additional_charge,
+                    if additional_charge is not None and additional_charge > 0:
+                        events.append(
+                            SnowflakeBillingEvent(
+                                billing_class="DAILY_ACTIVE_ADDITIONAL",
+                                billing_subclass="",
+                                timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
+                                base_charge=additional_charge,
+                            )
                         )
-                    )
         return events
 
     def additional_loggers(self) -> List[str]:
omnata_plugin_runtime-0.3.18a59.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: omnata-plugin-runtime
-Version: 0.3.11
+Version: 0.3.18a59
 Summary: Classes and common runtime components for building and running Omnata Plugins
 Author: James Weakley
 Author-email: james.weakley@omnata.com
omnata_plugin_runtime-0.3.18a59.dist-info/RECORD
@@ -1,12 +1,12 @@
 omnata_plugin_runtime/__init__.py,sha256=w63LVME5nY-hQ4BBzfacy9kvTunwqHGs8iiSPGAX2ns,1214
 omnata_plugin_runtime/api.py,sha256=_N5ok5LN7GDO4J9n3yduXp3tpjmhpySY__U2baiygrs,6217
 omnata_plugin_runtime/configuration.py,sha256=at29ExowF_T4_2U9gY0BF4IVdwC-vDytmNRHL7UCWh8,34742
-omnata_plugin_runtime/forms.py,sha256=30CJB24TqfLYNnkplZdUbeqA-P9rUIBujVKXw_S-wKY,18371
+omnata_plugin_runtime/forms.py,sha256=pw_aKVsXSz47EP8PFBI3VDwdSN5IjvZxp8JTjO1V130,18421
 omnata_plugin_runtime/logging.py,sha256=bn7eKoNWvtuyTk7RTwBS9UARMtqkiICtgMtzq3KA2V0,3272
-omnata_plugin_runtime/omnata_plugin.py,sha256=pXyaviPk0cd1c_IyClt8-uMevyM-fvUz0CEUqAtdCLI,91213
+omnata_plugin_runtime/omnata_plugin.py,sha256=hygZU0EwuFmob-LAL57C_tj8Ft_OTQ81NoEZkEqHppc,99276
 omnata_plugin_runtime/plugin_entrypoints.py,sha256=_XgmWsrHoSshkl5Z2T27BAGVnBh4yH-8lni5sdGlSz8,27670
 omnata_plugin_runtime/rate_limiting.py,sha256=se6MftQI5NrVHaLb1hByPCgAESPQhkAgIG7KIU1clDU,16562
-omnata_plugin_runtime-0.3.11.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
-omnata_plugin_runtime-0.3.11.dist-info/METADATA,sha256=kEcF2ez8GmHWxNUxcsFHbGS4gGk5oTcqjkfeAURQrS4,1601
-omnata_plugin_runtime-0.3.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-omnata_plugin_runtime-0.3.11.dist-info/RECORD,,
+omnata_plugin_runtime-0.3.18a59.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
+omnata_plugin_runtime-0.3.18a59.dist-info/METADATA,sha256=L8hW1zr3XCQyZTGauD_267anwepEb6SU1bBm5dkZyO8,1604
+omnata_plugin_runtime-0.3.18a59.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+omnata_plugin_runtime-0.3.18a59.dist-info/RECORD,,