mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import operator
 import os
 import warnings
 from base64 import b64encode
@@ -28,6 +29,8 @@ from nuclio.config import split_path
 
 import mlrun
 from mlrun.config import config
+from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 
 from ..model import DataSource
@@ -101,8 +104,12 @@ class BaseSourceDriver(DataSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         """return the source data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
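The helper mlrun.utils.helpers.additional_filters_warning appears to be introduced alongside these changes; its body is not in this diff. Judging from the call sites (it receives the filters and the source class), it flags additional_filters passed to sources that cannot apply them. A minimal sketch of what such a helper might look like (the exact behavior is an assumption):

    import warnings


    def additional_filters_warning(additional_filters, source_class):
        # Assumed behavior: sources that cannot push filters down warn
        # and ignore them rather than failing the read.
        if additional_filters:
            warnings.warn(
                f"additional_filters parameter is not supported in "
                f"{source_class.__name__}, parameter has been ignored"
            )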
@@ -113,7 +120,11 @@ class BaseSourceDriver(DataSource):
 
     def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
         if self.support_spark:
-            df = load_spark_dataframe_with_options(session, self.get_spark_options())
+            spark_options = self.get_spark_options()
+            spark_format = spark_options.pop("format", None)
+            df = load_spark_dataframe_with_options(
+                session, spark_options, format=spark_format
+            )
             if named_view:
                 df.createOrReplaceTempView(self.name)
             return self._filter_spark_df(df, time_field, columns)
@@ -169,7 +180,7 @@ class CSVSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         schedule: str = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
@@ -204,11 +215,11 @@ class CSVSource(BaseSourceDriver):
         )
 
     def get_spark_options(self):
-        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
-                "path": url,
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
                 "inferSchema": "true",
@@ -240,7 +251,11 @@ class CSVSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
@@ -276,6 +291,12 @@ class ParquetSource(BaseSourceDriver):
     :parameter start_time: filters out data before this time
     :parameter end_time: filters out data after this time
     :parameter attributes: additional parameters to pass to storey.
+    :param additional_filters: List of additional_filter conditions as tuples.
+        Each tuple should be in the format (column_name, operator, value).
+        Supported operators: "=", ">=", "<=", ">", "<".
+        Example: [("Product", "=", "Computer")]
+        For all supported filters, please see:
+        https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
     """
 
     kind = "parquet"
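Putting the documented tuple format together with the new constructor parameter, a filtered parquet read might look like this (the path and column name are illustrative):

    from mlrun.datastore import ParquetSource

    source = ParquetSource(
        "sales",
        path="v3io:///projects/demo/sales.parquet",  # illustrative path
        additional_filters=[("Product", "=", "Computer")],
    )
    # the filters ride along in source.attributes and are applied on read
    df = source.to_dataframe()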
@@ -286,13 +307,19 @@ class ParquetSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         time_field: str = None,
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
+        additional_filters: Optional[list[Union[tuple, list]]] = None,
     ):
+        if additional_filters:
+            attributes = copy(attributes) or {}
+            additional_filters = transform_list_filters_to_tuple(additional_filters)
+            attributes["additional_filters"] = additional_filters
+
         super().__init__(
             name,
             path,
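transform_list_filters_to_tuple comes from the reworked mlrun/datastore/utils.py (see the import hunk above); its implementation is not shown here. Given the Optional[list[Union[tuple, list]]] annotation and the from_dict re-normalization further down, it plausibly converts inner lists back to tuples after a JSON round-trip, roughly like this sketch (an assumption, not the actual code):

    def transform_list_filters_to_tuple(additional_filters):
        # Assumed: [["Product", "=", "Computer"]] -> [("Product", "=", "Computer")],
        # since tuples serialize to JSON arrays and come back as lists.
        if not additional_filters:
            return additional_filters
        return [tuple(filter_) for filter_ in additional_filters]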
@@ -320,6 +347,10 @@ class ParquetSource(BaseSourceDriver):
     def end_time(self, end_time):
         self._end_time = self._convert_to_datetime(end_time)
 
+    @property
+    def additional_filters(self):
+        return self.attributes.get("additional_filters")
+
     @staticmethod
     def _convert_to_datetime(time):
         if time and isinstance(time, str):
@@ -336,16 +367,17 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         context=None,
+        additional_filters=None,
     ):
         import storey
 
-        attributes = self.attributes or {}
+        attributes = copy(self.attributes)
+        attributes.pop("additional_filters", None)
         if context:
             attributes["context"] = context
-
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         data_item = mlrun.store_manager.object(self.path)
         store, path, url = mlrun.store_manager.get_or_create_store(self.path)
-
         return storey.ParquetSource(
             paths=url,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
@@ -353,11 +385,22 @@ class ParquetSource(BaseSourceDriver):
             end_filter=self.end_time,
             start_filter=self.start_time,
             filter_column=self.time_field or time_field,
+            additional_filters=self.additional_filters or additional_filters,
             **attributes,
         )
 
+    @classmethod
+    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+        new_obj = super().from_dict(
+            struct=struct, fields=fields, deprecated_fields=deprecated_fields
+        )
+        new_obj.attributes["additional_filters"] = transform_list_filters_to_tuple(
+            new_obj.additional_filters
+        )
+        return new_obj
+
     def get_spark_options(self):
-        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
@@ -375,8 +418,10 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -384,9 +429,88 @@ class ParquetSource(BaseSourceDriver):
             end_time=end_time or self.end_time,
             time_column=time_field or self.time_field,
             format="parquet",
+            additional_filters=additional_filters or self.additional_filters,
             **reader_args,
         )
 
+    def _build_spark_additional_filters(self, column_types: dict):
+        if not self.additional_filters:
+            return None
+        from pyspark.sql.functions import col, isnan, lit
+
+        operators = {
+            "==": operator.eq,
+            "=": operator.eq,
+            ">": operator.gt,
+            "<": operator.lt,
+            ">=": operator.ge,
+            "<=": operator.le,
+            "!=": operator.ne,
+        }
+
+        spark_filter = None
+        new_filter = lit(True)
+        for filter_tuple in self.additional_filters:
+            if not filter_tuple:
+                continue
+            col_name, op, value = filter_tuple
+            if op.lower() in ("in", "not in") and isinstance(value, (list, tuple, set)):
+                none_exists = False
+                value = list(value)
+                for sub_value in value:
+                    if sub_value is None:
+                        value.remove(sub_value)
+                        none_exists = True
+                if none_exists:
+                    filter_nan = column_types[col_name] not in ("timestamp", "date")
+                    if value:
+                        if op.lower() == "in":
+                            new_filter = (
+                                col(col_name).isin(value) | col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+
+                        else:
+                            new_filter = (
+                                ~col(col_name).isin(value) & ~col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+                    else:
+                        if op.lower() == "in":
+                            new_filter = col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+                        else:
+                            new_filter = ~col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+            else:
+                if op.lower() == "in":
+                    new_filter = col(col_name).isin(value)
+                elif op.lower() == "not in":
+                    new_filter = ~col(col_name).isin(value)
+                elif op in operators:
+                    new_filter = operators[op](col(col_name), value)
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"unsupported filter operator: {op}"
+                    )
+            if spark_filter is not None:
+                spark_filter = spark_filter & new_filter
+            else:
+                spark_filter = new_filter
+        return spark_filter
+
+    def _filter_spark_df(self, df, time_field=None, columns=None):
+        spark_additional_filters = self._build_spark_additional_filters(
+            column_types=dict(df.dtypes)
+        )
+        if spark_additional_filters is not None:
+            df = df.filter(spark_additional_filters)
+        return super()._filter_spark_df(df=df, time_field=time_field, columns=columns)
+
 
 class BigQuerySource(BaseSourceDriver):
     """
@@ -401,12 +525,17 @@ class BigQuerySource(BaseSourceDriver):
 
         # use sql query
         query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
-        source = BigQuerySource("bq1", query=query_string,
-                                gcp_project="my_project",
-                                materialization_dataset="dataviews")
+        source = BigQuerySource(
+            "bq1",
+            query=query_string,
+            gcp_project="my_project",
+            materialization_dataset="dataviews",
+        )
 
         # read a table
-        source = BigQuerySource("bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project")
+        source = BigQuerySource(
+            "bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project"
+        )
 
 
     :parameter name: source name
509
638
  start_time=None,
510
639
  end_time=None,
511
640
  time_field=None,
641
+ additional_filters=None,
512
642
  ):
513
643
  from google.cloud import bigquery
514
644
  from google.cloud.bigquery_storage_v1 import BigQueryReadClient
515
645
 
646
+ mlrun.utils.helpers.additional_filters_warning(
647
+ additional_filters, self.__class__
648
+ )
649
+
516
650
  def schema_to_dtypes(schema):
517
651
  from mlrun.data_types.data_types import gbq_to_pandas_dtype
518
652
 
@@ -552,7 +686,6 @@ class BigQuerySource(BaseSourceDriver):
         else:
             df = rows_iterator.to_dataframe(dtypes=dtypes)
 
-        # TODO : filter as part of the query
         return select_columns_from_df(
             filter_df_start_end_time(
                 df,
@@ -673,32 +806,10 @@ class SnowflakeSource(BaseSourceDriver):
             **kwargs,
         )
 
-    def _get_password(self):
-        key = "SNOWFLAKE_PASSWORD"
-        snowflake_password = os.getenv(key) or os.getenv(
-            SecretsStore.k8s_env_variable_name_for_secret(key)
-        )
-
-        if not snowflake_password:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "No password provided. Set password using the SNOWFLAKE_PASSWORD "
-                "project secret or environment variable."
-            )
-
-        return snowflake_password
-
     def get_spark_options(self):
-        return {
-            "format": "net.snowflake.spark.snowflake",
-            "query": self.attributes.get("query"),
-            "sfURL": self.attributes.get("url"),
-            "sfUser": self.attributes.get("user"),
-            "sfPassword": self._get_password(),
-            "sfDatabase": self.attributes.get("database"),
-            "sfSchema": self.attributes.get("schema"),
-            "sfWarehouse": self.attributes.get("warehouse"),
-            "application": "iguazio_platform",
-        }
+        spark_options = get_snowflake_spark_options(self.attributes)
+        spark_options["query"] = self.attributes.get("query")
+        return spark_options
 
 
 class CustomSource(BaseSourceDriver):
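get_snowflake_spark_options is defined in the new mlrun/datastore/snowflake_utils.py (listed above, +43 lines), presumably so the source and the Snowflake target share one options builder. Its body is not shown in this diff, but the deleted code pins down what it must produce; a sketch consistent with the removed logic, with the query key now added by the caller:

    import os

    import mlrun
    from mlrun.secrets import SecretsStore


    def get_snowflake_spark_options(attributes):
        # reconstructed from the inline dict this change removes
        key = "SNOWFLAKE_PASSWORD"
        password = os.getenv(key) or os.getenv(
            SecretsStore.k8s_env_variable_name_for_secret(key)
        )
        if not password:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "No password provided. Set password using the SNOWFLAKE_PASSWORD "
                "project secret or environment variable."
            )
        return {
            "format": "net.snowflake.spark.snowflake",
            "sfURL": attributes.get("url"),
            "sfUser": attributes.get("user"),
            "sfPassword": password,
            "sfDatabase": attributes.get("database"),
            "sfSchema": attributes.get("schema"),
            "sfWarehouse": attributes.get("warehouse"),
            "application": "iguazio_platform",
        }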
@@ -752,7 +863,19 @@ class DataFrameSource:
             context=self.context or context,
         )
 
-    def to_dataframe(self, **kwargs):
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return self._df
 
     def is_iterator(self):
@@ -794,7 +917,8 @@ class OnlineSource(BaseSourceDriver):
         explicit_ack = (
             is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
         )
-        src_class = storey.AsyncEmitSource(
+        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
+        src_class = storey.SyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
853
977
  super().__init__(name, attributes=attrs, **kwargs)
854
978
 
855
979
  def add_nuclio_trigger(self, function):
856
- store, path, url = mlrun.store_manager.get_or_create_store(self.path)
980
+ store, _, url = mlrun.store_manager.get_or_create_store(self.path)
857
981
  if store.kind != "v3io":
858
982
  raise mlrun.errors.MLRunInvalidArgumentError(
859
983
  "Only profiles that reference the v3io datastore can be used with StreamSource"
860
984
  )
861
- path = "v3io:/" + path
862
985
  storage_options = store.get_storage_options()
863
986
  access_key = storage_options.get("v3io_access_key")
864
987
  endpoint, stream_path = parse_path(url)
@@ -882,7 +1005,7 @@ class StreamSource(OnlineSource):
             kwargs["worker_allocation_mode"] = "static"
 
         function.add_v3io_stream_trigger(
-            path,
+            url,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
@@ -947,6 +1070,7 @@ class KafkaSource(OnlineSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         raise mlrun.MLRunInvalidArgumentError(
             "KafkaSource does not support batch processing"
@@ -1087,9 +1211,13 @@ class SQLSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         import sqlalchemy as sqlalchemy
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         db_path = self.attributes.get("db_path")
         table_name = self.attributes.get("table_name")
         parse_dates = self.attributes.get("parse_dates")
mlrun/datastore/store_resources.py

@@ -17,7 +17,7 @@
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
-from mlrun.utils.helpers import is_legacy_artifact, parse_artifact_uri
+from mlrun.utils.helpers import parse_artifact_uri
 
 from ..common.helpers import parse_versioned_object_uri
 from ..platforms.iguazio import parse_path
@@ -146,7 +146,11 @@ def get_store_resource(
 
     db = db or mlrun.get_run_db(secrets=secrets)
     kind, uri = parse_store_uri(uri)
-    if kind == StorePrefix.FeatureSet:
+    if not kind:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Cannot get store resource from invalid URI: {uri}"
+        )
+    elif kind == StorePrefix.FeatureSet:
         project, name, tag, uid = parse_versioned_object_uri(
             uri, project or config.default_project
         )
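With this guard, a URI that parse_store_uri cannot map to a known store prefix now fails fast instead of proceeding with an empty kind. A quick sketch of the new behavior (assuming a configured MLRun environment, since a run DB is resolved before the check):

    import mlrun
    from mlrun.datastore.store_resources import get_store_resource

    try:
        get_store_resource("not-a-store-uri")
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(err)  # Cannot get store resource from invalid URI: ...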
@@ -167,11 +171,7 @@ def get_store_resource(
     )
     if resource.get("kind", "") == "link":
         # todo: support other link types (not just iter, move this to the db/api layer
-        link_iteration = (
-            resource.get("link_iteration", 0)
-            if is_legacy_artifact(resource)
-            else resource["spec"].get("link_iteration", 0)
-        )
+        link_iteration = resource["spec"].get("link_iteration", 0)
 
         resource = db.read_artifact(
             key,