mlrun 1.7.0rc7__py3-none-any.whl → 1.7.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic; details are available on the package's registry page.

Files changed (91)
  1. mlrun/__init__.py +1 -0
  2. mlrun/__main__.py +2 -0
  3. mlrun/artifacts/model.py +29 -25
  4. mlrun/common/schemas/__init__.py +4 -0
  5. mlrun/common/schemas/alert.py +122 -0
  6. mlrun/common/schemas/api_gateway.py +8 -1
  7. mlrun/common/schemas/auth.py +4 -0
  8. mlrun/common/schemas/client_spec.py +1 -0
  9. mlrun/common/schemas/hub.py +7 -9
  10. mlrun/common/schemas/model_monitoring/constants.py +4 -2
  11. mlrun/{datastore/helpers.py → common/schemas/pagination.py} +11 -3
  12. mlrun/common/schemas/project.py +15 -10
  13. mlrun/config.py +35 -13
  14. mlrun/datastore/__init__.py +3 -7
  15. mlrun/datastore/base.py +6 -5
  16. mlrun/datastore/datastore_profile.py +19 -1
  17. mlrun/datastore/snowflake_utils.py +43 -0
  18. mlrun/datastore/sources.py +18 -30
  19. mlrun/datastore/targets.py +140 -12
  20. mlrun/datastore/utils.py +10 -5
  21. mlrun/datastore/v3io.py +27 -50
  22. mlrun/db/base.py +88 -2
  23. mlrun/db/httpdb.py +314 -41
  24. mlrun/db/nopdb.py +142 -0
  25. mlrun/execution.py +21 -14
  26. mlrun/feature_store/api.py +9 -5
  27. mlrun/feature_store/feature_set.py +39 -23
  28. mlrun/feature_store/feature_vector.py +2 -1
  29. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  30. mlrun/feature_store/steps.py +30 -19
  31. mlrun/features.py +4 -13
  32. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  33. mlrun/frameworks/lgbm/__init__.py +1 -1
  34. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  35. mlrun/frameworks/lgbm/model_handler.py +1 -1
  36. mlrun/frameworks/pytorch/__init__.py +2 -2
  37. mlrun/frameworks/sklearn/__init__.py +1 -1
  38. mlrun/frameworks/tf_keras/__init__.py +1 -1
  39. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  40. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  41. mlrun/frameworks/xgboost/__init__.py +1 -1
  42. mlrun/kfpops.py +2 -5
  43. mlrun/launcher/base.py +1 -1
  44. mlrun/launcher/client.py +2 -2
  45. mlrun/model.py +2 -2
  46. mlrun/model_monitoring/application.py +11 -2
  47. mlrun/model_monitoring/applications/histogram_data_drift.py +3 -3
  48. mlrun/model_monitoring/controller.py +2 -3
  49. mlrun/model_monitoring/helpers.py +3 -1
  50. mlrun/model_monitoring/stream_processing.py +0 -1
  51. mlrun/model_monitoring/writer.py +32 -0
  52. mlrun/package/packagers_manager.py +1 -0
  53. mlrun/platforms/__init__.py +1 -1
  54. mlrun/platforms/other.py +1 -1
  55. mlrun/projects/operations.py +11 -4
  56. mlrun/projects/pipelines.py +1 -1
  57. mlrun/projects/project.py +180 -73
  58. mlrun/run.py +77 -41
  59. mlrun/runtimes/__init__.py +16 -0
  60. mlrun/runtimes/base.py +4 -1
  61. mlrun/runtimes/kubejob.py +26 -121
  62. mlrun/runtimes/mpijob/abstract.py +8 -8
  63. mlrun/runtimes/nuclio/api_gateway.py +58 -8
  64. mlrun/runtimes/nuclio/application/application.py +79 -1
  65. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  66. mlrun/runtimes/nuclio/function.py +20 -13
  67. mlrun/runtimes/nuclio/serving.py +11 -10
  68. mlrun/runtimes/pod.py +148 -3
  69. mlrun/runtimes/utils.py +0 -28
  70. mlrun/secrets.py +6 -2
  71. mlrun/serving/remote.py +2 -3
  72. mlrun/serving/routers.py +7 -4
  73. mlrun/serving/server.py +1 -1
  74. mlrun/serving/states.py +14 -38
  75. mlrun/serving/v2_serving.py +8 -7
  76. mlrun/utils/helpers.py +1 -1
  77. mlrun/utils/http.py +1 -1
  78. mlrun/utils/notifications/notification/base.py +12 -0
  79. mlrun/utils/notifications/notification/console.py +2 -0
  80. mlrun/utils/notifications/notification/git.py +3 -1
  81. mlrun/utils/notifications/notification/ipython.py +2 -0
  82. mlrun/utils/notifications/notification/slack.py +41 -13
  83. mlrun/utils/notifications/notification/webhook.py +11 -1
  84. mlrun/utils/retryer.py +3 -2
  85. mlrun/utils/version/version.json +2 -2
  86. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/METADATA +15 -15
  87. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/RECORD +91 -89
  88. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/LICENSE +0 -0
  89. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/WHEEL +0 -0
  90. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/entry_points.txt +0 -0
  91. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py CHANGED
@@ -528,6 +528,75 @@ class NopDB(RunDBInterface):
     ):
         pass
 
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    def get_builder_status(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        offset: int = 0,
+        logs: bool = True,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def set_run_notifications(
+        self,
+        project: str,
+        runs: list[mlrun.model.RunObject],
+        notifications: list[mlrun.model.Notification],
+    ):
+        pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: list[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def store_alert_notifications(
+        self,
+        session,
+        notification_objects: list[mlrun.model.Notification],
+        alert_id: str,
+        project: str,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def get_log_size(self, uid, project=""):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
+
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
@@ -545,3 +614,76 @@ class NopDB(RunDBInterface):
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
+
+    def function_status(self, project, name, kind, selector):
+        pass
+
+    def start_function(
+        self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
+    ):
+        pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        pass
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError
+
+    def generate_event(
+        self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
+    ):
+        pass
+
+    def store_alert_config(
+        self,
+        alert_name: str,
+        alert_data: Union[dict, mlrun.common.schemas.AlertConfig],
+        project="",
+    ):
+        pass
+
+    def get_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def list_alerts_configs(self, project=""):
+        pass
+
+    def delete_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def reset_alert_config(self, alert_name: str, project=""):
+        pass
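The stubs above complete NopDB, the no-op implementation of RunDBInterface, which is meant to keep client-side code paths (builder status, notifications, alerts, logs) from failing when no real MLRun API/DB is configured. A minimal sketch of the effect, assuming NopDB can be constructed directly with no arguments (the constructor is not shown in this diff):

    from mlrun.db.nopdb import NopDB

    db = NopDB()  # assumption: default construction is enough for a quick local check
    # the newly added stubs are no-ops and simply return None
    db.get_alert_config("drift-alert", project="my-project")
    db.list_alerts_configs(project="my-project")
    db.watch_log("some-run-uid", project="my-project", watch=False)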
mlrun/execution.py CHANGED
@@ -224,12 +224,12 @@ class MLClientCtx:
         with context.get_child_context(myparam=param) as child:
             accuracy = child_handler(child, df, **child.parameters)
             accuracy_sum += accuracy
-            child.log_result('accuracy', accuracy)
+            child.log_result("accuracy", accuracy)
             if accuracy > best_accuracy:
                 child.mark_as_best()
                 best_accuracy = accuracy
 
-    context.log_result('avg_accuracy', accuracy_sum / len(param_list))
+    context.log_result("avg_accuracy", accuracy_sum / len(param_list))
 
 :param params: Extra (or override) params to parent context
 :param with_parent_params: Child will copy the parent parameters and add to them
@@ -289,7 +289,9 @@ class MLClientCtx:
 
 Example::
 
-    feature_vector = context.get_store_resource("store://feature-vectors/default/myvec")
+    feature_vector = context.get_store_resource(
+        "store://feature-vectors/default/myvec"
+    )
     dataset = context.get_store_resource("store://artifacts/default/mydata")
 
 :param url: Store resource uri/path, store://<type>/<project>/<name>:<version>
@@ -421,7 +423,7 @@ class MLClientCtx:
 
 Example::
 
-    data_path=context.artifact_subpath('data')
+    data_path = context.artifact_subpath("data")
 
 """
 return os.path.join(self.artifact_path, *subpaths)
@@ -525,7 +527,7 @@ class MLClientCtx:
 
 Example::
 
-    context.log_result('accuracy', 0.85)
+    context.log_result("accuracy", 0.85)
 
 :param key: Result key
 :param value: Result value
@@ -539,7 +541,7 @@ class MLClientCtx:
 
 Example::
 
-    context.log_results({'accuracy': 0.85, 'loss': 0.2})
+    context.log_results({"accuracy": 0.85, "loss": 0.2})
 
 :param results: Key/value dict or results
 :param commit: Commit (write to DB now vs wait for the end of the run)
@@ -674,7 +676,9 @@ class MLClientCtx:
         "age": [42, 52, 36, 24, 73],
         "testScore": [25, 94, 57, 62, 70],
     }
-    df = pd.DataFrame(raw_data, columns=["first_name", "last_name", "age", "testScore"])
+    df = pd.DataFrame(
+        raw_data, columns=["first_name", "last_name", "age", "testScore"]
+    )
     context.log_dataset("mydf", df=df, stats=True)
 
 :param key: Artifact key
@@ -752,13 +756,16 @@ class MLClientCtx:
 
 Example::
 
-    context.log_model("model", body=dumps(model),
-                      model_file="model.pkl",
-                      metrics=context.results,
-                      training_set=training_df,
-                      label_column='label',
-                      feature_vector=feature_vector_uri,
-                      labels={"app": "fraud"})
+    context.log_model(
+        "model",
+        body=dumps(model),
+        model_file="model.pkl",
+        metrics=context.results,
+        training_set=training_df,
+        label_column="label",
+        feature_vector=feature_vector_uri,
+        labels={"app": "fraud"},
+    )
 
 :param key: Artifact key or artifact class ()
 :param body: Will use the body as the artifact content
mlrun/feature_store/api.py CHANGED
@@ -136,7 +136,10 @@ def get_offline_features(
     ]
     vector = FeatureVector(features=features)
     resp = get_offline_features(
-        vector, entity_rows=trades, entity_timestamp_column="time", query="ticker in ['GOOG'] and bid>100"
+        vector,
+        entity_rows=trades,
+        entity_timestamp_column="time",
+        query="ticker in ['GOOG'] and bid>100",
     )
     print(resp.to_dataframe())
     print(vector.get_stats_table())
@@ -307,7 +310,7 @@ def get_online_feature_service(
 
 Example::
 
-    svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
+    svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
     try:
         resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
         print(resp)
@@ -456,7 +459,7 @@ def ingest(
     df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
 
     # for running as remote job
-    config = RunConfig(image='mlrun/mlrun')
+    config = RunConfig(image="mlrun/mlrun")
     df = ingest(stocks_set, stocks, run_config=config)
 
     # specify source and targets
@@ -1121,9 +1124,10 @@ def _ingest_with_spark(
             df_to_write = target.prepare_spark_df(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
+            write_format = spark_options.pop("format", None)
             if overwrite:
                 write_spark_dataframe_with_options(
-                    spark_options, df_to_write, "overwrite"
+                    spark_options, df_to_write, "overwrite", write_format=write_format
                 )
             else:
                 # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1131,7 +1135,7 @@ def _ingest_with_spark(
                 df_to_write.persist()
                 if df_to_write.count() > 0:
                     write_spark_dataframe_with_options(
-                        spark_options, df_to_write, "append"
+                        spark_options, df_to_write, "append", write_format=write_format
                     )
             target.update_resource_status("ready")
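The two _ingest_with_spark hunks above pop the "format" key out of spark_options and pass it separately as write_format, so the output format is applied explicitly rather than being forwarded as a generic Spark option. A hypothetical sketch of that call pattern — this is not mlrun's actual write_spark_dataframe_with_options implementation, only an illustration of the intent:

    # hypothetical helper, for illustration only
    def write_df(spark_options: dict, df, mode: str, write_format: str = None):
        writer = df.write.mode(mode).options(**spark_options)
        if write_format:
            # e.g. "parquet", "csv", or an external connector format
            writer = writer.format(write_format)
        writer.save()  # assumes the target path/table comes from spark_options

    # caller side, mirroring the diff
    write_format = spark_options.pop("format", None)
    write_df(spark_options, df_to_write, "overwrite", write_format=write_format)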
 
mlrun/feature_store/feature_set.py CHANGED
@@ -337,7 +337,10 @@ class FeatureSet(ModelObj):
 example::
 
     import mlrun.feature_store as fstore
-    ticks = fstore.FeatureSet("ticks", entities=["stock"], timestamp_key="timestamp")
+
+    ticks = fstore.FeatureSet(
+        "ticks", entities=["stock"], timestamp_key="timestamp"
+    )
     ticks.ingest(df)
 
 :param name: name of the feature set
@@ -625,12 +628,12 @@ class FeatureSet(ModelObj):
 
     import mlrun.feature_store as fstore
 
-    ticks = fstore.FeatureSet("ticks",
-                              entities=["stock"],
-                              timestamp_key="timestamp")
-    ticks.add_entity("country",
-                     mlrun.data_types.ValueType.STRING,
-                     description="stock country")
+    ticks = fstore.FeatureSet(
+        "ticks", entities=["stock"], timestamp_key="timestamp"
+    )
+    ticks.add_entity(
+        "country", mlrun.data_types.ValueType.STRING, description="stock country"
+    )
     ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
     ticks.save()
 
@@ -650,13 +653,23 @@ class FeatureSet(ModelObj):
     import mlrun.feature_store as fstore
     from mlrun.features import Feature
 
-    ticks = fstore.FeatureSet("ticks",
-                              entities=["stock"],
-                              timestamp_key="timestamp")
-    ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.STRING,
-                              description="client consistency"),"ABC01")
-    ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.FLOAT,
-                              description="client volatility"),"SAB")
+    ticks = fstore.FeatureSet(
+        "ticks", entities=["stock"], timestamp_key="timestamp"
+    )
+    ticks.add_feature(
+        Feature(
+            value_type=mlrun.data_types.ValueType.STRING,
+            description="client consistency",
+        ),
+        "ABC01",
+    )
+    ticks.add_feature(
+        Feature(
+            value_type=mlrun.data_types.ValueType.FLOAT,
+            description="client volatility",
+        ),
+        "SAB",
+    )
     ticks.save()
 
 :param feature: setting of Feature
@@ -860,15 +873,18 @@ class FeatureSet(ModelObj):
 example::
 
     import mlrun.feature_store as fstore
+
     ...
-    ticks = fstore.FeatureSet("ticks",
-                              entities=["stock"],
-                              timestamp_key="timestamp")
-    ticks.add_aggregation(name='priceN',
-                          column='price',
-                          operations=['avg'],
-                          windows=['1d'],
-                          period='1h')
+    ticks = fstore.FeatureSet(
+        "ticks", entities=["stock"], timestamp_key="timestamp"
+    )
+    ticks.add_aggregation(
+        name="priceN",
+        column="price",
+        operations=["avg"],
+        windows=["1d"],
+        period="1h",
+    )
     ticks.plot(rankdir="LR", with_targets=True)
 
 :param filename: target filepath for the graph image (None for the notebook)
@@ -1005,7 +1021,7 @@ class FeatureSet(ModelObj):
     df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())
 
     # for running as remote job
-    config = RunConfig(image='mlrun/mlrun')
+    config = RunConfig(image="mlrun/mlrun")
     df = ingest(stocks_set, stocks, run_config=config)
 
     # specify source and targets
mlrun/feature_store/feature_vector.py CHANGED
@@ -486,6 +486,7 @@ class FeatureVector(ModelObj):
 example::
 
     import mlrun.feature_store as fstore
+
     features = ["quotes.bid", "quotes.asks_sum_5h as asks_5h", "stocks.*"]
     vector = fstore.FeatureVector("my-vec", features)
 
@@ -852,7 +853,7 @@ class FeatureVector(ModelObj):
 
 Example::
 
-    svc = vector_uri.get_online_feature_service(entity_keys=['ticker'])
+    svc = vector_uri.get_online_feature_service(entity_keys=["ticker"])
     try:
         resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
         print(resp)
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -24,6 +24,32 @@ from .base import BaseMerger
 from .conversion import PandasConversionMixin
 
 
+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+        df = PandasConversionMixin.toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return PandasConversionMixin.toPandas(spark_df)
+
+
 class SparkFeatureMerger(BaseMerger):
     engine = "spark"
     support_offline = True
@@ -166,29 +192,7 @@ class SparkFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas:
             if self._pandas_df is None:
-                df = self._result_df
-                # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-                # when we upgrade pyspark, we should check whether this workaround is still necessary
-                # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-                if semver.parse(pd.__version__)["major"] >= 2:
-                    import pyspark.sql.functions as pyspark_functions
-
-                    type_conversion_dict = {}
-                    for field in df.schema.fields:
-                        if str(field.dataType) == "TimestampType":
-                            df = df.withColumn(
-                                field.name,
-                                pyspark_functions.date_format(
-                                    pyspark_functions.to_timestamp(field.name),
-                                    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                                ),
-                            )
-                            type_conversion_dict[field.name] = "datetime64[ns]"
-                    df = PandasConversionMixin.toPandas(df)
-                    if type_conversion_dict:
-                        df = df.astype(type_conversion_dict)
-                else:
-                    df = PandasConversionMixin.toPandas(df)
+                df = spark_df_to_pandas(self._result_df)
                 self._pandas_df = df
                 self._set_indexes(self._pandas_df)
             return self._pandas_df
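The refactor above lifts the pandas>=2 timestamp workaround out of SparkFeatureMerger.get_df() into the module-level spark_df_to_pandas() helper, so other Spark code paths can reuse it. A short usage sketch, assuming a live SparkSession named spark and an illustrative input dataset:

    from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas

    spark_df = spark.read.parquet("some/dataset/path")  # illustrative input
    pandas_df = spark_df_to_pandas(spark_df)  # timestamp columns land as datetime64[ns], also on pandas>=2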
mlrun/feature_store/steps.py CHANGED
@@ -162,13 +162,19 @@ class MapValues(StepToDict, MLRunStep):
 example::
 
     # replace the value "U" with '0' in the age column
-    graph.to(MapValues(mapping={'age': {'U': '0'}}, with_original_features=True))
+    graph.to(MapValues(mapping={"age": {"U": "0"}}, with_original_features=True))
 
     # replace integers, example
-    graph.to(MapValues(mapping={'not': {0: 1, 1: 0}}))
+    graph.to(MapValues(mapping={"not": {0: 1, 1: 0}}))
 
     # replace by range, use -inf and inf for extended range
-    graph.to(MapValues(mapping={'numbers': {'ranges': {'negative': [-inf, 0], 'positive': [0, inf]}}}))
+    graph.to(
+        MapValues(
+            mapping={
+                "numbers": {"ranges": {"negative": [-inf, 0], "positive": [0, inf]}}
+            }
+        )
+    )
 
 :param mapping: a dict with entry per column and the associated old/new values map
 :param with_original_features: set to True to keep the original features
@@ -424,8 +430,10 @@ class OneHotEncoder(StepToDict, MLRunStep):
 
 example::
 
-    mapping = {'category': ['food', 'health', 'transportation'],
-               'gender': ['male', 'female']}
+    mapping = {
+        "category": ["food", "health", "transportation"],
+        "gender": ["male", "female"],
+    }
     graph.to(OneHotEncoder(mapping=one_hot_encoder_mapping))
 
 :param mapping: a dict of per column categories (to map to binary fields)
@@ -542,10 +550,12 @@ class DateExtractor(StepToDict, MLRunStep):
 
     # (taken from the fraud-detection end-to-end feature store demo)
     # Define the Transactions FeatureSet
-    transaction_set = fstore.FeatureSet("transactions",
-                                        entities=[fstore.Entity("source")],
-                                        timestamp_key='timestamp',
-                                        description="transactions feature set")
+    transaction_set = fstore.FeatureSet(
+        "transactions",
+        entities=[fstore.Entity("source")],
+        timestamp_key="timestamp",
+        description="transactions feature set",
+    )
 
     # Get FeatureSet computation graph
     transaction_graph = transaction_set.graph
@@ -553,11 +563,11 @@ class DateExtractor(StepToDict, MLRunStep):
     # Add the custom `DateExtractor` step
     # to the computation graph
     transaction_graph.to(
-        class_name='DateExtractor',
-        name='Extract Dates',
-        parts = ['hour', 'day_of_week'],
-        timestamp_col = 'timestamp',
-        )
+        class_name="DateExtractor",
+        name="Extract Dates",
+        parts=["hour", "day_of_week"],
+        timestamp_col="timestamp",
+    )
 
 :param parts: list of pandas style date-time parts you want to extract.
 :param timestamp_col: The name of the column containing the timestamps to extract from,
@@ -694,11 +704,12 @@ class DropFeatures(StepToDict, MLRunStep):
 
 example::
 
-    feature_set = fstore.FeatureSet("fs-new",
-                                    entities=[fstore.Entity("id")],
-                                    description="feature set",
-                                    engine="pandas",
-                                    )
+    feature_set = fstore.FeatureSet(
+        "fs-new",
+        entities=[fstore.Entity("id")],
+        description="feature set",
+        engine="pandas",
+    )
     # Pre-processing graph steps
     feature_set.graph.to(DropFeatures(features=["age"]))
     df_pandas = feature_set.ingest(data)
mlrun/features.py CHANGED
@@ -238,10 +238,7 @@ class Validator(ModelObj):
     from mlrun.features import Validator
 
     # Add validator to the feature 'bid' with check type
-    quotes_set["bid"].validator = Validator(
-        check_type=True,
-        severity="info"
-    )
+    quotes_set["bid"].validator = Validator(check_type=True, severity="info")
 
 :param check_type: check feature type e.g. True, False
 :param severity: severity name e.g. info, warning, etc.
@@ -280,10 +277,7 @@ class MinMaxValidator(Validator):
 
     # Add validator to the feature 'bid', where valid
     # minimal value is 52
-    quotes_set["bid"].validator = MinMaxValidator(
-        min=52,
-        severity="info"
-    )
+    quotes_set["bid"].validator = MinMaxValidator(min=52, severity="info")
 
 :param check_type: check feature type e.g. True, False
 :param severity: severity name e.g. info, warning, etc.
@@ -344,9 +338,7 @@ class MinMaxLenValidator(Validator):
     # Add length validator to the feature 'ticker', where valid
     # minimal length is 1 and maximal length is 10
     quotes_set["ticker"].validator = MinMaxLenValidator(
-        min=1,
-        max=10,
-        severity="info"
+        min=1, max=10, severity="info"
     )
 
 :param check_type: check feature type e.g. True, False
@@ -408,8 +400,7 @@ class RegexValidator(Validator):
     # expression '(\b[A-Za-z]{1}[0-9]{7}\b)' where valid values are
     # e.g. A1234567, z9874563, etc.
     quotes_set["name"].validator = RegexValidator(
-        regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)",
-        severity="info"
+        regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)", severity="info"
     )
 
 :param check_type: check feature type e.g. True, False
mlrun/frameworks/auto_mlrun/auto_mlrun.py CHANGED
@@ -363,7 +363,7 @@ class AutoMLRun:
 
     {
         "/.../custom_model.py": "MyModel",
-        "/.../custom_objects.py": ["object1", "object2"]
+        "/.../custom_objects.py": ["object1", "object2"],
     }
 
 All the paths will be accessed from the given 'custom_objects_directory',
@@ -464,7 +464,7 @@ class AutoMLRun:
 
     {
         "/.../custom_model.py": "MyModel",
-        "/.../custom_objects.py": ["object1", "object2"]
+        "/.../custom_objects.py": ["object1", "object2"],
     }
 
 All the paths will be accessed from the given 'custom_objects_directory',
mlrun/frameworks/lgbm/__init__.py CHANGED
@@ -241,7 +241,7 @@ def apply_mlrun(
 
     {
         "/.../custom_model.py": "MyModel",
-        "/.../custom_objects.py": ["object1", "object2"]
+        "/.../custom_objects.py": ["object1", "object2"],
     }
 
 All the paths will be accessed from the given 'custom_objects_directory', meaning
mlrun/frameworks/lgbm/callbacks/callback.py CHANGED
@@ -63,11 +63,9 @@ class Callback(ABC):
         def on_train_end(self):
             print("{self.name}: Done training!")
 
+
     apply_mlrun()
-    lgb.train(
-        ...,
-        callbacks=[ExampleCallback(name="Example")]
-    )
+    lgb.train(..., callbacks=[ExampleCallback(name="Example")])
     """
 
     def __init__(self, order: int = 10, before_iteration: bool = False):
mlrun/frameworks/lgbm/model_handler.py CHANGED
@@ -103,7 +103,7 @@ class LGBMModelHandler(MLModelHandler):
 
     {
         "/.../custom_model.py": "MyModel",
-        "/.../custom_objects.py": ["object1", "object2"]
+        "/.../custom_objects.py": ["object1", "object2"],
     }
 
 All the paths will be accessed from the given 'custom_objects_directory',