datatoolpack 0.3.0__tar.gz → 0.4.0__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatoolpack
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Official Python SDK for the AutoData ML data preparation pipeline API
5
5
  Home-page: https://autodata.datatoolpack.com
6
6
  Author: AutoData Team
@@ -500,6 +500,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
500
500
 
501
501
  ---
502
502
 
503
+ ## Quality Alerts
504
+
505
+ Monitor pipeline metrics and get notified when thresholds are breached:
506
+
507
+ ```python
508
+ # Create a quality alert rule
509
+ rule = client.create_quality_alert(
510
+ name="High row loss",
511
+ metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
512
+ operator=">", # >, <, >=, <=, ==
513
+ threshold=10.0,
514
+ severity="critical", # warning or critical
515
+ stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
516
+ )
517
+
518
+ # List rules
519
+ rules = client.list_quality_alerts()
520
+
521
+ # Get fired alert events
522
+ events = client.get_alert_events(session_id="optional-filter")
523
+
524
+ # Delete a rule
525
+ client.delete_quality_alert(rule_id=rule["rule"]["id"])
526
+ ```
527
+
528
+ ## Sync Watermarks
529
+
530
+ Track incremental sync progress for connector-based pipelines:
531
+
532
+ ```python
533
+ # List all watermarks
534
+ watermarks = client.list_watermarks()
535
+
536
+ # Reset a watermark (re-sync from beginning)
537
+ client.reset_watermark(watermark_id="wm-123")
538
+ ```
539
+
540
+ ## Scheduled Runs
541
+
542
+ Automate recurring pipeline executions:
543
+
544
+ ```python
545
+ # Create an interval-based schedule (every 24 hours)
546
+ schedule = client.create_scheduled_run(
547
+ name="Daily ETL",
548
+ schedule_type="interval",
549
+ interval_minutes=1440,
550
+ connector_type="snowflake",
551
+ table="orders",
552
+ credential_id="cred-id",
553
+ y_columns=["target"],
554
+ )
555
+
556
+ # Create with cron expression
557
+ schedule = client.create_scheduled_run(
558
+ name="Weekday ETL",
559
+ schedule_type="cron",
560
+ cron_expression="0 8 * * 1-5",
561
+ connector_type="snowflake",
562
+ table="orders",
563
+ credential_id="cred-id",
564
+ y_columns=["target"],
565
+ )
566
+
567
+ # List and delete
568
+ schedules = client.list_scheduled_runs()
569
+ client.delete_scheduled_run(run_id="run-id")
570
+ ```
571
+
572
+ ## Folder Listeners
573
+
574
+ Automatically trigger pipelines when new files appear:
575
+
576
+ ```python
577
+ # Create an S3 folder listener
578
+ listener = client.create_listener(
579
+ name="S3 Ingest",
580
+ source_type="s3", # s3, gcs, azure_blob, local, sftp
581
+ folder_path="s3://bucket/incoming/",
582
+ credential_id="cred-id",
583
+ y_columns=["target"],
584
+ pipeline_config={"enable_dsg": False},
585
+ )
586
+
587
+ # List and delete
588
+ listeners = client.list_listeners()
589
+ client.delete_listener(listener_id="listener-id")
590
+ ```
591
+
592
+ ## Pipeline Retry
593
+
594
+ Retry a failed pipeline from its last checkpoint:
595
+
596
+ ```python
597
+ result = client.retry_session(session_id="failed-session-id")
598
+ print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
599
+ ```
600
+
601
+ ## Worker Status
602
+
603
+ Check the processing worker fleet status:
604
+
605
+ ```python
606
+ status = client.worker_status()
607
+ print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
608
+ ```
609
+
610
+ ---
611
+
503
612
  ## Requirements
504
613
 
505
614
  - Python >= 3.8
@@ -463,6 +463,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
463
463
 
464
464
  ---
465
465
 
466
+ ## Quality Alerts
467
+
468
+ Monitor pipeline metrics and get notified when thresholds are breached:
469
+
470
+ ```python
471
+ # Create a quality alert rule
472
+ rule = client.create_quality_alert(
473
+ name="High row loss",
474
+ metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
475
+ operator=">", # >, <, >=, <=, ==
476
+ threshold=10.0,
477
+ severity="critical", # warning or critical
478
+ stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
479
+ )
480
+
481
+ # List rules
482
+ rules = client.list_quality_alerts()
483
+
484
+ # Get fired alert events
485
+ events = client.get_alert_events(session_id="optional-filter")
486
+
487
+ # Delete a rule
488
+ client.delete_quality_alert(rule_id=rule["rule"]["id"])
489
+ ```
490
+
491
+ ## Sync Watermarks
492
+
493
+ Track incremental sync progress for connector-based pipelines:
494
+
495
+ ```python
496
+ # List all watermarks
497
+ watermarks = client.list_watermarks()
498
+
499
+ # Reset a watermark (re-sync from beginning)
500
+ client.reset_watermark(watermark_id="wm-123")
501
+ ```
502
+
503
+ ## Scheduled Runs
504
+
505
+ Automate recurring pipeline executions:
506
+
507
+ ```python
508
+ # Create an interval-based schedule (every 24 hours)
509
+ schedule = client.create_scheduled_run(
510
+ name="Daily ETL",
511
+ schedule_type="interval",
512
+ interval_minutes=1440,
513
+ connector_type="snowflake",
514
+ table="orders",
515
+ credential_id="cred-id",
516
+ y_columns=["target"],
517
+ )
518
+
519
+ # Create with cron expression
520
+ schedule = client.create_scheduled_run(
521
+ name="Weekday ETL",
522
+ schedule_type="cron",
523
+ cron_expression="0 8 * * 1-5",
524
+ connector_type="snowflake",
525
+ table="orders",
526
+ credential_id="cred-id",
527
+ y_columns=["target"],
528
+ )
529
+
530
+ # List and delete
531
+ schedules = client.list_scheduled_runs()
532
+ client.delete_scheduled_run(run_id="run-id")
533
+ ```
534
+
535
+ ## Folder Listeners
536
+
537
+ Automatically trigger pipelines when new files appear:
538
+
539
+ ```python
540
+ # Create an S3 folder listener
541
+ listener = client.create_listener(
542
+ name="S3 Ingest",
543
+ source_type="s3", # s3, gcs, azure_blob, local, sftp
544
+ folder_path="s3://bucket/incoming/",
545
+ credential_id="cred-id",
546
+ y_columns=["target"],
547
+ pipeline_config={"enable_dsg": False},
548
+ )
549
+
550
+ # List and delete
551
+ listeners = client.list_listeners()
552
+ client.delete_listener(listener_id="listener-id")
553
+ ```
554
+
555
+ ## Pipeline Retry
556
+
557
+ Retry a failed pipeline from its last checkpoint:
558
+
559
+ ```python
560
+ result = client.retry_session(session_id="failed-session-id")
561
+ print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
562
+ ```
563
+
564
+ ## Worker Status
565
+
566
+ Check the processing worker fleet status:
567
+
568
+ ```python
569
+ status = client.worker_status()
570
+ print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
571
+ ```
572
+
573
+ ---
574
+
466
575
  ## Requirements
467
576
 
468
577
  - Python >= 3.8
@@ -655,7 +655,7 @@ class AutoDataClient:
655
655
  result = r.json()
656
656
  session_id = result.get("session_id")
657
657
 
658
- if not wait:
658
+ if not wait or not session_id:
659
659
  return result
660
660
 
661
661
  try:
@@ -934,6 +934,136 @@ class AutoDataClient:
934
934
  self._raise_for_error(r)
935
935
  return r.json()
936
936
 
937
+ # ------------------------------------------------------------------
938
+ # Quality Alerts
939
+ # ------------------------------------------------------------------
940
+
941
+ def list_quality_alerts(self) -> List[Dict]:
942
+ """List quality alert rules for the authenticated user."""
943
+ r = self._request("GET", self._url("/quality-alerts"))
944
+ self._raise_for_error(r)
945
+ return r.json().get("rules", [])
946
+
947
+ def create_quality_alert(
948
+ self,
949
+ name: str,
950
+ metric: str,
951
+ operator: str,
952
+ threshold: float,
953
+ severity: str = "warning",
954
+ stage: Optional[str] = None,
955
+ ) -> Dict:
956
+ """Create a quality alert rule.
957
+
958
+ Args:
959
+ name: Rule name.
960
+ metric: One of row_loss_pct, null_pct, column_drop_count, duration_seconds.
961
+ operator: One of >, <, >=, <=, ==.
962
+ threshold: Numeric threshold value.
963
+ severity: 'warning' or 'critical'.
964
+ stage: Optional pipeline stage (anomaly, dtc, mdh, cds, dsm, dsg).
965
+ """
966
+ body: Dict = {"name": name, "metric": metric, "operator": operator,
967
+ "threshold": threshold, "severity": severity}
968
+ if stage:
969
+ body["stage"] = stage
970
+ r = self._request("POST", self._url("/quality-alerts"), json=body)
971
+ self._raise_for_error(r)
972
+ return r.json()
973
+
974
+ def delete_quality_alert(self, rule_id: str) -> Dict:
975
+ """Delete a quality alert rule."""
976
+ r = self._request("DELETE", self._url(f"/quality-alerts/{rule_id}"))
977
+ self._raise_for_error(r)
978
+ return r.json()
979
+
980
+ def get_alert_events(self, session_id: Optional[str] = None) -> List[Dict]:
981
+ """List recent quality alert events."""
982
+ params = {}
983
+ if session_id:
984
+ params["session_id"] = session_id
985
+ r = self._request("GET", self._url("/quality-alerts/events"), params=params)
986
+ self._raise_for_error(r)
987
+ return r.json().get("events", [])
988
+
989
+ # ------------------------------------------------------------------
990
+ # Scheduled Runs
991
+ # ------------------------------------------------------------------
992
+
993
+ def list_scheduled_runs(self) -> List[Dict]:
994
+ """List all scheduled runs."""
995
+ r = self._request("GET", f"{self.base_url}/api/scheduled-runs")
996
+ self._raise_for_error(r)
997
+ return r.json().get("scheduled_runs", [])
998
+
999
+ def create_scheduled_run(self, **kwargs) -> Dict:
1000
+ """Create a scheduled run.
1001
+
1002
+ Kwargs:
1003
+ name, schedule_type ('interval' or 'cron'), interval_minutes,
1004
+ cron_expression, connector_type, table, credential_id, y_columns, etc.
1005
+ """
1006
+ r = self._request("POST", f"{self.base_url}/api/scheduled-runs", json=kwargs)
1007
+ self._raise_for_error(r)
1008
+ return r.json()
1009
+
1010
+ def delete_scheduled_run(self, run_id: str) -> Dict:
1011
+ """Delete a scheduled run."""
1012
+ r = self._request("DELETE", self._api(f"/scheduled-runs/{run_id}"))
1013
+ self._raise_for_error(r)
1014
+ return r.json()
1015
+
1016
+ # ------------------------------------------------------------------
1017
+ # Folder Listeners
1018
+ # ------------------------------------------------------------------
1019
+
1020
+ def list_listeners(self) -> List[Dict]:
1021
+ """List folder listeners."""
1022
+ r = self._request("GET", self._url("/listeners/folder"))
1023
+ self._raise_for_error(r)
1024
+ return r.json().get("listeners", [])
1025
+
1026
+ def create_listener(self, **kwargs) -> Dict:
1027
+ """Create a folder listener.
1028
+
1029
+ Kwargs:
1030
+ name, source_type ('s3', 'gcs', 'azure_blob', 'local', 'sftp'),
1031
+ watch_path, credential_id, y_columns, pipeline_config, etc.
1032
+ """
1033
+ r = self._request("POST", self._url("/listeners/folder"), json=kwargs)
1034
+ self._raise_for_error(r)
1035
+ return r.json()
1036
+
1037
+ def delete_listener(self, listener_id: str) -> Dict:
1038
+ """Delete a folder listener."""
1039
+ r = self._request("DELETE", self._url(f"/listeners/folder/{listener_id}"))
1040
+ self._raise_for_error(r)
1041
+ return r.json()
1042
+
1043
+ # ------------------------------------------------------------------
1044
+ # Pipeline Retry
1045
+ # ------------------------------------------------------------------
1046
+
1047
+ def retry_session(self, session_id: str) -> Dict:
1048
+ """Retry a failed pipeline session from its last checkpoint.
1049
+
1050
+ Args:
1051
+ session_id: ID of the failed session to retry.
1052
+ """
1053
+ r = self._request("POST", self._url(f"/sessions/{session_id}/retry"))
1054
+ self._raise_for_error(r)
1055
+ return r.json()
1056
+
1057
+ # ------------------------------------------------------------------
1058
+ # Worker Status
1059
+ # ------------------------------------------------------------------
1060
+
1061
+ def worker_status(self) -> Dict:
1062
+ """Get the worker fleet status (queue size, active workers, etc.)."""
1063
+ r = self._request("GET", self._url("/workers/status"))
1064
+ self._raise_for_error(r)
1065
+ return r.json()
1066
+
937
1067
  # ------------------------------------------------------------------
938
1068
  # Backward-compatibility alias
939
1069
  # ------------------------------------------------------------------
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatoolpack
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Official Python SDK for the AutoData ML data preparation pipeline API
5
5
  Home-page: https://autodata.datatoolpack.com
6
6
  Author: AutoData Team
@@ -500,6 +500,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
500
500
 
501
501
  ---
502
502
 
503
+ ## Quality Alerts
504
+
505
+ Monitor pipeline metrics and get notified when thresholds are breached:
506
+
507
+ ```python
508
+ # Create a quality alert rule
509
+ rule = client.create_quality_alert(
510
+ name="High row loss",
511
+ metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
512
+ operator=">", # >, <, >=, <=, ==
513
+ threshold=10.0,
514
+ severity="critical", # warning or critical
515
+ stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
516
+ )
517
+
518
+ # List rules
519
+ rules = client.list_quality_alerts()
520
+
521
+ # Get fired alert events
522
+ events = client.get_alert_events(session_id="optional-filter")
523
+
524
+ # Delete a rule
525
+ client.delete_quality_alert(rule_id=rule["rule"]["id"])
526
+ ```
527
+
528
+ ## Sync Watermarks
529
+
530
+ Track incremental sync progress for connector-based pipelines:
531
+
532
+ ```python
533
+ # List all watermarks
534
+ watermarks = client.list_watermarks()
535
+
536
+ # Reset a watermark (re-sync from beginning)
537
+ client.reset_watermark(watermark_id="wm-123")
538
+ ```
539
+
540
+ ## Scheduled Runs
541
+
542
+ Automate recurring pipeline executions:
543
+
544
+ ```python
545
+ # Create an interval-based schedule (every 24 hours)
546
+ schedule = client.create_scheduled_run(
547
+ name="Daily ETL",
548
+ schedule_type="interval",
549
+ interval_minutes=1440,
550
+ connector_type="snowflake",
551
+ table="orders",
552
+ credential_id="cred-id",
553
+ y_columns=["target"],
554
+ )
555
+
556
+ # Create with cron expression
557
+ schedule = client.create_scheduled_run(
558
+ name="Weekday ETL",
559
+ schedule_type="cron",
560
+ cron_expression="0 8 * * 1-5",
561
+ connector_type="snowflake",
562
+ table="orders",
563
+ credential_id="cred-id",
564
+ y_columns=["target"],
565
+ )
566
+
567
+ # List and delete
568
+ schedules = client.list_scheduled_runs()
569
+ client.delete_scheduled_run(run_id="run-id")
570
+ ```
571
+
572
+ ## Folder Listeners
573
+
574
+ Automatically trigger pipelines when new files appear:
575
+
576
+ ```python
577
+ # Create an S3 folder listener
578
+ listener = client.create_listener(
579
+ name="S3 Ingest",
580
+ source_type="s3", # s3, gcs, azure_blob, local, sftp
581
+ folder_path="s3://bucket/incoming/",
582
+ credential_id="cred-id",
583
+ y_columns=["target"],
584
+ pipeline_config={"enable_dsg": False},
585
+ )
586
+
587
+ # List and delete
588
+ listeners = client.list_listeners()
589
+ client.delete_listener(listener_id="listener-id")
590
+ ```
591
+
592
+ ## Pipeline Retry
593
+
594
+ Retry a failed pipeline from its last checkpoint:
595
+
596
+ ```python
597
+ result = client.retry_session(session_id="failed-session-id")
598
+ print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
599
+ ```
600
+
601
+ ## Worker Status
602
+
603
+ Check the processing worker fleet status:
604
+
605
+ ```python
606
+ status = client.worker_status()
607
+ print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
608
+ ```
609
+
610
+ ---
611
+
503
612
  ## Requirements
504
613
 
505
614
  - Python >= 3.8
@@ -7,7 +7,7 @@ with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
7
7
 
8
8
  setup(
9
9
  name="datatoolpack",
10
- version="0.3.0",
10
+ version="0.4.0",
11
11
  description="Official Python SDK for the AutoData ML data preparation pipeline API",
12
12
  long_description=long_description,
13
13
  long_description_content_type="text/markdown",
File without changes