datatoolpack 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/PKG-INFO +110 -1
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/README.md +109 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/autodata/client.py +131 -1
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/datatoolpack.egg-info/PKG-INFO +110 -1
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/setup.py +1 -1
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/autodata/__init__.py +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/datatoolpack.egg-info/SOURCES.txt +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/datatoolpack.egg-info/dependency_links.txt +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/datatoolpack.egg-info/requires.txt +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/datatoolpack.egg-info/top_level.txt +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/pyproject.toml +0 -0
- {datatoolpack-0.3.0 → datatoolpack-0.4.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datatoolpack
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Official Python SDK for the AutoData ML data preparation pipeline API
|
|
5
5
|
Home-page: https://autodata.datatoolpack.com
|
|
6
6
|
Author: AutoData Team
|
|
@@ -500,6 +500,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
|
|
|
500
500
|
|
|
501
501
|
---
|
|
502
502
|
|
|
503
|
+
## Quality Alerts
|
|
504
|
+
|
|
505
|
+
Monitor pipeline metrics and get notified when thresholds are breached:
|
|
506
|
+
|
|
507
|
+
```python
|
|
508
|
+
# Create a quality alert rule
|
|
509
|
+
rule = client.create_quality_alert(
|
|
510
|
+
name="High row loss",
|
|
511
|
+
metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
|
|
512
|
+
operator=">", # >, <, >=, <=, ==
|
|
513
|
+
threshold=10.0,
|
|
514
|
+
severity="critical", # warning or critical
|
|
515
|
+
stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
# List rules
|
|
519
|
+
rules = client.list_quality_alerts()
|
|
520
|
+
|
|
521
|
+
# Get fired alert events
|
|
522
|
+
events = client.get_alert_events(session_id="optional-filter")
|
|
523
|
+
|
|
524
|
+
# Delete a rule
|
|
525
|
+
client.delete_quality_alert(rule_id=rule["rule"]["id"])
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
## Sync Watermarks
|
|
529
|
+
|
|
530
|
+
Track incremental sync progress for connector-based pipelines:
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
# List all watermarks
|
|
534
|
+
watermarks = client.list_watermarks()
|
|
535
|
+
|
|
536
|
+
# Reset a watermark (re-sync from beginning)
|
|
537
|
+
client.reset_watermark(watermark_id="wm-123")
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
## Scheduled Runs
|
|
541
|
+
|
|
542
|
+
Automate recurring pipeline executions:
|
|
543
|
+
|
|
544
|
+
```python
|
|
545
|
+
# Create an interval-based schedule (every 24 hours)
|
|
546
|
+
schedule = client.create_scheduled_run(
|
|
547
|
+
name="Daily ETL",
|
|
548
|
+
schedule_type="interval",
|
|
549
|
+
interval_minutes=1440,
|
|
550
|
+
connector_type="snowflake",
|
|
551
|
+
table="orders",
|
|
552
|
+
credential_id="cred-id",
|
|
553
|
+
y_columns=["target"],
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Create with cron expression
|
|
557
|
+
schedule = client.create_scheduled_run(
|
|
558
|
+
name="Weekday ETL",
|
|
559
|
+
schedule_type="cron",
|
|
560
|
+
cron_expression="0 8 * * 1-5",
|
|
561
|
+
connector_type="snowflake",
|
|
562
|
+
table="orders",
|
|
563
|
+
credential_id="cred-id",
|
|
564
|
+
y_columns=["target"],
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
# List and delete
|
|
568
|
+
schedules = client.list_scheduled_runs()
|
|
569
|
+
client.delete_scheduled_run(run_id="run-id")
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
## Folder Listeners
|
|
573
|
+
|
|
574
|
+
Automatically trigger pipelines when new files appear:
|
|
575
|
+
|
|
576
|
+
```python
|
|
577
|
+
# Create an S3 folder listener
|
|
578
|
+
listener = client.create_listener(
|
|
579
|
+
name="S3 Ingest",
|
|
580
|
+
source_type="s3", # s3, gcs, azure_blob, local, sftp
|
|
581
|
+
folder_path="s3://bucket/incoming/",
|
|
582
|
+
credential_id="cred-id",
|
|
583
|
+
y_columns=["target"],
|
|
584
|
+
pipeline_config={"enable_dsg": False},
|
|
585
|
+
)
|
|
586
|
+
|
|
587
|
+
# List and delete
|
|
588
|
+
listeners = client.list_listeners()
|
|
589
|
+
client.delete_listener(listener_id="listener-id")
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
## Pipeline Retry
|
|
593
|
+
|
|
594
|
+
Retry a failed pipeline from its last checkpoint:
|
|
595
|
+
|
|
596
|
+
```python
|
|
597
|
+
result = client.retry_session(session_id="failed-session-id")
|
|
598
|
+
print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
|
|
599
|
+
```
|
|
600
|
+
|
|
601
|
+
## Worker Status
|
|
602
|
+
|
|
603
|
+
Check the processing worker fleet status:
|
|
604
|
+
|
|
605
|
+
```python
|
|
606
|
+
status = client.worker_status()
|
|
607
|
+
print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
---
|
|
611
|
+
|
|
503
612
|
## Requirements
|
|
504
613
|
|
|
505
614
|
- Python >= 3.8
|
|
@@ -463,6 +463,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
|
|
|
463
463
|
|
|
464
464
|
---
|
|
465
465
|
|
|
466
|
+
## Quality Alerts
|
|
467
|
+
|
|
468
|
+
Monitor pipeline metrics and get notified when thresholds are breached:
|
|
469
|
+
|
|
470
|
+
```python
|
|
471
|
+
# Create a quality alert rule
|
|
472
|
+
rule = client.create_quality_alert(
|
|
473
|
+
name="High row loss",
|
|
474
|
+
metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
|
|
475
|
+
operator=">", # >, <, >=, <=, ==
|
|
476
|
+
threshold=10.0,
|
|
477
|
+
severity="critical", # warning or critical
|
|
478
|
+
stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
# List rules
|
|
482
|
+
rules = client.list_quality_alerts()
|
|
483
|
+
|
|
484
|
+
# Get fired alert events
|
|
485
|
+
events = client.get_alert_events(session_id="optional-filter")
|
|
486
|
+
|
|
487
|
+
# Delete a rule
|
|
488
|
+
client.delete_quality_alert(rule_id=rule["rule"]["id"])
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
## Sync Watermarks
|
|
492
|
+
|
|
493
|
+
Track incremental sync progress for connector-based pipelines:
|
|
494
|
+
|
|
495
|
+
```python
|
|
496
|
+
# List all watermarks
|
|
497
|
+
watermarks = client.list_watermarks()
|
|
498
|
+
|
|
499
|
+
# Reset a watermark (re-sync from beginning)
|
|
500
|
+
client.reset_watermark(watermark_id="wm-123")
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
## Scheduled Runs
|
|
504
|
+
|
|
505
|
+
Automate recurring pipeline executions:
|
|
506
|
+
|
|
507
|
+
```python
|
|
508
|
+
# Create an interval-based schedule (every 24 hours)
|
|
509
|
+
schedule = client.create_scheduled_run(
|
|
510
|
+
name="Daily ETL",
|
|
511
|
+
schedule_type="interval",
|
|
512
|
+
interval_minutes=1440,
|
|
513
|
+
connector_type="snowflake",
|
|
514
|
+
table="orders",
|
|
515
|
+
credential_id="cred-id",
|
|
516
|
+
y_columns=["target"],
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
# Create with cron expression
|
|
520
|
+
schedule = client.create_scheduled_run(
|
|
521
|
+
name="Weekday ETL",
|
|
522
|
+
schedule_type="cron",
|
|
523
|
+
cron_expression="0 8 * * 1-5",
|
|
524
|
+
connector_type="snowflake",
|
|
525
|
+
table="orders",
|
|
526
|
+
credential_id="cred-id",
|
|
527
|
+
y_columns=["target"],
|
|
528
|
+
)
|
|
529
|
+
|
|
530
|
+
# List and delete
|
|
531
|
+
schedules = client.list_scheduled_runs()
|
|
532
|
+
client.delete_scheduled_run(run_id="run-id")
|
|
533
|
+
```
|
|
534
|
+
|
|
535
|
+
## Folder Listeners
|
|
536
|
+
|
|
537
|
+
Automatically trigger pipelines when new files appear:
|
|
538
|
+
|
|
539
|
+
```python
|
|
540
|
+
# Create an S3 folder listener
|
|
541
|
+
listener = client.create_listener(
|
|
542
|
+
name="S3 Ingest",
|
|
543
|
+
source_type="s3", # s3, gcs, azure_blob, local, sftp
|
|
544
|
+
folder_path="s3://bucket/incoming/",
|
|
545
|
+
credential_id="cred-id",
|
|
546
|
+
y_columns=["target"],
|
|
547
|
+
pipeline_config={"enable_dsg": False},
|
|
548
|
+
)
|
|
549
|
+
|
|
550
|
+
# List and delete
|
|
551
|
+
listeners = client.list_listeners()
|
|
552
|
+
client.delete_listener(listener_id="listener-id")
|
|
553
|
+
```
|
|
554
|
+
|
|
555
|
+
## Pipeline Retry
|
|
556
|
+
|
|
557
|
+
Retry a failed pipeline from its last checkpoint:
|
|
558
|
+
|
|
559
|
+
```python
|
|
560
|
+
result = client.retry_session(session_id="failed-session-id")
|
|
561
|
+
print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
## Worker Status
|
|
565
|
+
|
|
566
|
+
Check the processing worker fleet status:
|
|
567
|
+
|
|
568
|
+
```python
|
|
569
|
+
status = client.worker_status()
|
|
570
|
+
print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
---
|
|
574
|
+
|
|
466
575
|
## Requirements
|
|
467
576
|
|
|
468
577
|
- Python >= 3.8
|
|
@@ -655,7 +655,7 @@ class AutoDataClient:
|
|
|
655
655
|
result = r.json()
|
|
656
656
|
session_id = result.get("session_id")
|
|
657
657
|
|
|
658
|
-
if not wait:
|
|
658
|
+
if not wait or not session_id:
|
|
659
659
|
return result
|
|
660
660
|
|
|
661
661
|
try:
|
|
@@ -934,6 +934,136 @@ class AutoDataClient:
|
|
|
934
934
|
self._raise_for_error(r)
|
|
935
935
|
return r.json()
|
|
936
936
|
|
|
937
|
+
# ------------------------------------------------------------------
|
|
938
|
+
# Quality Alerts
|
|
939
|
+
# ------------------------------------------------------------------
|
|
940
|
+
|
|
941
|
+
def list_quality_alerts(self) -> List[Dict]:
|
|
942
|
+
"""List quality alert rules for the authenticated user."""
|
|
943
|
+
r = self._request("GET", self._url("/quality-alerts"))
|
|
944
|
+
self._raise_for_error(r)
|
|
945
|
+
return r.json().get("rules", [])
|
|
946
|
+
|
|
947
|
+
def create_quality_alert(
|
|
948
|
+
self,
|
|
949
|
+
name: str,
|
|
950
|
+
metric: str,
|
|
951
|
+
operator: str,
|
|
952
|
+
threshold: float,
|
|
953
|
+
severity: str = "warning",
|
|
954
|
+
stage: Optional[str] = None,
|
|
955
|
+
) -> Dict:
|
|
956
|
+
"""Create a quality alert rule.
|
|
957
|
+
|
|
958
|
+
Args:
|
|
959
|
+
name: Rule name.
|
|
960
|
+
metric: One of row_loss_pct, null_pct, column_drop_count, duration_seconds.
|
|
961
|
+
operator: One of >, <, >=, <=, ==.
|
|
962
|
+
threshold: Numeric threshold value.
|
|
963
|
+
severity: 'warning' or 'critical'.
|
|
964
|
+
stage: Optional pipeline stage (anomaly, dtc, mdh, cds, dsm, dsg).
|
|
965
|
+
"""
|
|
966
|
+
body: Dict = {"name": name, "metric": metric, "operator": operator,
|
|
967
|
+
"threshold": threshold, "severity": severity}
|
|
968
|
+
if stage:
|
|
969
|
+
body["stage"] = stage
|
|
970
|
+
r = self._request("POST", self._url("/quality-alerts"), json=body)
|
|
971
|
+
self._raise_for_error(r)
|
|
972
|
+
return r.json()
|
|
973
|
+
|
|
974
|
+
def delete_quality_alert(self, rule_id: str) -> Dict:
|
|
975
|
+
"""Delete a quality alert rule."""
|
|
976
|
+
r = self._request("DELETE", self._url(f"/quality-alerts/{rule_id}"))
|
|
977
|
+
self._raise_for_error(r)
|
|
978
|
+
return r.json()
|
|
979
|
+
|
|
980
|
+
def get_alert_events(self, session_id: Optional[str] = None) -> List[Dict]:
|
|
981
|
+
"""List recent quality alert events."""
|
|
982
|
+
params = {}
|
|
983
|
+
if session_id:
|
|
984
|
+
params["session_id"] = session_id
|
|
985
|
+
r = self._request("GET", self._url("/quality-alerts/events"), params=params)
|
|
986
|
+
self._raise_for_error(r)
|
|
987
|
+
return r.json().get("events", [])
|
|
988
|
+
|
|
989
|
+
# ------------------------------------------------------------------
|
|
990
|
+
# Scheduled Runs
|
|
991
|
+
# ------------------------------------------------------------------
|
|
992
|
+
|
|
993
|
+
def list_scheduled_runs(self) -> List[Dict]:
|
|
994
|
+
"""List all scheduled runs."""
|
|
995
|
+
r = self._request("GET", f"{self.base_url}/api/scheduled-runs")
|
|
996
|
+
self._raise_for_error(r)
|
|
997
|
+
return r.json().get("scheduled_runs", [])
|
|
998
|
+
|
|
999
|
+
def create_scheduled_run(self, **kwargs) -> Dict:
|
|
1000
|
+
"""Create a scheduled run.
|
|
1001
|
+
|
|
1002
|
+
Kwargs:
|
|
1003
|
+
name, schedule_type ('interval' or 'cron'), interval_minutes,
|
|
1004
|
+
cron_expression, connector_type, table, credential_id, y_columns, etc.
|
|
1005
|
+
"""
|
|
1006
|
+
r = self._request("POST", f"{self.base_url}/api/scheduled-runs", json=kwargs)
|
|
1007
|
+
self._raise_for_error(r)
|
|
1008
|
+
return r.json()
|
|
1009
|
+
|
|
1010
|
+
def delete_scheduled_run(self, run_id: str) -> Dict:
|
|
1011
|
+
"""Delete a scheduled run."""
|
|
1012
|
+
r = self._request("DELETE", self._api(f"/scheduled-runs/{run_id}"))
|
|
1013
|
+
self._raise_for_error(r)
|
|
1014
|
+
return r.json()
|
|
1015
|
+
|
|
1016
|
+
# ------------------------------------------------------------------
|
|
1017
|
+
# Folder Listeners
|
|
1018
|
+
# ------------------------------------------------------------------
|
|
1019
|
+
|
|
1020
|
+
def list_listeners(self) -> List[Dict]:
|
|
1021
|
+
"""List folder listeners."""
|
|
1022
|
+
r = self._request("GET", self._url("/listeners/folder"))
|
|
1023
|
+
self._raise_for_error(r)
|
|
1024
|
+
return r.json().get("listeners", [])
|
|
1025
|
+
|
|
1026
|
+
def create_listener(self, **kwargs) -> Dict:
|
|
1027
|
+
"""Create a folder listener.
|
|
1028
|
+
|
|
1029
|
+
Kwargs:
|
|
1030
|
+
name, source_type ('s3', 'gcs', 'azure_blob', 'local', 'sftp'),
|
|
1031
|
+
watch_path, credential_id, y_columns, pipeline_config, etc.
|
|
1032
|
+
"""
|
|
1033
|
+
r = self._request("POST", self._url("/listeners/folder"), json=kwargs)
|
|
1034
|
+
self._raise_for_error(r)
|
|
1035
|
+
return r.json()
|
|
1036
|
+
|
|
1037
|
+
def delete_listener(self, listener_id: str) -> Dict:
|
|
1038
|
+
"""Delete a folder listener."""
|
|
1039
|
+
r = self._request("DELETE", self._url(f"/listeners/folder/{listener_id}"))
|
|
1040
|
+
self._raise_for_error(r)
|
|
1041
|
+
return r.json()
|
|
1042
|
+
|
|
1043
|
+
# ------------------------------------------------------------------
|
|
1044
|
+
# Pipeline Retry
|
|
1045
|
+
# ------------------------------------------------------------------
|
|
1046
|
+
|
|
1047
|
+
def retry_session(self, session_id: str) -> Dict:
|
|
1048
|
+
"""Retry a failed pipeline session from its last checkpoint.
|
|
1049
|
+
|
|
1050
|
+
Args:
|
|
1051
|
+
session_id: ID of the failed session to retry.
|
|
1052
|
+
"""
|
|
1053
|
+
r = self._request("POST", self._url(f"/sessions/{session_id}/retry"))
|
|
1054
|
+
self._raise_for_error(r)
|
|
1055
|
+
return r.json()
|
|
1056
|
+
|
|
1057
|
+
# ------------------------------------------------------------------
|
|
1058
|
+
# Worker Status
|
|
1059
|
+
# ------------------------------------------------------------------
|
|
1060
|
+
|
|
1061
|
+
def worker_status(self) -> Dict:
|
|
1062
|
+
"""Get the worker fleet status (queue size, active workers, etc.)."""
|
|
1063
|
+
r = self._request("GET", self._url("/workers/status"))
|
|
1064
|
+
self._raise_for_error(r)
|
|
1065
|
+
return r.json()
|
|
1066
|
+
|
|
937
1067
|
# ------------------------------------------------------------------
|
|
938
1068
|
# Backward-compatibility alias
|
|
939
1069
|
# ------------------------------------------------------------------
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datatoolpack
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Official Python SDK for the AutoData ML data preparation pipeline API
|
|
5
5
|
Home-page: https://autodata.datatoolpack.com
|
|
6
6
|
Author: AutoData Team
|
|
@@ -500,6 +500,115 @@ with AutoDataClient(api_key="dtpk_...") as client:
|
|
|
500
500
|
|
|
501
501
|
---
|
|
502
502
|
|
|
503
|
+
## Quality Alerts
|
|
504
|
+
|
|
505
|
+
Monitor pipeline metrics and get notified when thresholds are breached:
|
|
506
|
+
|
|
507
|
+
```python
|
|
508
|
+
# Create a quality alert rule
|
|
509
|
+
rule = client.create_quality_alert(
|
|
510
|
+
name="High row loss",
|
|
511
|
+
metric="row_loss_pct", # row_loss_pct, null_pct, column_drop_count, duration_seconds
|
|
512
|
+
operator=">", # >, <, >=, <=, ==
|
|
513
|
+
threshold=10.0,
|
|
514
|
+
severity="critical", # warning or critical
|
|
515
|
+
stage="mdh", # optional: anomaly, dtc, mdh, cds, dsm, dsg
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
# List rules
|
|
519
|
+
rules = client.list_quality_alerts()
|
|
520
|
+
|
|
521
|
+
# Get fired alert events
|
|
522
|
+
events = client.get_alert_events(session_id="optional-filter")
|
|
523
|
+
|
|
524
|
+
# Delete a rule
|
|
525
|
+
client.delete_quality_alert(rule_id=rule["rule"]["id"])
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
## Sync Watermarks
|
|
529
|
+
|
|
530
|
+
Track incremental sync progress for connector-based pipelines:
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
# List all watermarks
|
|
534
|
+
watermarks = client.list_watermarks()
|
|
535
|
+
|
|
536
|
+
# Reset a watermark (re-sync from beginning)
|
|
537
|
+
client.reset_watermark(watermark_id="wm-123")
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
## Scheduled Runs
|
|
541
|
+
|
|
542
|
+
Automate recurring pipeline executions:
|
|
543
|
+
|
|
544
|
+
```python
|
|
545
|
+
# Create an interval-based schedule (every 24 hours)
|
|
546
|
+
schedule = client.create_scheduled_run(
|
|
547
|
+
name="Daily ETL",
|
|
548
|
+
schedule_type="interval",
|
|
549
|
+
interval_minutes=1440,
|
|
550
|
+
connector_type="snowflake",
|
|
551
|
+
table="orders",
|
|
552
|
+
credential_id="cred-id",
|
|
553
|
+
y_columns=["target"],
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Create with cron expression
|
|
557
|
+
schedule = client.create_scheduled_run(
|
|
558
|
+
name="Weekday ETL",
|
|
559
|
+
schedule_type="cron",
|
|
560
|
+
cron_expression="0 8 * * 1-5",
|
|
561
|
+
connector_type="snowflake",
|
|
562
|
+
table="orders",
|
|
563
|
+
credential_id="cred-id",
|
|
564
|
+
y_columns=["target"],
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
# List and delete
|
|
568
|
+
schedules = client.list_scheduled_runs()
|
|
569
|
+
client.delete_scheduled_run(run_id="run-id")
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
## Folder Listeners
|
|
573
|
+
|
|
574
|
+
Automatically trigger pipelines when new files appear:
|
|
575
|
+
|
|
576
|
+
```python
|
|
577
|
+
# Create an S3 folder listener
|
|
578
|
+
listener = client.create_listener(
|
|
579
|
+
name="S3 Ingest",
|
|
580
|
+
source_type="s3", # s3, gcs, azure_blob, local, sftp
|
|
581
|
+
folder_path="s3://bucket/incoming/",
|
|
582
|
+
credential_id="cred-id",
|
|
583
|
+
y_columns=["target"],
|
|
584
|
+
pipeline_config={"enable_dsg": False},
|
|
585
|
+
)
|
|
586
|
+
|
|
587
|
+
# List and delete
|
|
588
|
+
listeners = client.list_listeners()
|
|
589
|
+
client.delete_listener(listener_id="listener-id")
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
## Pipeline Retry
|
|
593
|
+
|
|
594
|
+
Retry a failed pipeline from its last checkpoint:
|
|
595
|
+
|
|
596
|
+
```python
|
|
597
|
+
result = client.retry_session(session_id="failed-session-id")
|
|
598
|
+
print(result) # {'success': True, 'session_id': '...', 'message': 'Retrying from last checkpoint'}
|
|
599
|
+
```
|
|
600
|
+
|
|
601
|
+
## Worker Status
|
|
602
|
+
|
|
603
|
+
Check the processing worker fleet status:
|
|
604
|
+
|
|
605
|
+
```python
|
|
606
|
+
status = client.worker_status()
|
|
607
|
+
print(status) # {'backend': 'local', 'active_jobs': 2, 'queue_size': 0, ...}
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
---
|
|
611
|
+
|
|
503
612
|
## Requirements
|
|
504
613
|
|
|
505
614
|
- Python >= 3.8
|
|
@@ -7,7 +7,7 @@ with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
|
|
|
7
7
|
|
|
8
8
|
setup(
|
|
9
9
|
name="datatoolpack",
|
|
10
|
-
version="0.3.0",
|
|
10
|
+
version="0.4.0",
|
|
11
11
|
description="Official Python SDK for the AutoData ML data preparation pipeline API",
|
|
12
12
|
long_description=long_description,
|
|
13
13
|
long_description_content_type="text/markdown",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|