duckguard 2.2.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- duckguard/__init__.py +1 -1
- duckguard/anomaly/__init__.py +28 -0
- duckguard/anomaly/baselines.py +294 -0
- duckguard/anomaly/methods.py +16 -2
- duckguard/anomaly/ml_methods.py +724 -0
- duckguard/checks/__init__.py +26 -0
- duckguard/checks/conditional.py +796 -0
- duckguard/checks/distributional.py +524 -0
- duckguard/checks/multicolumn.py +726 -0
- duckguard/checks/query_based.py +643 -0
- duckguard/cli/main.py +257 -2
- duckguard/connectors/factory.py +30 -2
- duckguard/connectors/files.py +7 -3
- duckguard/core/column.py +851 -1
- duckguard/core/dataset.py +1035 -0
- duckguard/core/result.py +236 -0
- duckguard/freshness/__init__.py +33 -0
- duckguard/freshness/monitor.py +429 -0
- duckguard/history/schema.py +119 -1
- duckguard/notifications/__init__.py +20 -2
- duckguard/notifications/email.py +508 -0
- duckguard/profiler/distribution_analyzer.py +384 -0
- duckguard/profiler/outlier_detector.py +497 -0
- duckguard/profiler/pattern_matcher.py +301 -0
- duckguard/profiler/quality_scorer.py +445 -0
- duckguard/reports/html_reporter.py +1 -2
- duckguard/rules/executor.py +642 -0
- duckguard/rules/generator.py +4 -1
- duckguard/rules/schema.py +54 -0
- duckguard/schema_history/__init__.py +40 -0
- duckguard/schema_history/analyzer.py +414 -0
- duckguard/schema_history/tracker.py +288 -0
- duckguard/semantic/detector.py +17 -1
- duckguard-3.0.0.dist-info/METADATA +1072 -0
- {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/RECORD +38 -21
- duckguard-2.2.0.dist-info/METADATA +0 -351
- {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/WHEEL +0 -0
- {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/licenses/LICENSE +0 -0
duckguard/cli/main.py
CHANGED
|
@@ -338,17 +338,28 @@ def contract(
|
|
|
338
338
|
def anomaly(
|
|
339
339
|
source: str = typer.Argument(..., help="Path to file or connection string"),
|
|
340
340
|
table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
|
|
341
|
-
method: str = typer.Option("zscore", "--method", "-m", help="
|
|
341
|
+
method: str = typer.Option("zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"),
|
|
342
342
|
threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
|
|
343
343
|
columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
|
|
344
|
+
learn_baseline: bool = typer.Option(False, "--learn-baseline", "-L", help="Learn and store baseline from current data"),
|
|
344
345
|
) -> None:
|
|
345
346
|
"""
|
|
346
347
|
Detect anomalies in data.
|
|
347
348
|
|
|
349
|
+
[bold]Methods:[/bold]
|
|
350
|
+
zscore - Z-score based detection (default)
|
|
351
|
+
iqr - Interquartile range detection
|
|
352
|
+
percent_change - Percent change from baseline
|
|
353
|
+
baseline - Compare to learned baseline (ML)
|
|
354
|
+
ks_test - Distribution drift detection (ML)
|
|
355
|
+
|
|
348
356
|
[bold]Examples:[/bold]
|
|
349
357
|
duckguard anomaly data.csv
|
|
350
358
|
duckguard anomaly data.csv --method iqr --threshold 2.0
|
|
351
359
|
duckguard anomaly data.csv --column amount --column quantity
|
|
360
|
+
duckguard anomaly data.csv --learn-baseline # Store baseline
|
|
361
|
+
duckguard anomaly data.csv --method baseline # Compare to baseline
|
|
362
|
+
duckguard anomaly data.csv --method ks_test # Detect drift
|
|
352
363
|
"""
|
|
353
364
|
from duckguard.anomaly import detect_anomalies
|
|
354
365
|
from duckguard.connectors import connect
|
|
@@ -362,8 +373,38 @@ def anomaly(
|
|
|
362
373
|
console=console,
|
|
363
374
|
transient=True,
|
|
364
375
|
) as progress:
|
|
365
|
-
|
|
376
|
+
if learn_baseline:
|
|
377
|
+
progress.add_task("Learning baseline...", total=None)
|
|
378
|
+
else:
|
|
379
|
+
progress.add_task("Analyzing data...", total=None)
|
|
380
|
+
|
|
366
381
|
dataset = connect(source, table=table)
|
|
382
|
+
|
|
383
|
+
# Handle baseline learning
|
|
384
|
+
if learn_baseline:
|
|
385
|
+
from duckguard.anomaly import BaselineMethod
|
|
386
|
+
from duckguard.history import HistoryStorage
|
|
387
|
+
|
|
388
|
+
storage = HistoryStorage()
|
|
389
|
+
baseline_method = BaselineMethod(storage=storage)
|
|
390
|
+
|
|
391
|
+
# Get numeric columns to learn baselines for
|
|
392
|
+
target_columns = columns if columns else dataset.columns
|
|
393
|
+
learned = 0
|
|
394
|
+
|
|
395
|
+
for col_name in target_columns:
|
|
396
|
+
col = dataset[col_name]
|
|
397
|
+
if col.mean is not None: # Numeric column
|
|
398
|
+
values = col.values
|
|
399
|
+
baseline_method.fit(values)
|
|
400
|
+
baseline_method.save_baseline(source, col_name)
|
|
401
|
+
learned += 1
|
|
402
|
+
|
|
403
|
+
console.print(f"[green]LEARNED[/green] Baselines stored for {learned} columns")
|
|
404
|
+
console.print("[dim]Use --method baseline to compare against stored baselines[/dim]")
|
|
405
|
+
return
|
|
406
|
+
|
|
407
|
+
# Regular anomaly detection
|
|
367
408
|
report = detect_anomalies(
|
|
368
409
|
dataset,
|
|
369
410
|
method=method,
|
|
@@ -952,5 +993,219 @@ def report(
|
|
|
952
993
|
raise typer.Exit(1)
|
|
953
994
|
|
|
954
995
|
|
|
996
|
+
@app.command()
|
|
997
|
+
def freshness(
|
|
998
|
+
source: str = typer.Argument(..., help="Data source path"),
|
|
999
|
+
column: str | None = typer.Option(None, "--column", "-c", help="Timestamp column to check"),
|
|
1000
|
+
max_age: str = typer.Option("24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"),
|
|
1001
|
+
output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
|
|
1002
|
+
) -> None:
|
|
1003
|
+
"""
|
|
1004
|
+
Check data freshness.
|
|
1005
|
+
|
|
1006
|
+
Monitors how recently data was updated using file modification time
|
|
1007
|
+
or timestamp columns.
|
|
1008
|
+
|
|
1009
|
+
[bold]Examples:[/bold]
|
|
1010
|
+
duckguard freshness data.csv
|
|
1011
|
+
duckguard freshness data.csv --max-age 6h
|
|
1012
|
+
duckguard freshness data.csv --column updated_at
|
|
1013
|
+
duckguard freshness data.csv --format json
|
|
1014
|
+
"""
|
|
1015
|
+
import json as json_module
|
|
1016
|
+
|
|
1017
|
+
from duckguard.connectors import connect
|
|
1018
|
+
from duckguard.freshness import FreshnessMonitor
|
|
1019
|
+
from duckguard.freshness.monitor import parse_age_string
|
|
1020
|
+
|
|
1021
|
+
console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking freshness: [cyan]{source}[/cyan]\n")
|
|
1022
|
+
|
|
1023
|
+
try:
|
|
1024
|
+
threshold = parse_age_string(max_age)
|
|
1025
|
+
monitor = FreshnessMonitor(threshold=threshold)
|
|
1026
|
+
|
|
1027
|
+
with Progress(
|
|
1028
|
+
SpinnerColumn(),
|
|
1029
|
+
TextColumn("[progress.description]{task.description}"),
|
|
1030
|
+
console=console,
|
|
1031
|
+
transient=True,
|
|
1032
|
+
) as progress:
|
|
1033
|
+
progress.add_task("Checking freshness...", total=None)
|
|
1034
|
+
|
|
1035
|
+
if column:
|
|
1036
|
+
dataset = connect(source)
|
|
1037
|
+
result = monitor.check_column_timestamp(dataset, column)
|
|
1038
|
+
else:
|
|
1039
|
+
# Try file mtime first, fallback to dataset
|
|
1040
|
+
from pathlib import Path
|
|
1041
|
+
if Path(source).exists():
|
|
1042
|
+
result = monitor.check_file_mtime(source)
|
|
1043
|
+
else:
|
|
1044
|
+
dataset = connect(source)
|
|
1045
|
+
result = monitor.check(dataset)
|
|
1046
|
+
|
|
1047
|
+
if output_format == "json":
|
|
1048
|
+
console.print(json_module.dumps(result.to_dict(), indent=2))
|
|
1049
|
+
else:
|
|
1050
|
+
# Display table
|
|
1051
|
+
status_color = "green" if result.is_fresh else "red"
|
|
1052
|
+
status_text = "FRESH" if result.is_fresh else "STALE"
|
|
1053
|
+
|
|
1054
|
+
console.print(Panel(
|
|
1055
|
+
f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
|
|
1056
|
+
f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
|
|
1057
|
+
f"Age: [cyan]{result.age_human}[/cyan]\n"
|
|
1058
|
+
f"Threshold: [dim]{max_age}[/dim]\n"
|
|
1059
|
+
f"Method: [dim]{result.method.value}[/dim]",
|
|
1060
|
+
title="Freshness Check",
|
|
1061
|
+
border_style=status_color,
|
|
1062
|
+
))
|
|
1063
|
+
|
|
1064
|
+
if not result.is_fresh:
|
|
1065
|
+
raise typer.Exit(1)
|
|
1066
|
+
|
|
1067
|
+
except Exception as e:
|
|
1068
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
1069
|
+
raise typer.Exit(1)
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
@app.command()
|
|
1073
|
+
def schema(
|
|
1074
|
+
source: str = typer.Argument(..., help="Data source path"),
|
|
1075
|
+
action: str = typer.Option("show", "--action", "-a", help="Action: show, capture, history, changes"),
|
|
1076
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
|
|
1077
|
+
output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
|
|
1078
|
+
limit: int = typer.Option(10, "--limit", "-l", help="Number of results to show"),
|
|
1079
|
+
) -> None:
|
|
1080
|
+
"""
|
|
1081
|
+
Track schema evolution over time.
|
|
1082
|
+
|
|
1083
|
+
Captures schema snapshots and detects changes between versions.
|
|
1084
|
+
|
|
1085
|
+
[bold]Actions:[/bold]
|
|
1086
|
+
show - Show current schema
|
|
1087
|
+
capture - Capture a schema snapshot
|
|
1088
|
+
history - Show schema snapshot history
|
|
1089
|
+
changes - Detect changes from last snapshot
|
|
1090
|
+
|
|
1091
|
+
[bold]Examples:[/bold]
|
|
1092
|
+
duckguard schema data.csv # Show current schema
|
|
1093
|
+
duckguard schema data.csv --action capture # Capture snapshot
|
|
1094
|
+
duckguard schema data.csv --action history # View history
|
|
1095
|
+
duckguard schema data.csv --action changes # Detect changes
|
|
1096
|
+
"""
|
|
1097
|
+
import json as json_module
|
|
1098
|
+
|
|
1099
|
+
from duckguard.connectors import connect
|
|
1100
|
+
from duckguard.schema_history import SchemaChangeAnalyzer, SchemaTracker
|
|
1101
|
+
|
|
1102
|
+
console.print(f"\n[bold blue]DuckGuard[/bold blue] Schema: [cyan]{source}[/cyan]\n")
|
|
1103
|
+
|
|
1104
|
+
try:
|
|
1105
|
+
dataset = connect(source, table=table)
|
|
1106
|
+
tracker = SchemaTracker()
|
|
1107
|
+
analyzer = SchemaChangeAnalyzer()
|
|
1108
|
+
|
|
1109
|
+
if action == "show":
|
|
1110
|
+
# Display current schema
|
|
1111
|
+
col_table = Table(title="Current Schema")
|
|
1112
|
+
col_table.add_column("Column", style="cyan")
|
|
1113
|
+
col_table.add_column("Type", style="magenta")
|
|
1114
|
+
col_table.add_column("Position", justify="right")
|
|
1115
|
+
|
|
1116
|
+
ref = dataset.engine.get_source_reference(dataset.source)
|
|
1117
|
+
result = dataset.engine.execute(f"DESCRIBE {ref}")
|
|
1118
|
+
|
|
1119
|
+
for i, row in enumerate(result.fetchall()):
|
|
1120
|
+
col_table.add_row(row[0], row[1], str(i))
|
|
1121
|
+
|
|
1122
|
+
console.print(col_table)
|
|
1123
|
+
console.print(f"\n[dim]Total columns: {dataset.column_count}[/dim]")
|
|
1124
|
+
|
|
1125
|
+
elif action == "capture":
|
|
1126
|
+
with Progress(
|
|
1127
|
+
SpinnerColumn(),
|
|
1128
|
+
TextColumn("[progress.description]{task.description}"),
|
|
1129
|
+
console=console,
|
|
1130
|
+
transient=True,
|
|
1131
|
+
) as progress:
|
|
1132
|
+
progress.add_task("Capturing schema snapshot...", total=None)
|
|
1133
|
+
snapshot = tracker.capture(dataset)
|
|
1134
|
+
|
|
1135
|
+
console.print(f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]")
|
|
1136
|
+
console.print(f"[dim]Columns: {snapshot.column_count} | Rows: {snapshot.row_count:,}[/dim]")
|
|
1137
|
+
console.print(f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]")
|
|
1138
|
+
|
|
1139
|
+
elif action == "history":
|
|
1140
|
+
history = tracker.get_history(source, limit=limit)
|
|
1141
|
+
|
|
1142
|
+
if not history:
|
|
1143
|
+
console.print("[yellow]No schema history found for this source.[/yellow]")
|
|
1144
|
+
console.print("[dim]Use --action capture to create a snapshot first.[/dim]")
|
|
1145
|
+
return
|
|
1146
|
+
|
|
1147
|
+
if output_format == "json":
|
|
1148
|
+
data = [s.to_dict() for s in history]
|
|
1149
|
+
console.print(json_module.dumps(data, indent=2))
|
|
1150
|
+
else:
|
|
1151
|
+
table_obj = Table(title="Schema History")
|
|
1152
|
+
table_obj.add_column("Snapshot ID", style="cyan")
|
|
1153
|
+
table_obj.add_column("Captured At", style="dim")
|
|
1154
|
+
table_obj.add_column("Columns", justify="right")
|
|
1155
|
+
table_obj.add_column("Rows", justify="right")
|
|
1156
|
+
|
|
1157
|
+
for snapshot in history:
|
|
1158
|
+
table_obj.add_row(
|
|
1159
|
+
snapshot.snapshot_id[:8] + "...",
|
|
1160
|
+
snapshot.captured_at.strftime("%Y-%m-%d %H:%M"),
|
|
1161
|
+
str(snapshot.column_count),
|
|
1162
|
+
f"{snapshot.row_count:,}" if snapshot.row_count else "-",
|
|
1163
|
+
)
|
|
1164
|
+
|
|
1165
|
+
console.print(table_obj)
|
|
1166
|
+
|
|
1167
|
+
elif action == "changes":
|
|
1168
|
+
with Progress(
|
|
1169
|
+
SpinnerColumn(),
|
|
1170
|
+
TextColumn("[progress.description]{task.description}"),
|
|
1171
|
+
console=console,
|
|
1172
|
+
transient=True,
|
|
1173
|
+
) as progress:
|
|
1174
|
+
progress.add_task("Detecting schema changes...", total=None)
|
|
1175
|
+
report = analyzer.detect_changes(dataset)
|
|
1176
|
+
|
|
1177
|
+
if not report.has_changes:
|
|
1178
|
+
console.print("[green]No schema changes detected[/green]")
|
|
1179
|
+
console.print(f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]")
|
|
1180
|
+
return
|
|
1181
|
+
|
|
1182
|
+
# Display changes
|
|
1183
|
+
console.print(f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n")
|
|
1184
|
+
|
|
1185
|
+
if report.has_breaking_changes:
|
|
1186
|
+
console.print("[red bold]BREAKING CHANGES:[/red bold]")
|
|
1187
|
+
for change in report.breaking_changes:
|
|
1188
|
+
console.print(f" [red]X[/red] {change}")
|
|
1189
|
+
console.print()
|
|
1190
|
+
|
|
1191
|
+
non_breaking = report.non_breaking_changes
|
|
1192
|
+
if non_breaking:
|
|
1193
|
+
console.print("[dim]Non-breaking changes:[/dim]")
|
|
1194
|
+
for change in non_breaking:
|
|
1195
|
+
console.print(f" - {change}")
|
|
1196
|
+
|
|
1197
|
+
if report.has_breaking_changes:
|
|
1198
|
+
raise typer.Exit(1)
|
|
1199
|
+
|
|
1200
|
+
else:
|
|
1201
|
+
console.print(f"[red]Error:[/red] Unknown action: {action}")
|
|
1202
|
+
console.print("[dim]Valid actions: show, capture, history, changes[/dim]")
|
|
1203
|
+
raise typer.Exit(1)
|
|
1204
|
+
|
|
1205
|
+
except Exception as e:
|
|
1206
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
1207
|
+
raise typer.Exit(1)
|
|
1208
|
+
|
|
1209
|
+
|
|
955
1210
|
if __name__ == "__main__":
|
|
956
1211
|
app()
|
duckguard/connectors/factory.py
CHANGED
|
@@ -31,7 +31,7 @@ def register_connector(connector_class: type[Connector]) -> None:
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def connect(
|
|
34
|
-
source:
|
|
34
|
+
source: Any,
|
|
35
35
|
*,
|
|
36
36
|
table: str | None = None,
|
|
37
37
|
schema: str | None = None,
|
|
@@ -46,7 +46,7 @@ def connect(
|
|
|
46
46
|
It automatically detects the source type and uses the appropriate connector.
|
|
47
47
|
|
|
48
48
|
Args:
|
|
49
|
-
source: Path to file, connection string, or
|
|
49
|
+
source: Path to file, connection string, URL, or DataFrame (pandas/polars/pyarrow)
|
|
50
50
|
table: Table name (for database connections)
|
|
51
51
|
schema: Schema name (for database connections)
|
|
52
52
|
database: Database name (for database connections)
|
|
@@ -60,6 +60,9 @@ def connect(
|
|
|
60
60
|
# Connect to a CSV file
|
|
61
61
|
orders = connect("data/orders.csv")
|
|
62
62
|
|
|
63
|
+
# Connect to a DataFrame
|
|
64
|
+
orders = connect(df)
|
|
65
|
+
|
|
63
66
|
# Connect to a Parquet file on S3
|
|
64
67
|
orders = connect("s3://bucket/orders.parquet")
|
|
65
68
|
|
|
@@ -72,6 +75,23 @@ def connect(
|
|
|
72
75
|
Raises:
|
|
73
76
|
ValueError: If no connector can handle the source
|
|
74
77
|
"""
|
|
78
|
+
# Handle DataFrame sources (pandas, polars, pyarrow)
|
|
79
|
+
if not isinstance(source, str):
|
|
80
|
+
# Check if it's a DataFrame-like object
|
|
81
|
+
if hasattr(source, '__dataframe__') or hasattr(source, 'to_pandas') or \
|
|
82
|
+
(hasattr(source, 'shape') and hasattr(source, 'columns')):
|
|
83
|
+
# Register DataFrame with engine
|
|
84
|
+
if engine is None:
|
|
85
|
+
engine = DuckGuardEngine.get_instance()
|
|
86
|
+
|
|
87
|
+
# Generate a unique name for the DataFrame
|
|
88
|
+
import hashlib
|
|
89
|
+
import time
|
|
90
|
+
df_name = f"df_{hashlib.md5(str(time.time()).encode()).hexdigest()[:8]}"
|
|
91
|
+
|
|
92
|
+
engine.register_dataframe(df_name, source)
|
|
93
|
+
return Dataset(source=df_name, engine=engine, name="dataframe")
|
|
94
|
+
|
|
75
95
|
config = ConnectionConfig(
|
|
76
96
|
source=source,
|
|
77
97
|
table=table,
|
|
@@ -99,6 +119,10 @@ def connect(
|
|
|
99
119
|
|
|
100
120
|
def _is_database_connection(source: str) -> bool:
|
|
101
121
|
"""Check if source is a database connection string."""
|
|
122
|
+
# Only handle string sources
|
|
123
|
+
if not isinstance(source, str):
|
|
124
|
+
return False
|
|
125
|
+
|
|
102
126
|
db_prefixes = (
|
|
103
127
|
"postgres://",
|
|
104
128
|
"postgresql://",
|
|
@@ -143,6 +167,10 @@ def _handle_database_connection(
|
|
|
143
167
|
engine: DuckGuardEngine | None,
|
|
144
168
|
) -> Dataset:
|
|
145
169
|
"""Handle database connection strings."""
|
|
170
|
+
# Validate source is a string
|
|
171
|
+
if not isinstance(source, str):
|
|
172
|
+
raise ValueError(f"Expected string source, got {type(source).__name__}")
|
|
173
|
+
|
|
146
174
|
source_lower = source.lower()
|
|
147
175
|
|
|
148
176
|
# PostgreSQL
|
duckguard/connectors/files.py
CHANGED
|
@@ -65,6 +65,10 @@ class FileConnector(Connector):
|
|
|
65
65
|
@classmethod
|
|
66
66
|
def can_handle(cls, source: str) -> bool:
|
|
67
67
|
"""Check if this connector can handle the source."""
|
|
68
|
+
# Only handle string paths
|
|
69
|
+
if not isinstance(source, str):
|
|
70
|
+
return False
|
|
71
|
+
|
|
68
72
|
# Check for file extensions
|
|
69
73
|
path = Path(source)
|
|
70
74
|
ext = path.suffix.lower()
|
|
@@ -99,7 +103,7 @@ class S3Connector(FileConnector):
|
|
|
99
103
|
@classmethod
|
|
100
104
|
def can_handle(cls, source: str) -> bool:
|
|
101
105
|
"""Check if this is an S3 path."""
|
|
102
|
-
return source.lower().startswith("s3://")
|
|
106
|
+
return isinstance(source, str) and source.lower().startswith("s3://")
|
|
103
107
|
|
|
104
108
|
@classmethod
|
|
105
109
|
def get_priority(cls) -> int:
|
|
@@ -113,7 +117,7 @@ class GCSConnector(FileConnector):
|
|
|
113
117
|
@classmethod
|
|
114
118
|
def can_handle(cls, source: str) -> bool:
|
|
115
119
|
"""Check if this is a GCS path."""
|
|
116
|
-
return source.lower().startswith(("gs://", "gcs://"))
|
|
120
|
+
return isinstance(source, str) and source.lower().startswith(("gs://", "gcs://"))
|
|
117
121
|
|
|
118
122
|
@classmethod
|
|
119
123
|
def get_priority(cls) -> int:
|
|
@@ -127,7 +131,7 @@ class AzureConnector(FileConnector):
|
|
|
127
131
|
@classmethod
|
|
128
132
|
def can_handle(cls, source: str) -> bool:
|
|
129
133
|
"""Check if this is an Azure path."""
|
|
130
|
-
return source.lower().startswith(("az://", "abfs://"))
|
|
134
|
+
return isinstance(source, str) and source.lower().startswith(("az://", "abfs://"))
|
|
131
135
|
|
|
132
136
|
@classmethod
|
|
133
137
|
def get_priority(cls) -> int:
|