DeepFabric 4.4.1__py3-none-any.whl → 4.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +8 -0
- deepfabric/auth.py +8 -2
- deepfabric/builders.py +2 -2
- deepfabric/builders_agent.py +18 -6
- deepfabric/cli.py +292 -13
- deepfabric/cloud_upload.py +884 -0
- deepfabric/config.py +47 -20
- deepfabric/config_manager.py +2 -2
- deepfabric/dataset.py +302 -0
- deepfabric/evaluation/backends/__init__.py +2 -0
- deepfabric/evaluation/backends/llm_eval_backend.py +527 -0
- deepfabric/evaluation/backends/ollama_backend.py +3 -3
- deepfabric/evaluation/backends/tool_call_parsers.py +7 -7
- deepfabric/evaluation/backends/transformers_backend.py +73 -16
- deepfabric/evaluation/evaluator.py +41 -7
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +13 -8
- deepfabric/evaluation/inference.py +77 -5
- deepfabric/evaluation/metrics.py +4 -0
- deepfabric/evaluation/parser.py +8 -8
- deepfabric/evaluation/reporters/cloud_reporter.py +19 -6
- deepfabric/exceptions.py +14 -0
- deepfabric/generator.py +8 -4
- deepfabric/graph.py +38 -0
- deepfabric/hf_hub.py +1 -1
- deepfabric/loader.py +554 -0
- deepfabric/schemas.py +7 -7
- deepfabric/topic_manager.py +4 -0
- deepfabric/training/__init__.py +24 -5
- deepfabric/training/callback.py +43 -1
- deepfabric/training/dataset_utils.py +223 -0
- deepfabric/training/metrics_sender.py +50 -16
- deepfabric/tui.py +9 -1
- deepfabric/utils.py +14 -0
- deepfabric/validation.py +1 -1
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/METADATA +84 -177
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/RECORD +39 -34
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,884 @@
|
|
|
1
|
+
"""Cloud upload functionality for DeepFabric datasets and topic graphs.
|
|
2
|
+
|
|
3
|
+
This module provides functions to upload locally generated datasets and topic graphs
|
|
4
|
+
to DeepFabric Cloud. It supports both interactive mode (with prompts) and headless
|
|
5
|
+
mode for CI/CD pipelines.
|
|
6
|
+
|
|
7
|
+
Feature is gated behind the EXPERIMENTAL_DF environment variable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import contextlib
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import time
|
|
15
|
+
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from rich.console import Console
|
|
22
|
+
|
|
23
|
+
from .tui import DeepFabricTUI
|
|
24
|
+
|
|
25
|
+
import click
|
|
26
|
+
import httpx
|
|
27
|
+
|
|
28
|
+
from rich.panel import Panel
|
|
29
|
+
|
|
30
|
+
from .auth import (
|
|
31
|
+
DEFAULT_API_URL,
|
|
32
|
+
clear_tokens,
|
|
33
|
+
device_flow_login,
|
|
34
|
+
get_auth_token,
|
|
35
|
+
get_config,
|
|
36
|
+
is_authenticated,
|
|
37
|
+
save_config,
|
|
38
|
+
)
|
|
39
|
+
from .tui import get_tui
|
|
40
|
+
from .utils import get_bool_env
|
|
41
|
+
|
|
42
|
+
# HTTP status codes
|
|
43
|
+
HTTP_UNAUTHORIZED = 401
|
|
44
|
+
HTTP_BAD_REQUEST = 400
|
|
45
|
+
HTTP_CONFLICT = 409
|
|
46
|
+
HTTP_INTERNAL_SERVER_ERROR = 500
|
|
47
|
+
HTTP_TOO_MANY_REQUESTS = 429
|
|
48
|
+
|
|
49
|
+
# Default frontend URL - derived from API URL or explicit env var
|
|
50
|
+
DEFAULT_FRONTEND_URL = "https://deepfabric.cloud"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def derive_frontend_url(api_url: str = DEFAULT_API_URL) -> str:
    """Resolve the frontend base URL for a given API URL.

    The DEEPFABRIC_FRONTEND_URL environment variable always wins; otherwise
    the frontend location is inferred from the API host (local development
    ports, ``api.`` -> ``app.`` substitution), falling back to the public
    default.

    Args:
        api_url: The API URL (e.g., https://api.deepfabric.cloud)

    Returns:
        The frontend URL (e.g., https://deepfabric.cloud)
    """
    override = os.getenv("DEEPFABRIC_FRONTEND_URL")
    if override:
        return override.rstrip("/")

    # Local development: the web app conventionally runs on port 3000.
    if any(host in api_url for host in ("localhost", "127.0.0.1")):
        return "http://localhost:3000"

    # Hosted deployments: swap the api. subdomain for the app. subdomain.
    if "api." in api_url:
        return api_url.replace("api.", "app.").rstrip("/")

    return DEFAULT_FRONTEND_URL
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_current_user(api_url: str = DEFAULT_API_URL) -> dict | None:
    """Return the authenticated user's info, preferring the local cache.

    Results fetched from ``/api/v1/auth/me`` are persisted in the config
    file so subsequent calls avoid a network round-trip.

    Args:
        api_url: The DeepFabric API URL

    Returns:
        User info dict with id, email, name, username, or None if the user
        is not authenticated or the request fails.
    """
    token = get_auth_token()
    if not token:
        return None

    # Serve from the local cache when we already know who this is.
    config = get_config()
    if config.get("username") and config.get("user_id"):
        return {
            "id": config.get("user_id"),
            "username": config.get("username"),
            "email": config.get("email"),
            "name": config.get("name"),
        }

    # Cache miss: ask the API, then persist the identity for next time.
    try:
        with httpx.Client() as client:
            resp = client.get(
                f"{api_url}/api/v1/auth/me",
                headers={"Authorization": f"Bearer {token}"},
                timeout=10.0,
            )
            resp.raise_for_status()
            user_data = resp.json()

        config["user_id"] = user_data.get("id")
        config["username"] = user_data.get("username")
        config["email"] = user_data.get("email")
        config["name"] = user_data.get("name")
        save_config(config)
    except (httpx.RequestError, httpx.HTTPStatusError, json.JSONDecodeError, KeyError):
        # Best-effort: treat any transport/parse failure as "unknown user".
        return None
    return user_data
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def derive_name_and_slug(file_path: str) -> tuple[str, str]:
    """Derive a display name and URL slug from a file path.

    Args:
        file_path: Path to the file (e.g., "my-dataset.jsonl")

    Returns:
        Tuple of (name, slug) derived from the filename.
    """
    base = Path(file_path).stem
    # A double extension such as ".json.jsonl" leaves a ".json" suffix on
    # the stem; drop it so the derived name reads cleanly.
    base = base.removesuffix(".json")

    # Human-readable name: separators become spaces, then title-case.
    name = base.replace("-", " ").replace("_", " ").title()

    # Slug: lowercase alphanumerics and hyphens only, collapsed and trimmed.
    slug = re.sub(r"-+", "-", re.sub(r"[^a-z0-9-]", "-", base.lower())).strip("-")

    # Guard against filenames that reduce to nothing (e.g. all punctuation).
    if not slug:
        return "Unnamed Dataset", "unnamed-dataset"

    return name, slug
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def prompt_for_name(
    resource_type: str,
    default_name: str,
    default_slug: str,
) -> tuple[str, str]:
    """Interactively ask the user for a resource name and derive its slug.

    Args:
        resource_type: Type of resource ("dataset" or "graph")
        default_name: Default name derived from filename
        default_slug: Default slug derived from filename

    Returns:
        Tuple of (name, slug) from user input.
    """
    console = get_tui().console

    # Visual separator plus a color-coded header for the naming section.
    console.print()
    color = "[blue]" if resource_type == "dataset" else "[magenta]"
    console.print(f" {color}{resource_type.upper()}[/] - Enter details for cloud upload")
    console.print()

    # Only the display name is prompted for; the slug is derived from it so
    # the user never has to think about URL formatting.
    name = click.prompt(
        click.style(" Name", fg="cyan"),
        default=default_name,
        type=str,
    ).strip()

    slug = re.sub(r"-+", "-", re.sub(r"[^a-z0-9-]", "-", name.lower())).strip("-")
    if not slug:
        # Name reduced to nothing (e.g. all punctuation) — keep the default.
        slug = default_slug

    # Preview the URL path that will be created.
    console.print(f" [dim]URL path:[/] [cyan]{slug}[/]")
    console.print()

    return name, slug
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def upload_dataset(
    dataset_path: str,
    name: str,
    slug: str,
    description: str = "",
    tags: list[str] | None = None,
    organization_id: str | None = None,
    api_url: str = DEFAULT_API_URL,
) -> dict:
    """Upload a dataset to DeepFabric Cloud.

    Args:
        dataset_path: Path to the JSONL dataset file
        name: Display name for the dataset
        slug: URL-friendly slug for the dataset
        description: Optional description
        tags: Optional list of tags
        organization_id: Optional organization UUID
        api_url: The DeepFabric API URL

    Returns:
        Response dict with dataset_id, version_id, and URLs

    Raises:
        ValueError: If not authenticated, a line contains invalid JSON, or
            the file holds no samples.
        httpx.HTTPStatusError: If the server rejects the upload.
    """
    token = get_auth_token()
    if not token:
        raise ValueError("Not authenticated. Please run 'deepfabric auth login' first.")

    # Parse the JSONL file up front so we fail fast on malformed input.
    # JSON text is defined to be UTF-8 (RFC 8259), so read with an explicit
    # encoding rather than the platform default.
    samples = []
    with open(dataset_path, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, 1):
            content = raw_line.strip()
            if not content:
                continue  # tolerate blank lines between records
            try:
                samples.append(json.loads(content))
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON on line {line_num}: {e}") from e

    if not samples:
        raise ValueError("Dataset file is empty or contains no valid samples")

    # Build request payload
    payload = {
        "name": name,
        "slug": slug,
        "description": description,
        "tags": tags or [],
        "samples": samples,
    }
    if organization_id:
        payload["organization_id"] = organization_id

    # Make the upload request
    with httpx.Client() as client:
        response = client.post(
            f"{api_url}/api/v1/datasets/push",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=120.0,  # Allow longer timeout for large uploads
        )
        response.raise_for_status()
        return response.json()
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def upload_topic_graph(
    graph_path: str,
    name: str,
    description: str = "",
    slug: str | None = None,
    api_url: str = DEFAULT_API_URL,
) -> dict:
    """Upload a topic graph to DeepFabric Cloud.

    Args:
        graph_path: Path to the JSON graph file
        name: Display name for the graph
        description: Optional description
        slug: Optional URL-friendly slug (derived from name if not provided)
        api_url: The DeepFabric API URL

    Returns:
        Response dict with graph info

    Raises:
        ValueError: If not authenticated.
        httpx.HTTPStatusError: If the server rejects the upload.
    """
    token = get_auth_token()
    if not token:
        raise ValueError("Not authenticated. Please run 'deepfabric auth login' first.")

    # Derive slug from name if not provided
    if not slug:
        slug = re.sub(r"[^a-z0-9-]", "-", name.lower())
        slug = re.sub(r"-+", "-", slug).strip("-")

    # Read the graph file. JSON text is defined to be UTF-8 (RFC 8259), so
    # read with an explicit encoding rather than the platform default.
    with open(graph_path, encoding="utf-8") as f:
        graph_content = f.read()

    # Build metadata
    metadata = {
        "name": name,
        "description": description,
        "slug": slug,
    }

    # Always present the upload with a .json extension regardless of the
    # local file's suffix — the backend expects .json for graph imports.
    upload_filename = Path(graph_path).stem + ".json"

    # Make the multipart upload request
    with httpx.Client() as client:
        response = client.post(
            f"{api_url}/api/v1/topic-graphs/import",
            headers={"Authorization": f"Bearer {token}"},
            files={"file": (upload_filename, graph_content, "application/json")},
            data={"metadata": json.dumps(metadata)},
            timeout=60.0,
        )
        response.raise_for_status()
        return response.json()
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def build_urls(
    resource_type: str,
    resource_id: str,
    slug: str,
    username: str | None,
    frontend_url: str = DEFAULT_FRONTEND_URL,
) -> tuple[str | None, str]:
    """Construct the user-facing URLs for an uploaded resource.

    Args:
        resource_type: Either "datasets" or "graphs"
        resource_id: UUID of the resource
        slug: URL slug of the resource
        username: Username for namespace URL (None if unavailable)
        frontend_url: Frontend base URL

    Returns:
        Tuple of (public_url, internal_url); public_url is None when no
        username is available to build the namespaced path.
    """
    internal_url = f"{frontend_url}/{resource_type}/{resource_id}"

    if not username:
        return None, internal_url

    # NOTE: the public routes are asymmetric — datasets use a singular
    # path segment ("dataset") while graphs keep the plural ("graphs").
    segment = "dataset" if resource_type == "datasets" else "graphs"
    return f"{frontend_url}/{segment}/{username}/{slug}", internal_url
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def ensure_authenticated(
    api_url: str = DEFAULT_API_URL,
    headless: bool = False,
) -> bool:
    """Make sure a valid login exists, optionally offering an interactive one.

    Args:
        api_url: The DeepFabric API URL
        headless: If True, never prompt — simply report the auth state.

    Returns:
        True if authenticated (possibly after a fresh login), False otherwise.
    """
    if is_authenticated():
        return True

    # In CI/CD there is no terminal to run a device flow in.
    if headless:
        return False

    get_tui().info("You need to authenticate to upload to DeepFabric Cloud.")
    wants_login = click.confirm("Would you like to log in now?", default=True)
    if not wants_login:
        return False

    return device_flow_login(api_url)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _get_user_friendly_error(e: httpx.HTTPStatusError) -> str:  # noqa: PLR0911
    """Translate an HTTP error into a message safe to show end users.

    Database-level details (constraint names, SQLSTATE codes) are masked;
    otherwise the server-provided message is passed through when present,
    with status-code-based fallbacks.

    Args:
        e: The HTTP status error

    Returns:
        A user-friendly error message
    """
    try:
        raw_message = e.response.json().get("message", "")
        lowered = raw_message.lower()

        # Mask database internals rather than leaking them to the user.
        if "unique constraint" in lowered:
            return "A resource with this name already exists"
        if "foreign key" in lowered:
            return "Invalid reference to related resource"
        if "sqlstate" in lowered:
            return "Server error occurred. Please try again."

        if raw_message:
            return raw_message
    except (json.JSONDecodeError, KeyError, TypeError):
        pass

    # No usable server message — fall back on the status code.
    status_code = e.response.status_code
    fallbacks = {
        HTTP_BAD_REQUEST: "Invalid request. Please check your input.",
        HTTP_UNAUTHORIZED: "Authentication required",
        HTTP_CONFLICT: "Resource conflict - name may already exist",
    }
    if status_code in fallbacks:
        return fallbacks[status_code]
    if status_code >= HTTP_INTERNAL_SERVER_ERROR:
        return "Server error occurred. Please try again later."

    return f"Request failed (HTTP {status_code})"
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _is_duplicate_name_error(response: httpx.Response) -> bool:
    """Return True when the response reports a duplicate name/slug conflict.

    Args:
        response: The HTTP response object

    Returns:
        True if this is a duplicate name error
    """
    if response.status_code not in (HTTP_BAD_REQUEST, HTTP_CONFLICT):
        return False

    # Patterns covering generic duplicate wording plus the specific
    # unique-index names the backend is known to surface.
    patterns = (
        "duplicate",
        "unique constraint",
        "already exists",
        "name already",
        "slug already",
        "idx_datasets_owner_slug",
        "idx_topic_graphs_owner_slug",
    )
    try:
        body = response.json()
        haystack = f"{body.get('message', '').lower()} {body.get('error', '').lower()}"
        return any(pattern in haystack for pattern in patterns)
    except Exception:
        # Unparseable body: we simply cannot classify this as a duplicate.
        return False
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _handle_auth_error(
    api_url: str,
    headless: bool,
) -> bool:
    """Recover from an auth failure by clearing stale tokens and re-prompting.

    Args:
        api_url: The DeepFabric API URL
        headless: If True, never prompt — just report the failure.

    Returns:
        True if re-authenticated successfully, False otherwise.
    """
    tui = get_tui()

    # The stored tokens are known-bad at this point; discard them.
    clear_tokens()

    if headless:
        tui.error(
            "Authentication token expired or invalid. "
            "Please run 'deepfabric auth login' to re-authenticate."
        )
        return False

    # Offer an interactive re-login via the device flow.
    tui.warning("Your session has expired or is invalid.")
    tui.console.print()

    wants_login = click.confirm(" Would you like to log in again?", default=True)
    if not wants_login:
        return False

    tui.console.print()
    return device_flow_login(api_url)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _display_upload_result(
    tui,
    dataset_result: dict | None,
    graph_result: dict | None,
    username: str | None,
    frontend_url: str = DEFAULT_FRONTEND_URL,
) -> dict:
    """Render a summary panel for completed uploads and collect their URLs.

    Args:
        tui: TUI instance for output
        dataset_result: Result from dataset upload (or None)
        graph_result: Result from graph upload (or None)
        username: Username for namespace URLs
        frontend_url: Frontend base URL

    Returns:
        Dict with all URLs for JSON output
    """
    result: dict[str, str | bool] = {"success": True}
    lines: list[str] = []

    def _append_entry(label: str, public_url: str | None, internal_url: str) -> None:
        # Shareable URL shown prominently; the internal one dimmed below it.
        lines.append(label)
        if public_url:
            lines.append(f" [cyan]{public_url}[/cyan]")
            lines.append(f" [dim]{internal_url}[/dim]")
        else:
            lines.append(f" [cyan]{internal_url}[/cyan]")

    if dataset_result:
        dataset_id = dataset_result.get("dataset_id", "")
        # Prefer the server-reported slug; otherwise derive one from the name.
        slug = dataset_result.get("slug", dataset_result.get("name", "").lower().replace(" ", "-"))
        public_url, internal_url = build_urls("datasets", dataset_id, slug, username, frontend_url)

        result["dataset_id"] = dataset_id
        result["dataset_internal_url"] = internal_url
        if public_url:
            result["dataset_url"] = public_url
        _append_entry("[bold blue]Dataset[/bold blue]", public_url, internal_url)

    if graph_result:
        graph_info = graph_result.get("graph", {})
        graph_id = graph_info.get("id", "")
        slug = graph_info.get("slug", graph_info.get("name", "").lower().replace(" ", "-"))
        public_url, internal_url = build_urls("graphs", graph_id, slug, username, frontend_url)

        result["graph_id"] = graph_id
        result["graph_internal_url"] = internal_url
        if lines:
            lines.append("")  # blank spacer between dataset and graph sections
        if public_url:
            result["graph_url"] = public_url
        _append_entry("[bold magenta]Graph[/bold magenta]", public_url, internal_url)

    if not lines:
        lines.append("[dim](no uploads)[/dim]")

    # Render everything inside one green panel.
    console = tui.console
    console.print()
    console.print(
        Panel(
            "\n".join(lines),
            title="[bold green]Uploaded to DeepFabric Cloud[/bold green]",
            border_style="green",
            padding=(1, 2),
        )
    )

    return result
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def _upload_with_retry(
|
|
590
|
+
upload_fn,
|
|
591
|
+
max_retries: int = 3,
|
|
592
|
+
initial_delay: float = 1.0,
|
|
593
|
+
) -> dict:
|
|
594
|
+
"""Execute an upload function with exponential backoff retry.
|
|
595
|
+
|
|
596
|
+
Args:
|
|
597
|
+
upload_fn: Function to call that performs the upload
|
|
598
|
+
max_retries: Maximum number of retry attempts
|
|
599
|
+
initial_delay: Initial delay between retries in seconds
|
|
600
|
+
|
|
601
|
+
Returns:
|
|
602
|
+
Result from the upload function
|
|
603
|
+
|
|
604
|
+
Raises:
|
|
605
|
+
Exception: If all retries fail
|
|
606
|
+
"""
|
|
607
|
+
last_error: Exception | None = None
|
|
608
|
+
delay = initial_delay
|
|
609
|
+
|
|
610
|
+
for attempt in range(max_retries + 1):
|
|
611
|
+
try:
|
|
612
|
+
return upload_fn()
|
|
613
|
+
except httpx.HTTPStatusError as e:
|
|
614
|
+
last_error = e
|
|
615
|
+
# Don't retry client errors (4xx) except rate limits
|
|
616
|
+
status_code = e.response.status_code
|
|
617
|
+
is_client_error = HTTP_BAD_REQUEST <= status_code < HTTP_INTERNAL_SERVER_ERROR
|
|
618
|
+
if is_client_error and status_code != HTTP_TOO_MANY_REQUESTS:
|
|
619
|
+
raise
|
|
620
|
+
|
|
621
|
+
if attempt < max_retries:
|
|
622
|
+
# Check for Retry-After header
|
|
623
|
+
retry_after = e.response.headers.get("Retry-After")
|
|
624
|
+
if retry_after:
|
|
625
|
+
with contextlib.suppress(ValueError):
|
|
626
|
+
delay = float(retry_after)
|
|
627
|
+
|
|
628
|
+
time.sleep(delay)
|
|
629
|
+
delay *= 2 # Exponential backoff
|
|
630
|
+
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
|
631
|
+
last_error = e
|
|
632
|
+
if attempt < max_retries:
|
|
633
|
+
time.sleep(delay)
|
|
634
|
+
delay *= 2
|
|
635
|
+
|
|
636
|
+
if last_error is not None:
|
|
637
|
+
raise last_error
|
|
638
|
+
# This should never happen as the loop always catches an exception
|
|
639
|
+
raise RuntimeError("Upload failed with unknown error")
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _perform_upload(
    resource_type: str,
    file_path: str,
    upload_fn: Callable[[str, str], dict],
    success_message_fn: Callable[[dict], str],
    tui: "DeepFabricTUI",
    console: "Console",
    api_url: str,
    headless: bool,
) -> dict | None:
    """Perform upload with retry logic for auth errors and duplicate names.

    Drives one upload end-to-end: pick a name/slug (defaults in headless
    mode, prompts otherwise), call ``upload_fn`` through the transient-error
    retry wrapper, and recover from 401s (re-login) and duplicate-name
    conflicts (re-prompt) up to a bounded number of attempts.

    Args:
        resource_type: Type of resource ("dataset" or "graph")
        file_path: Path to the file to upload
        upload_fn: Function that takes (name, slug) and performs the upload
        success_message_fn: Function that takes the result and returns a success message
        tui: TUI instance for displaying messages
        console: Rich console instance
        api_url: API URL for re-authentication
        headless: Whether running in headless mode

    Returns:
        Upload result dict or None if upload failed/skipped

    Raises:
        click.ClickException: In headless mode, for any upload failure.
    """
    default_name, default_slug = derive_name_and_slug(file_path)
    if headless:
        # No prompts in CI — filename-derived defaults are used as-is.
        name, slug = default_name, default_slug
    else:
        name, slug = prompt_for_name(resource_type, default_name, default_slug)

    # Loop to handle retries (auth errors, duplicate names). Each iteration
    # is one full upload attempt with the current name/slug.
    max_name_retries = 3
    for _attempt in range(max_name_retries):
        tui.info(f"Uploading {resource_type} '{name}'...")

        try:
            # Bind name/slug as defaults so the lambda captures the values
            # current at this iteration, not the loop variables themselves.
            result = _upload_with_retry(lambda n=name, s=slug: upload_fn(n, s))
        except httpx.HTTPStatusError as e:
            # Check for auth error (401): clear tokens and offer re-login.
            if e.response.status_code == HTTP_UNAUTHORIZED:
                if _handle_auth_error(api_url, headless):
                    # Re-authenticated, retry upload
                    continue
                # User declined re-auth or headless mode
                if headless:
                    raise click.ClickException(
                        f"{resource_type.capitalize()} upload failed: authentication required"
                    ) from None
                return None

            # Check for duplicate name error; interactively, suggest a
            # "-2" variant and let the user pick a new name.
            if _is_duplicate_name_error(e.response) and not headless:
                tui.warning(
                    f"A {resource_type} named '{slug}' already exists. "
                    "Please choose a different name."
                )
                console.print()
                name, slug = prompt_for_name(resource_type, name + "-2", slug + "-2")
                continue

            # Other HTTP errors - show user-friendly message
            error_msg = _get_user_friendly_error(e)
            tui.error(f"Failed to upload {resource_type}: {error_msg}")
            if headless:
                raise click.ClickException(
                    f"{resource_type.capitalize()} upload failed: {error_msg}"
                ) from None
            return None
        except Exception as e:
            # Non-HTTP failures (parse errors, OS errors, ...): report and,
            # in headless mode, escalate so the pipeline fails loudly.
            tui.error(f"Failed to upload {resource_type}: {e}")
            if headless:
                raise click.ClickException(
                    f"{resource_type.capitalize()} upload failed: {e}"
                ) from None
            return None
        else:
            # Success path: announce and hand the server response back.
            tui.success(success_message_fn(result))
            return result

    # All name-retry attempts exhausted without a successful upload.
    return None
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def handle_cloud_upload(  # noqa: PLR0911
    dataset_path: str | None = None,
    graph_path: str | None = None,
    cloud_upload_flag: str | None = None,
    api_url: str = DEFAULT_API_URL,
) -> dict | None:
    """Main entry point for cloud upload functionality.

    Gated behind the EXPERIMENTAL_DF environment variable. Decides what to
    upload (from the headless flag or interactive prompts), ensures the
    user is authenticated, performs the uploads, and displays the results.

    Args:
        dataset_path: Path to dataset JSONL file (or None)
        graph_path: Path to topic graph JSON file (or None)
        cloud_upload_flag: Upload mode for headless operation:
            - None: Interactive mode with prompts
            - "all": Upload both dataset and graph
            - "dataset": Upload dataset only
            - "graph": Upload graph only
            - "none": Skip uploads
        api_url: The DeepFabric API URL

    Returns:
        Dict with upload results and URLs, or None if skipped

    Raises:
        click.ClickException: In headless mode, when authentication is
            missing or an upload fails.
    """
    # Feature gate: silently do nothing unless the experimental flag is set.
    if not get_bool_env("EXPERIMENTAL_DF"):
        return None

    tui = get_tui()
    console = tui.console
    # Any explicit flag value (including "none") means non-interactive mode.
    headless = cloud_upload_flag is not None

    # Derive frontend URL from API URL for the result links.
    frontend_url = derive_frontend_url(api_url)

    # Determine what to upload
    if headless:
        # Headless mode - use flag value directly
        if cloud_upload_flag == "none":
            return None

        upload_dataset_flag = cloud_upload_flag in ("all", "dataset")
        upload_graph_flag = cloud_upload_flag in ("all", "graph")
    else:
        # Interactive mode - prompt user, but only for files that exist.
        has_dataset = dataset_path and Path(dataset_path).exists()
        has_graph = graph_path and Path(graph_path).exists()

        if not has_dataset and not has_graph:
            return None

        # Visual separator and header
        console.print()
        console.rule("[bold cyan]Cloud Upload[/bold cyan]", style="cyan")
        console.print()

        # Build prompt based on what's available
        if has_dataset and has_graph:
            prompt_text = " Upload to DeepFabric Cloud?"
            hint = "[dim](Y=both, n=skip, c=choose)[/dim]"
        elif has_dataset:
            prompt_text = " Upload dataset to DeepFabric Cloud?"
            hint = "[dim](Y=yes, n=skip)[/dim]"
        else:
            prompt_text = " Upload graph to DeepFabric Cloud?"
            hint = "[dim](Y=yes, n=skip)[/dim]"

        console.print(f"{prompt_text} {hint}")

        # The "c" (choose) option only makes sense when both are present.
        if has_dataset and has_graph:
            response = click.prompt(
                click.style(" Choice", fg="cyan"),
                type=click.Choice(["Y", "n", "c"], case_sensitive=False),
                default="Y",
                show_choices=False,
            )
        else:
            response = click.prompt(
                click.style(" Choice", fg="cyan"),
                type=click.Choice(["Y", "n"], case_sensitive=False),
                default="Y",
                show_choices=False,
            )

        if response.lower() == "n":
            tui.info("Skipping cloud upload.")
            return None

        if response.lower() == "c":
            # Choose individually
            upload_dataset_flag = has_dataset and click.confirm("Upload dataset?", default=True)
            upload_graph_flag = has_graph and click.confirm("Upload graph?", default=True)
        else:
            # Y = upload all available
            upload_dataset_flag = has_dataset
            upload_graph_flag = has_graph

    # Check if anything to upload
    if not upload_dataset_flag and not upload_graph_flag:
        return None

    # Ensure authenticated (prompting for device-flow login if interactive).
    if not ensure_authenticated(api_url, headless=headless):
        if headless:
            tui.error(
                "Authentication required. Set DEEPFABRIC_API_KEY environment variable "
                "or run 'deepfabric auth login' first."
            )
            raise click.ClickException("Authentication required for cloud upload")
        tui.info("Skipping cloud upload (not authenticated).")
        return None

    # Get user info for URL construction (username may be None on failure).
    user_info = get_current_user(api_url)
    username = user_info.get("username") if user_info else None

    # Upload dataset
    dataset_result = None
    if upload_dataset_flag and dataset_path:
        dataset_result = _perform_upload(
            resource_type="dataset",
            file_path=dataset_path,
            upload_fn=lambda n, s: upload_dataset(
                dataset_path=dataset_path, name=n, slug=s, api_url=api_url
            ),
            success_message_fn=lambda r: f"Dataset uploaded: {r.get('samples_count', 0)} samples",
            tui=tui,
            console=console,
            api_url=api_url,
            headless=headless,
        )

    # Upload graph
    graph_result = None
    if upload_graph_flag and graph_path:
        graph_result = _perform_upload(
            resource_type="graph",
            file_path=graph_path,
            upload_fn=lambda n, s: upload_topic_graph(
                graph_path=graph_path, name=n, slug=s, api_url=api_url
            ),
            success_message_fn=lambda r: (
                f"Graph uploaded: {r.get('nodes_imported', 0)} nodes, "
                f"{r.get('edges_imported', 0)} edges"
            ),
            tui=tui,
            console=console,
            api_url=api_url,
            headless=headless,
        )

    # Display results
    if dataset_result or graph_result:
        result = _display_upload_result(tui, dataset_result, graph_result, username, frontend_url)

        # In headless mode, also output JSON so pipelines can capture URLs.
        if headless:
            tui.console.print(json.dumps(result, indent=2))

        return result

    return None
|