anyscale 0.26.31__py3-none-any.whl → 0.26.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/_private/anyscale_client/anyscale_client.py +15 -0
- anyscale/_private/anyscale_client/common.py +12 -1
- anyscale/_private/anyscale_client/fake_anyscale_client.py +24 -0
- anyscale/_private/docgen/__main__.py +2 -0
- anyscale/_private/docgen/models.md +2 -2
- anyscale/api.py +22 -0
- anyscale/aws_iam_policies.py +0 -3
- anyscale/client/README.md +22 -1
- anyscale/client/openapi_client/__init__.py +17 -0
- anyscale/client/openapi_client/api/default_api.py +611 -157
- anyscale/client/openapi_client/models/__init__.py +17 -0
- anyscale/client/openapi_client/models/baseimagesenum.py +68 -1
- anyscale/client/openapi_client/models/cli_usage_payload.py +440 -0
- anyscale/client/openapi_client/models/cluster_operation.py +266 -0
- anyscale/client/openapi_client/models/cluster_operation_type.py +101 -0
- anyscale/client/openapi_client/models/clusteroperation_response.py +121 -0
- anyscale/client/openapi_client/models/commit_ledger_item_type.py +111 -0
- anyscale/client/openapi_client/models/commit_ledger_record_v2.py +207 -0
- anyscale/client/openapi_client/models/complexity_level.py +101 -0
- anyscale/client/openapi_client/models/credit_grant_record_v2.py +181 -0
- anyscale/client/openapi_client/models/credit_ledger_item_type.py +104 -0
- anyscale/client/openapi_client/models/credit_ledger_record_v2.py +207 -0
- anyscale/client/openapi_client/models/credit_record_commit_v2.py +410 -0
- anyscale/client/openapi_client/models/credit_record_credit_v2.py +410 -0
- anyscale/client/openapi_client/models/credit_type.py +100 -0
- anyscale/client/openapi_client/models/credits_v2.py +355 -0
- anyscale/client/openapi_client/models/operation_error.py +123 -0
- anyscale/client/openapi_client/models/operation_progress.py +123 -0
- anyscale/client/openapi_client/models/operation_result.py +150 -0
- anyscale/client/openapi_client/models/supportedbaseimagesenum.py +68 -1
- anyscale/client/openapi_client/models/workspace_template.py +115 -3
- anyscale/client/openapi_client/models/workspace_template_readme.py +59 -3
- anyscale/cloud/__init__.py +16 -0
- anyscale/cloud/_private/cloud_sdk.py +33 -0
- anyscale/cloud/commands.py +35 -0
- anyscale/commands/cloud_commands.py +35 -0
- anyscale/commands/command_examples.py +6 -0
- anyscale/commands/list_util.py +100 -38
- anyscale/integrations.py +0 -20
- anyscale/scripts.py +1 -0
- anyscale/sdk/anyscale_client/models/baseimagesenum.py +68 -1
- anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +68 -1
- anyscale/shared_anyscale_utils/headers.py +4 -0
- anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
- anyscale/telemetry.py +424 -0
- anyscale/version.py +1 -1
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/METADATA +1 -1
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/RECORD +53 -35
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/LICENSE +0 -0
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/NOTICE +0 -0
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/WHEEL +0 -0
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.31.dist-info → anyscale-0.26.33.dist-info}/top_level.txt +0 -0
anyscale/telemetry.py
ADDED
@@ -0,0 +1,424 @@
|
|
1
|
+
"""
|
2
|
+
Telemetry for Anyscale CLI commands.
|
3
|
+
|
4
|
+
Patches Click to capture execution metrics for _leaf_ commands,
|
5
|
+
including command path, flags, timing, and errors. Emits via
|
6
|
+
HTTP POST (best-effort) or debug print.
|
7
|
+
|
8
|
+
Supports session-based distributed tracing for interactive commands:
|
9
|
+
- Each command gets a unique trace_id for backend correlation
|
10
|
+
- Interactive sessions get a session_id to group related operations
|
11
|
+
- Page fetches get new trace_ids but share the session_id
|
12
|
+
"""
|
13
|
+
|
14
|
+
from contextvars import ContextVar
|
15
|
+
import functools
|
16
|
+
import json
|
17
|
+
import os
|
18
|
+
import random
|
19
|
+
import secrets
|
20
|
+
import sys
|
21
|
+
import threading
|
22
|
+
import time
|
23
|
+
from typing import List, Optional
|
24
|
+
|
25
|
+
import click
|
26
|
+
|
27
|
+
from anyscale.cli_logger import BlockLogger
|
28
|
+
from anyscale.client.openapi_client.models.cli_usage_payload import CLIUsagePayload
|
29
|
+
|
30
|
+
|
31
|
+
# ─── Configuration ────────────────────────────────────────────────────────────
|
32
|
+
|
33
|
+
def _parse_sample_rate(raw: Optional[str], default: float = 1.0) -> float:
    """Convert the raw sampling setting into a probability in [0.0, 1.0].

    Args:
        raw: Value of ANYSCALE_TELEMETRY_SAMPLE_RATE, or None when unset.
        default: Rate used when ``raw`` is missing, unparseable, or NaN.

    Returns:
        The parsed rate clamped to the closed interval [0.0, 1.0].
    """
    try:
        rate = float(raw)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        # Unset or malformed values must not crash the CLI at import time.
        return default
    if rate != rate:  # NaN is the only float that compares unequal to itself
        return default
    # Rates <= 0 never sample and rates >= 1 always sample, so clamping keeps
    # the effective behaviour of out-of-range values unchanged.
    return min(max(rate, 0.0), 1.0)


SAMPLE_RATE = _parse_sample_rate(os.getenv("ANYSCALE_TELEMETRY_SAMPLE_RATE"))
TELEMETRY_DEBUG = os.getenv("ANYSCALE_DEBUG") == "1"

# ContextVar automatically propagates into asyncio tasks if you ever go async.
# (Each CLI invocation gets its own interpreter, so this never crosses commands.)
_trace_id_var: ContextVar[Optional[str]] = ContextVar("_trace_id_var", default=None)
_session_id_var: ContextVar[Optional[str]] = ContextVar("_session_id_var", default=None)
# Set to True once a "command" event has been emitted so it is not sent twice.
_skip_click_patch_var: ContextVar[bool] = ContextVar(
    "_skip_click_patch_var", default=False
)
|
43
|
+
|
44
|
+
logger = BlockLogger()
|
45
|
+
|
46
|
+
# ─── Trace Context Helpers ───────────────────────────────────────────────────
|
47
|
+
|
48
|
+
|
49
|
+
def _setup_trace_context() -> str:
    """Ensure we have a trace ID in the ContextVar, and return it.

    Returns:
        The trace ID already stored in ``_trace_id_var``, or a newly generated
        32-hex-character ID that has been stored there.
    """
    tid: Optional[str] = None
    try:
        tid = _trace_id_var.get()
        if tid is None:
            tid = secrets.token_hex(16)
            _trace_id_var.set(tid)
            # NOTE(review): the flattened source does not show whether this log
            # was inside the `if`; it is assumed to fire only on creation.
            logger.debug(f"[TRACE DEBUG] trace-id={tid}")
        return tid
    except Exception:  # noqa: BLE001
        # Prefer the ID we already determined so the returned value matches
        # what was stored; only mint a throwaway ID if none exists.
        return tid if tid is not None else secrets.token_hex(16)
|
61
|
+
|
62
|
+
|
63
|
+
def get_traceparent() -> Optional[str]:
    """Return a W3C-style traceparent header, or None if not initialized.

    The header has the form ``00-<trace-id>-<parent-id>-01``. The trace-id is
    the value currently stored in ``_trace_id_var``; the parent-id is a fresh
    random 16-hex-character value generated on every call.

    Returns:
        The header string, or None when no trace ID is set or anything fails.
    """
    try:
        tid = _trace_id_var.get()
        if not tid:
            return None
        # W3C Trace Context defines an all-zero parent-id as invalid, and
        # compliant receivers drop such a header. Use a random non-zero ID so
        # the trace-id is actually honoured.
        # NOTE(review): confirm the backend does not rely on the zero parent-id.
        parent_id = secrets.token_hex(8)
        return f"00-{tid}-{parent_id}-01"
    except Exception:  # noqa: BLE001
        return None
|
72
|
+
|
73
|
+
|
74
|
+
def start_interactive_session() -> str:
    """Start an interactive session and return the session ID.

    Returns:
        A new 16-hex-character session ID. It is stored in ``_session_id_var``
        on a best-effort basis; the returned value is always the one that was
        (or would have been) stored.
    """
    session_id = secrets.token_hex(8)
    try:
        _session_id_var.set(session_id)
        logger.debug(f"[TRACE DEBUG] session-id={session_id}")
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI. Fall through and return the
        # same ID so it cannot diverge from the stored one.
        pass
    return session_id
|
84
|
+
|
85
|
+
|
86
|
+
def new_trace_for_page() -> str:
    """Generate a new trace ID for the next page in an interactive session.

    Returns:
        A new 32-hex-character trace ID. It replaces the value in
        ``_trace_id_var`` on a best-effort basis; the returned value is always
        the one that was (or would have been) stored.
    """
    new_trace_id = secrets.token_hex(16)
    try:
        _trace_id_var.set(new_trace_id)
        logger.debug(f"[TRACE DEBUG] new-trace-id={new_trace_id}")
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI. Fall through and return the
        # same ID so it cannot diverge from the stored one.
        pass
    return new_trace_id
|
96
|
+
|
97
|
+
|
98
|
+
# ─── CLI Arg Extraction ───────────────────────────────────────────────────────
|
99
|
+
|
100
|
+
|
101
|
+
def _get_user_flags() -> List[str]:
    """Return all `-x`/`--long` flags from the raw argv (no values).

    Values attached with ``=`` (``--name=foo``) are stripped so user-supplied
    data never reaches the telemetry payload. Scanning stops at a bare ``--``
    because everything after it is positional, and a lone ``-`` is skipped
    because it is not a flag.

    Returns:
        Flag names in the order they appear on the command line, or an empty
        list if anything goes wrong.
    """
    try:
        args = sys.argv[1:]
        # Strip off the program name if Click added it
        if args and args[0] in ("anyscale", "main"):
            args = args[1:]

        flags: List[str] = []
        for arg in args:
            if arg == "--":
                break
            if arg.startswith("-") and arg != "-":
                # Keep only the flag name; drop any "=value" suffix.
                flags.append(arg.split("=", 1)[0])
        return flags
    except Exception:  # noqa: BLE001
        return []
|
111
|
+
|
112
|
+
|
113
|
+
def _get_user_options(ctx: click.Context) -> List[str]:
    """Return the names of parameters explicitly set via the CLI.

    A parameter is included when Click reports its source as COMMANDLINE, and
    also when the source lookup itself raises.

    Args:
        ctx: Click context whose ``params`` are inspected.

    Returns:
        Parameter names in ``ctx.params`` order, or an empty list on failure.
    """
    try:
        explicit: List[str] = []
        for param_name in ctx.params:
            try:
                source = ctx.get_parameter_source(param_name)
                from_cli = source is click.core.ParameterSource.COMMANDLINE
            except Exception:  # noqa: BLE001
                # Source could not be determined: report the parameter anyway.
                from_cli = True
            if from_cli:
                explicit.append(param_name)
        return explicit
    except Exception:  # noqa: BLE001
        return []
|
129
|
+
|
130
|
+
|
131
|
+
# ─── Page Fetch Tracking ─────────────────────────────────────────────────────
|
132
|
+
|
133
|
+
# perf_counter() reading taken when the current page fetch began, or None.
_page_fetch_start_time: ContextVar[Optional[float]] = ContextVar(
    "_page_fetch_start_time", default=None
)


def mark_page_fetch_start(page_number: int) -> None:
    """
    Record the beginning of a page fetch.

    When the event is sampled, a new trace ID is generated for the page and
    the current ``time.perf_counter()`` reading is stored as the start time.
    Any exception is swallowed.

    Args:
        page_number: The page number being fetched (1-indexed)
    """
    try:
        skip = SAMPLE_RATE <= 0 or random.random() > SAMPLE_RATE
        if not skip:
            # New trace ID must exist BEFORE the API request is made
            new_trace_for_page()
            _page_fetch_start_time.set(time.perf_counter())
            logger.debug(f"[TRACE DEBUG] page-fetch-start page={page_number}")
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI
        pass
|
161
|
+
|
162
|
+
|
163
|
+
def mark_page_fetch_complete(page_number: int) -> None:
    """
    Mark the completion of a page fetch operation and emit telemetry.
    This calculates the duration and sends the page_fetch event.

    The event is emitted only when a start time was recorded for the fetch.
    The sampling decision is therefore made once, at the start of the fetch,
    instead of being rolled again here. Without a recorded start nothing is
    emitted, rather than a 0 ms event carrying a stale trace ID.

    Args:
        page_number: The page number that was fetched (1-indexed)
    """
    try:
        if SAMPLE_RATE <= 0:
            return

        start_time = _page_fetch_start_time.get()
        if start_time is None:
            # The start was not sampled (or never marked): nothing to report.
            return

        # Clear the marker right away so that no early return below can leave
        # a stale start time to be attributed to a later page.
        _page_fetch_start_time.set(None)
        duration_ms = (time.perf_counter() - start_time) * 1000

        # Get current click context
        try:
            ctx = click.get_current_context()
        except RuntimeError:
            return  # No active context

        # Get current trace ID (should be the one generated at fetch start)
        trace_id = _trace_id_var.get()
        if not trace_id:
            return

        # Emit page fetch telemetry
        body = _create_payload(
            trace_id=trace_id,
            ctx=ctx,
            duration_ms=duration_ms,
            exit_code=0,
            exception_type=None,
            event_type="page_fetch",
            page_number=page_number,
        )
        _emit_telemetry(body)

        logger.debug(
            f"[TRACE DEBUG] page-fetch-complete page={page_number} duration={duration_ms:.2f}ms"
        )
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI
        pass
|
215
|
+
|
216
|
+
|
217
|
+
# ─── Payload Construction ────────────────────────────────────────────────────
|
218
|
+
|
219
|
+
|
220
|
+
def _create_payload(
    trace_id: str,
    ctx: click.Context,
    duration_ms: float,
    exit_code: int,
    exception_type: Optional[str],
    event_type: str = "command",
    page_number: Optional[int] = None,
) -> CLIUsagePayload:
    """
    Assemble the typed CLIUsagePayload (generated OpenAPI model) for one event.

    If building the full payload raises, a reduced payload containing only
    trace_id, cmd_path="unknown", duration_ms, exit_code and timestamp is
    returned instead.

    Args:
        trace_id: Unique trace identifier for this operation
        ctx: Click context containing command information
        duration_ms: Command/operation duration in milliseconds
        exit_code: Command exit code (0 for success, 1 for error)
        exception_type: Exception class name if command failed
        event_type: Type of event ("command" or "page_fetch")
        page_number: Page number for page_fetch events
    """
    try:
        # Session ID is None unless an interactive session was started
        current_session = _session_id_var.get()
        py = sys.version_info

        return CLIUsagePayload(
            trace_id=trace_id,
            session_id=current_session,
            event_type=event_type,
            page_number=page_number,
            cmd_path=ctx.command_path,
            options=sorted(_get_user_options(ctx)),
            flags_used=sorted(_get_user_flags()),
            duration_ms=round(duration_ms, 2),
            exit_code=exit_code,
            exception_type=exception_type,
            cli_version=getattr(sys.modules.get("anyscale"), "__version__", None),
            python_version=f"{py.major}.{py.minor}",
            timestamp=int(time.time()),
        )
    except Exception:  # noqa: BLE001
        # Minimal payload when the full construction fails
        return CLIUsagePayload(
            trace_id=trace_id,
            cmd_path="unknown",
            duration_ms=duration_ms,
            exit_code=exit_code,
            timestamp=int(time.time()),
        )
|
272
|
+
|
273
|
+
|
274
|
+
def mark_command_complete() -> None:
    """
    Emit the "command" telemetry event now, before any user interaction.

    Intended for interactive commands: call it once the data is ready. The
    duration is measured from the ``telemetry_start_time`` attribute on the
    current Click context (0.0 if that attribute is absent). After emitting,
    the skip flag is set so the Click patch does not emit a second event.
    Does nothing when no trace ID or no Click context is active.
    """
    try:
        active_trace = _trace_id_var.get()
        if not active_trace:
            return

        try:
            current_ctx = click.get_current_context()
        except RuntimeError:
            return  # No active context

        # Time elapsed up to this point; falls back to 0.0 with no start mark
        began = getattr(current_ctx, "telemetry_start_time", None)
        elapsed_ms = 0.0 if began is None else (time.perf_counter() - began) * 1000

        _emit_telemetry(
            _create_payload(
                trace_id=active_trace,
                ctx=current_ctx,
                duration_ms=elapsed_ms,
                exit_code=0,
                exception_type=None,
                event_type="command",
            )
        )

        # Prevent Click patch from emitting again
        _skip_click_patch_var.set(True)
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI
        pass
|
316
|
+
|
317
|
+
|
318
|
+
# ─── Emission (fire-&-forget) ─────────────────────────────────────────────────
|
319
|
+
|
320
|
+
|
321
|
+
def _emit_telemetry(body: CLIUsagePayload) -> None:
    """
    Send the payload to the console API. Runs in a short-lived daemon thread
    so we never block the CLI for more than ~3 seconds.

    The worker is a daemon thread: if the request is still in flight after
    the 3-second join, it cannot keep the interpreter alive at exit. All
    failures are swallowed (best-effort delivery).

    Args:
        body: The usage payload to POST.
    """
    try:
        # Debug logging is isolated so that a serialization or logger failure
        # cannot prevent the payload from being sent.
        try:
            logger.debug(json.dumps(body.to_dict(), indent=2))
        except Exception:  # noqa: BLE001
            pass

        def _worker():
            try:
                # Lazy imports to avoid circular deps
                from anyscale.authenticate import get_auth_api_client
                from anyscale.client.openapi_client.api.default_api import DefaultApi

                auth_block = get_auth_api_client()
                api = DefaultApi(api_client=auth_block.anyscale_api_client)
                api.receive_cli_usage_api_v2_cli_usage_post(
                    cli_usage_payload=body, _request_timeout=2
                )
            except Exception:  # noqa: BLE001
                # Best-effort only - never crash the CLI
                pass

        # daemon=True: a non-daemon thread is joined at interpreter shutdown,
        # so a hung worker would block exit despite the timeout below.
        thread = threading.Thread(target=_worker, daemon=True)
        thread.start()
        thread.join(timeout=3)
    except Exception:  # noqa: BLE001
        # Telemetry should never crash the CLI
        pass
|
350
|
+
|
351
|
+
|
352
|
+
# ─── Click Patch ─────────────────────────────────────────────────────────────
|
353
|
+
|
354
|
+
|
355
|
+
def _patch_click() -> None:
    """Monkey-patch Click so that each leaf command emits telemetry.

    Replaces ``click.Command.invoke`` with a wrapper that times sampled,
    non-group commands and emits one "command" event when they finish. The
    patch is applied at most once (tracked by a marker attribute on the
    ``click`` module). Any failure while patching is swallowed.

    The wrapped command is always invoked exactly once. Telemetry setup and
    emission are guarded separately from the command call, so an exception
    raised by the command propagates unchanged and is never mistaken for a
    telemetry failure (which would re-run the command).
    """
    try:
        if getattr(click, "_anyscale_telemetry_patched", False):
            return

        original_invoke = click.Command.invoke

        @functools.wraps(original_invoke)
        def instrumented_invoke(self, ctx, *args, **kwargs):
            # Phase 1: telemetry setup. trace_id stays None when this call is
            # not instrumented (unsampled, a group, or setup failed).
            trace_id = None
            start = 0.0
            try:
                # Sampling
                skip = SAMPLE_RATE <= 0 or random.random() > SAMPLE_RATE
                # Only instrument leaf commands
                if not skip and not isinstance(self, click.Group):
                    trace_id = _setup_trace_context()
                    start = time.perf_counter()
                    # Store start time in context for interactive commands
                    ctx.telemetry_start_time = start
            except Exception:  # noqa: BLE001
                # If telemetry setup fails, just run the original command
                trace_id = None

            if trace_id is None:
                return original_invoke(self, ctx, *args, **kwargs)

            # Phase 2: run the command once, outside any telemetry guard.
            code, exc = 0, None
            try:
                return original_invoke(self, ctx, *args, **kwargs)
            except Exception as e:
                code, exc = 1, e.__class__.__name__
                raise
            finally:
                # Phase 3: emit, unless mark_command_complete() (or an earlier
                # invocation) already did so.
                try:
                    if not _skip_click_patch_var.get():
                        # Use actual end time for non-interactive commands
                        dur = (time.perf_counter() - start) * 1_000
                        body = _create_payload(
                            trace_id=trace_id,
                            ctx=ctx,
                            duration_ms=dur,
                            exit_code=code,
                            exception_type=exc,
                            event_type="command",
                        )
                        _emit_telemetry(body)
                        _skip_click_patch_var.set(True)
                except Exception:  # noqa: BLE001
                    # Telemetry should never crash the CLI
                    pass

        click.Command.invoke = instrumented_invoke
        click._anyscale_telemetry_patched = (  # noqa: SLF001 # type: ignore[attr-defined]
            True
        )
    except Exception:  # noqa: BLE001
        # If patching fails, telemetry just won't work - don't crash the CLI
        pass
|
421
|
+
|
422
|
+
|
423
|
+
# Auto-patch on import
|
424
|
+
_patch_click()
|
anyscale/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.26.31"
|
1
|
+
__version__ = "0.26.33"
|