arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

This diff compares the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only.

This version of arize-phoenix has been flagged as potentially problematic; see the registry listing for details.
Files changed (52)
  1. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +12 -6
  2. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +47 -42
  3. phoenix/config.py +21 -0
  4. phoenix/datetime_utils.py +4 -0
  5. phoenix/db/insertion/dataset.py +19 -16
  6. phoenix/db/insertion/evaluation.py +4 -4
  7. phoenix/db/insertion/helpers.py +4 -12
  8. phoenix/db/insertion/span.py +3 -3
  9. phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
  10. phoenix/db/models.py +8 -3
  11. phoenix/experiments/__init__.py +6 -0
  12. phoenix/experiments/evaluators/__init__.py +29 -0
  13. phoenix/experiments/evaluators/base.py +153 -0
  14. phoenix/{datasets → experiments}/evaluators/code_evaluators.py +25 -53
  15. phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +62 -31
  16. phoenix/experiments/evaluators/utils.py +189 -0
  17. phoenix/experiments/functions.py +616 -0
  18. phoenix/{datasets → experiments}/tracing.py +19 -0
  19. phoenix/experiments/types.py +722 -0
  20. phoenix/experiments/utils.py +9 -0
  21. phoenix/server/api/context.py +4 -0
  22. phoenix/server/api/dataloaders/__init__.py +4 -0
  23. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  24. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  25. phoenix/server/api/helpers/dataset_helpers.py +8 -7
  26. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  27. phoenix/server/api/mutations/project_mutations.py +9 -4
  28. phoenix/server/api/routers/v1/__init__.py +1 -1
  29. phoenix/server/api/routers/v1/dataset_examples.py +10 -10
  30. phoenix/server/api/routers/v1/datasets.py +152 -48
  31. phoenix/server/api/routers/v1/evaluations.py +4 -11
  32. phoenix/server/api/routers/v1/experiment_evaluations.py +23 -23
  33. phoenix/server/api/routers/v1/experiment_runs.py +5 -17
  34. phoenix/server/api/routers/v1/experiments.py +5 -5
  35. phoenix/server/api/routers/v1/spans.py +6 -4
  36. phoenix/server/api/types/Experiment.py +12 -0
  37. phoenix/server/api/types/ExperimentRun.py +1 -1
  38. phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
  39. phoenix/server/app.py +4 -0
  40. phoenix/server/static/index.js +712 -588
  41. phoenix/session/client.py +321 -28
  42. phoenix/trace/fixtures.py +6 -6
  43. phoenix/utilities/json.py +8 -8
  44. phoenix/version.py +1 -1
  45. phoenix/datasets/__init__.py +0 -0
  46. phoenix/datasets/evaluators/__init__.py +0 -18
  47. phoenix/datasets/evaluators/_utils.py +0 -13
  48. phoenix/datasets/experiments.py +0 -485
  49. phoenix/datasets/types.py +0 -212
  50. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
  51. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
  52. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
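
The headline change in this release candidate is a package rename: phoenix.datasets is removed (items 45-49) and its experiments machinery reappears under a new phoenix.experiments package (items 11-20). The diff only records line counts, so the exact re-export surface is an assumption; a hypothetical before/after import sketch:

# Before (4.4.4rc4) -- these modules are deleted by this release:
#   from phoenix.datasets.experiments import run_experiment
#   from phoenix.datasets.types import EvaluationResult, Example

# After (4.4.4rc6) -- assumed new locations, inferred from the added files
# phoenix/experiments/__init__.py, functions.py, and types.py:
from phoenix.experiments import run_experiment  # hypothetical re-export
from phoenix.experiments.types import EvaluationResult, Example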
phoenix/datasets/experiments.py DELETED
@@ -1,485 +0,0 @@
-import functools
-import json
-from binascii import hexlify
-from contextlib import ExitStack
-from copy import deepcopy
-from datetime import datetime, timezone
-from itertools import product
-from typing import (
-    Any,
-    Awaitable,
-    Callable,
-    Coroutine,
-    Iterable,
-    Mapping,
-    Optional,
-    Tuple,
-    Type,
-    Union,
-    cast,
-)
-from urllib.parse import urljoin
-
-import httpx
-import opentelemetry.sdk.trace as trace_sdk
-from openinference.semconv.resource import ResourceAttributes
-from openinference.semconv.trace import (
-    OpenInferenceMimeTypeValues,
-    OpenInferenceSpanKindValues,
-    SpanAttributes,
-)
-from opentelemetry.context import Context
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import Span
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-from opentelemetry.trace import Status, StatusCode
-from typing_extensions import TypeAlias
-
-from phoenix.config import (
-    get_env_client_headers,
-    get_env_collector_endpoint,
-    get_env_host,
-    get_env_port,
-)
-from phoenix.datasets.tracing import capture_spans
-from phoenix.datasets.types import (
-    CanAsyncEvaluate,
-    CanEvaluate,
-    Dataset,
-    EvaluationResult,
-    Example,
-    Experiment,
-    ExperimentEvaluationRun,
-    ExperimentEvaluator,
-    ExperimentResult,
-    ExperimentRun,
-    ExperimentRunId,
-    JSONSerializable,
-    TestCase,
-)
-from phoenix.evals.executors import get_executor_on_sync_context
-from phoenix.evals.models.rate_limiters import RateLimiter
-from phoenix.evals.utils import get_tqdm_progress_bar_formatter
-from phoenix.session.session import active_session
-from phoenix.trace.attributes import flatten
-from phoenix.utilities.json import jsonify
-
-ExperimentTask: TypeAlias = Union[
-    Callable[[Example], JSONSerializable],
-    Callable[[Example], Coroutine[None, None, JSONSerializable]],
-]
-
-
-def _get_base_url() -> str:
-    host = get_env_host()
-    if host == "0.0.0.0":
-        host = "127.0.0.1"
-    base_url = get_env_collector_endpoint() or f"http://{host}:{get_env_port()}"
-    return base_url if base_url.endswith("/") else base_url + "/"
-
-
-def _get_web_base_url() -> str:
-    """Return the web UI base URL.
-
-    Returns:
-        str: the web UI base URL
-    """
-    if session := active_session():
-        return session.url
-    return _get_base_url()
-
-
-def _get_experiment_url(*, dataset_id: str, experiment_id: str) -> str:
-    return f"{_get_web_base_url()}datasets/{dataset_id}/compare?experimentId={experiment_id}"
-
-
-def _get_dataset_experiments_url(*, dataset_id: str) -> str:
-    return f"{_get_web_base_url()}datasets/{dataset_id}/experiments"
-
-
-def _phoenix_client() -> httpx.Client:
-    headers = get_env_client_headers()
-    client = httpx.Client(base_url=_get_base_url(), headers=headers)
-    return client
-
-
-def run_experiment(
-    dataset: Dataset,
-    task: ExperimentTask,
-    *,
-    experiment_name: Optional[str] = None,
-    experiment_description: Optional[str] = None,
-    experiment_metadata: Optional[Mapping[str, Any]] = None,
-    evaluators: Optional[Union[ExperimentEvaluator, Iterable[ExperimentEvaluator]]] = None,
-    rate_limit_errors: Optional[Union[Type[BaseException], Tuple[Type[BaseException], ...]]] = None,
-) -> Experiment:
-    # Add this to the params once supported in the UI
-    repetitions = 1
-    assert repetitions > 0, "Must run the experiment at least once."
-
-    client = _phoenix_client()
-
-    experiment_response = client.post(
-        f"/v1/datasets/{dataset.id}/experiments",
-        json={
-            "version-id": dataset.version_id,
-            "name": experiment_name,
-            "description": experiment_description,
-            "metadata": experiment_metadata,
-            "repetitions": repetitions,
-        },
-    )
-    experiment_response.raise_for_status()
-    exp_json = experiment_response.json()
-    experiment_id = exp_json["id"]
-    project_name = exp_json["project_name"]
-
-    resource = Resource({ResourceAttributes.PROJECT_NAME: project_name} if project_name else {})
-    tracer_provider = trace_sdk.TracerProvider(resource=resource)
-    tracer_provider.add_span_processor(
-        SimpleSpanProcessor(OTLPSpanExporter(urljoin(f"{_get_base_url()}", "v1/traces")))
-    )
-    tracer = tracer_provider.get_tracer(__name__)
-    root_span_name = f"Task: {task.__qualname__}"
-    root_span_kind = CHAIN.value
-
-    dataset_experiments_url = _get_dataset_experiments_url(dataset_id=dataset.id)
-    experiment_compare_url = _get_experiment_url(dataset_id=dataset.id, experiment_id=experiment_id)
-    print("🧪 Experiment started.")
-    print(f"📺 View dataset experiments: {dataset_experiments_url}")
-    print(f"🔗 View this experiment: {experiment_compare_url}")
-
-    errors: Tuple[Optional[Type[BaseException]], ...]
-    if not hasattr(rate_limit_errors, "__iter__"):
-        errors = (rate_limit_errors,)
-    else:
-        rate_limit_errors = cast(Tuple[Type[BaseException], ...], rate_limit_errors)
-        errors = rate_limit_errors
-
-    rate_limiters = [RateLimiter(rate_limit_error=rate_limit_error) for rate_limit_error in errors]
-
-    def sync_run_experiment(test_case: TestCase) -> ExperimentRun:
-        example, repetition_number = test_case.example, test_case.repetition_number
-        output = None
-        error: Optional[BaseException] = None
-        status = Status(StatusCode.OK)
-        with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
-            )
-            stack.enter_context(capture_spans(resource))
-            try:
-                # Do not use keyword arguments, which can fail at runtime
-                # even when function obeys protocol, because keyword arguments
-                # are implementation details.
-                _output = task(example)
-                if isinstance(_output, Awaitable):
-                    raise RuntimeError("Task is async but running in sync context")
-                else:
-                    output = _output
-            except BaseException as exc:
-                span.record_exception(exc)
-                status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
-                error = exc
-            span.set_attribute(INPUT_VALUE, json.dumps(example.input, ensure_ascii=False))
-            span.set_attribute(INPUT_MIME_TYPE, JSON.value)
-            if result := ExperimentResult(result=output) if output is not None else None:
-                if isinstance(output, str):
-                    span.set_attribute(OUTPUT_VALUE, output)
-                else:
-                    span.set_attribute(OUTPUT_VALUE, json.dumps(output, ensure_ascii=False))
-                    span.set_attribute(OUTPUT_MIME_TYPE, JSON.value)
-                span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
-            span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, root_span_kind)
-            span.set_status(status)
-
-        assert isinstance(
-            output, (dict, list, str, int, float, bool, type(None))
-        ), "Output must be JSON serializable"
-        experiment_run = ExperimentRun(
-            start_time=_decode_unix_nano(cast(int, span.start_time)),
-            end_time=_decode_unix_nano(cast(int, span.end_time)),
-            experiment_id=experiment_id,
-            dataset_example_id=example.id,
-            repetition_number=repetition_number,
-            output=result,
-            error=repr(error) if error else None,
-            trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
-        )
-        return experiment_run
-
-    async def async_run_experiment(test_case: TestCase) -> ExperimentRun:
-        example, repetition_number = test_case.example, test_case.repetition_number
-        output = None
-        error: Optional[BaseException] = None
-        status = Status(StatusCode.OK)
-        with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
-            )
-            stack.enter_context(capture_spans(resource))
-            try:
-                # Do not use keyword arguments, which can fail at runtime
-                # even when function obeys protocol, because keyword arguments
-                # are implementation details.
-                _output = task(example)
-                if isinstance(_output, Awaitable):
-                    output = await _output
-                else:
-                    output = _output
-            except BaseException as exc:
-                span.record_exception(exc)
-                status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
-                error = exc
-            span.set_attribute(INPUT_VALUE, json.dumps(example.input, ensure_ascii=False))
-            span.set_attribute(INPUT_MIME_TYPE, JSON.value)
-            if result := ExperimentResult(result=output) if output is not None else None:
-                if isinstance(output, str):
-                    span.set_attribute(OUTPUT_VALUE, output)
-                else:
-                    span.set_attribute(OUTPUT_VALUE, json.dumps(output, ensure_ascii=False))
-                    span.set_attribute(OUTPUT_MIME_TYPE, JSON.value)
-                span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
-            span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
-            span.set_status(status)
-
-        assert isinstance(
-            output, (dict, list, str, int, float, bool, type(None))
-        ), "Output must be JSON serializable"
-        experiment_run = ExperimentRun(
-            start_time=_decode_unix_nano(cast(int, span.start_time)),
-            end_time=_decode_unix_nano(cast(int, span.end_time)),
-            experiment_id=experiment_id,
-            dataset_example_id=example.id,
-            repetition_number=repetition_number,
-            output=result,
-            error=repr(error) if error else None,
-            trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
-        )
-        return experiment_run
-
-    rate_limited_sync_run_experiment = functools.reduce(
-        lambda fn, limiter: limiter.limit(fn), rate_limiters, sync_run_experiment
-    )
-    rate_limited_async_run_experiment = functools.reduce(
-        lambda fn, limiter: limiter.alimit(fn), rate_limiters, async_run_experiment
-    )
-
-    executor = get_executor_on_sync_context(
-        rate_limited_sync_run_experiment,
-        rate_limited_async_run_experiment,
-        max_retries=0,
-        exit_on_error=False,
-        fallback_return_value=None,
-        tqdm_bar_format=get_tqdm_progress_bar_formatter("running tasks"),
-    )
-
-    test_cases = [
-        TestCase(example=ex, repetition_number=rep)
-        for ex, rep in product(dataset.examples, range(1, repetitions + 1))
-    ]
-    experiment_payloads, _execution_details = executor.run(test_cases)
-    for payload in experiment_payloads:
-        if payload is not None:
-            resp = client.post(f"/v1/experiments/{experiment_id}/runs", json=jsonify(payload))
-            resp.raise_for_status()
-
-    experiment = Experiment(
-        id=experiment_id,
-        dataset_id=dataset.id,
-        dataset_version_id=dataset.version_id,
-        project_name=project_name,
-    )
-
-    print("✅ Task runs completed.")
-    print("🧠 Evaluation started.")
-
-    if evaluators is not None:
-        _evaluate_experiment(experiment, evaluators, dataset.examples, client)
-
-    return experiment
-
-
-def evaluate_experiment(
-    experiment: Experiment,
-    evaluators: Union[ExperimentEvaluator, Iterable[ExperimentEvaluator]],
-) -> None:
-    client = _phoenix_client()
-    dataset_id = experiment.dataset_id
-    dataset_version_id = experiment.dataset_version_id
-
-    dataset_examples = [
-        Example.from_dict(ex)
-        for ex in (
-            client.get(
-                f"/v1/datasets/{dataset_id}/examples",
-                params={"version-id": str(dataset_version_id)},
-            )
-            .json()
-            .get("data", {})
-            .get("examples", [])
-        )
-    ]
-    _evaluate_experiment(experiment, evaluators, dataset_examples, client)
-
-
-ExperimentEvaluatorName: TypeAlias = str
-
-
-def _evaluate_experiment(
-    experiment: Experiment,
-    evaluators: Union[ExperimentEvaluator, Iterable[ExperimentEvaluator]],
-    dataset_examples: Iterable[Example],
-    client: httpx.Client,
-) -> None:
-    if isinstance(evaluators, (CanEvaluate, CanAsyncEvaluate)):
-        evaluators = [evaluators]
-
-    experiment_id = experiment.id
-
-    experiment_runs = [
-        ExperimentRun.from_dict(exp_run)
-        for exp_run in client.get(f"/v1/experiments/{experiment_id}/runs").json()
-    ]
-
-    # not all dataset examples have associated experiment runs, so we need to pair them up
-    example_run_pairs = []
-    examples_by_id = {example.id: example for example in dataset_examples}
-    for exp_run in experiment_runs:
-        example = examples_by_id.get(exp_run.dataset_example_id)
-        if example:
-            example_run_pairs.append((deepcopy(example), exp_run))
-    evaluation_inputs = [
-        (example, run, evaluator.name, evaluator)
-        for (example, run), evaluator in product(example_run_pairs, evaluators)
-    ]
-
-    project_name = "evaluators"
-    resource = Resource({ResourceAttributes.PROJECT_NAME: project_name} if project_name else {})
-    tracer_provider = trace_sdk.TracerProvider(resource=resource)
-    tracer_provider.add_span_processor(
-        SimpleSpanProcessor(OTLPSpanExporter(urljoin(f"{_get_base_url()}", "v1/traces")))
-    )
-    tracer = tracer_provider.get_tracer(__name__)
-    root_span_kind = "EVALUATOR"
-
-    def sync_evaluate_run(
-        obj: Tuple[Example, ExperimentRun, ExperimentEvaluatorName, ExperimentEvaluator],
-    ) -> ExperimentEvaluationRun:
-        example, experiment_run, name, evaluator = obj
-        result: Optional[EvaluationResult] = None
-        error: Optional[BaseException] = None
-        status = Status(StatusCode.OK)
-        root_span_name = f"Evaluation: {name}"
-        with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
-            )
-            stack.enter_context(capture_spans(resource))
-            try:
-                # Do not use keyword arguments, which can fail at runtime
-                # even when function obeys protocol, because keyword arguments
-                # are implementation details.
-                if not isinstance(evaluator, CanEvaluate):
-                    raise RuntimeError("Task is async but running in sync context")
-                _output = evaluator.evaluate(example, experiment_run)
-                if isinstance(_output, Awaitable):
-                    raise RuntimeError("Task is async but running in sync context")
-                result = _output
-            except BaseException as exc:
-                span.record_exception(exc)
-                status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
-                error = exc
-            span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
-            span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
-            span.set_status(status)
-
-        evaluator_payload = ExperimentEvaluationRun(
-            experiment_run_id=cast(ExperimentRunId, experiment_run.id),
-            start_time=_decode_unix_nano(cast(int, span.start_time)),
-            end_time=_decode_unix_nano(cast(int, span.end_time)),
-            name=evaluator.name,
-            annotator_kind=evaluator.annotator_kind,
-            error=repr(error) if error else None,
-            result=result,
-            trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
-        )
-        return evaluator_payload
-
-    async def async_evaluate_run(
-        obj: Tuple[Example, ExperimentRun, ExperimentEvaluatorName, ExperimentEvaluator],
-    ) -> ExperimentEvaluationRun:
-        example, experiment_run, name, evaluator = obj
-        result: Optional[EvaluationResult] = None
-        error: Optional[BaseException] = None
-        status = Status(StatusCode.OK)
-        root_span_name = f"Evaluation: {name}"
-        with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
-            )
-            stack.enter_context(capture_spans(resource))
-            try:
-                # Do not use keyword arguments, which can fail at runtime
-                # even when function obeys protocol, because keyword arguments
-                # are implementation details.
-                if isinstance(evaluator, CanAsyncEvaluate):
-                    result = await evaluator.async_evaluate(example, experiment_run)
-                else:
-                    _output = evaluator.evaluate(example, experiment_run)
-                    if isinstance(_output, Awaitable):
-                        result = await _output
-                    else:
-                        result = _output
-            except BaseException as exc:
-                span.record_exception(exc)
-                status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
-                error = exc
-            span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
-            span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
-            span.set_status(status)
-
-        evaluator_payload = ExperimentEvaluationRun(
-            experiment_run_id=cast(ExperimentRunId, experiment_run.id),
-            start_time=_decode_unix_nano(cast(int, span.start_time)),
-            end_time=_decode_unix_nano(cast(int, span.end_time)),
-            name=evaluator.name,
-            annotator_kind=evaluator.annotator_kind,
-            error=repr(error) if error else None,
-            result=result,
-            trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
-        )
-        return evaluator_payload
-
-    executor = get_executor_on_sync_context(
-        sync_evaluate_run,
-        async_evaluate_run,
-        max_retries=0,
-        exit_on_error=False,
-        fallback_return_value=None,
-        tqdm_bar_format=get_tqdm_progress_bar_formatter("running experiment evaluations"),
-    )
-    evaluation_payloads, _execution_details = executor.run(evaluation_inputs)
-    for payload in evaluation_payloads:
-        if payload is not None:
-            resp = client.post("/v1/experiment_evaluations", json=jsonify(payload))
-            resp.raise_for_status()
-
-
-def _str_trace_id(id_: int) -> str:
-    return hexlify(id_.to_bytes(16, "big")).decode()
-
-
-def _decode_unix_nano(time_unix_nano: int) -> datetime:
-    return datetime.fromtimestamp(time_unix_nano / 1e9, tz=timezone.utc)
-
-
-INPUT_VALUE = SpanAttributes.INPUT_VALUE
-OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
-INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
-OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
-OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
-
-CHAIN = OpenInferenceSpanKindValues.CHAIN
-JSON = OpenInferenceMimeTypeValues.JSON
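
For reference, here is a minimal usage sketch of the API this deleted module exposed, matching the signatures shown above. my_dataset is assumed to be a phoenix.datasets.types.Dataset obtained elsewhere, and MatchesExpected is a hypothetical evaluator satisfying the CanEvaluate protocol from the (also deleted) types.py shown next:

from phoenix.datasets.experiments import run_experiment
from phoenix.datasets.types import EvaluationResult, Example, ExperimentRun


def my_task(example: Example) -> str:
    # A task maps one dataset example to a JSON-serializable output.
    return str(example.input.get("question", "")).upper()


class MatchesExpected:
    # Hypothetical evaluator; any object with name, annotator_kind, and
    # evaluate(example, experiment_run) satisfies the CanEvaluate protocol.
    name = "matches_expected"
    annotator_kind = "CODE"

    def evaluate(self, example: Example, experiment_run: ExperimentRun) -> EvaluationResult:
        expected = example.output.get("answer")
        actual = experiment_run.output.result if experiment_run.output else None
        return EvaluationResult(score=float(expected == actual))


experiment = run_experiment(
    my_dataset,  # assumed to exist; obtained from the Phoenix client
    my_task,
    experiment_name="uppercase-baseline",
    evaluators=MatchesExpected(),
)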
phoenix/datasets/types.py DELETED
@@ -1,212 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from datetime import datetime
-from types import MappingProxyType
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Protocol,
-    Sequence,
-    Union,
-    runtime_checkable,
-)
-
-from typing_extensions import TypeAlias
-
-JSONSerializable: TypeAlias = Optional[Union[Dict[str, Any], List[Any], str, int, float, bool]]
-
-ExperimentId: TypeAlias = str
-DatasetId: TypeAlias = str
-DatasetVersionId: TypeAlias = str
-ExampleId: TypeAlias = str
-RepetitionNumber: TypeAlias = int
-ExperimentRunId: TypeAlias = str
-TraceId: TypeAlias = str
-
-
-@dataclass(frozen=True)
-class Example:
-    id: ExampleId
-    updated_at: datetime
-    input: Mapping[str, JSONSerializable]
-    output: Mapping[str, JSONSerializable]
-    metadata: Mapping[str, JSONSerializable] = field(default_factory=lambda: MappingProxyType({}))
-
-    @classmethod
-    def from_dict(cls, obj: Mapping[str, Any]) -> Example:
-        return cls(
-            input=obj["input"],
-            output=obj["output"],
-            metadata=obj.get("metadata") or {},
-            id=obj["id"],
-            updated_at=obj["updated_at"],
-        )
-
-
-@dataclass(frozen=True)
-class Dataset:
-    id: DatasetId
-    version_id: DatasetVersionId
-    examples: Sequence[Example]
-
-
-@dataclass(frozen=True)
-class TestCase:
-    example: Example
-    repetition_number: RepetitionNumber
-
-
-@dataclass(frozen=True)
-class Experiment:
-    id: ExperimentId
-    dataset_id: DatasetId
-    dataset_version_id: DatasetVersionId
-    project_name: Optional[str] = None
-
-
-@dataclass(frozen=True)
-class ExperimentResult:
-    result: JSONSerializable
-
-    @classmethod
-    def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[ExperimentResult]:
-        if not obj:
-            return None
-        return cls(result=obj["result"])
-
-
-@dataclass(frozen=True)
-class ExperimentRun:
-    start_time: datetime
-    end_time: datetime
-    experiment_id: ExperimentId
-    dataset_example_id: ExampleId
-    repetition_number: RepetitionNumber
-    output: Optional[ExperimentResult] = None
-    error: Optional[str] = None
-    id: Optional[ExperimentRunId] = None
-    trace_id: Optional[TraceId] = None
-
-    @classmethod
-    def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
-        return cls(
-            start_time=obj["start_time"],
-            end_time=obj["end_time"],
-            experiment_id=obj["experiment_id"],
-            dataset_example_id=obj["dataset_example_id"],
-            repetition_number=obj.get("repetition_number") or 1,
-            output=ExperimentResult.from_dict(obj["output"]),
-            error=obj.get("error"),
-            id=obj.get("id"),
-            trace_id=obj.get("trace_id"),
-        )
-
-    def __post_init__(self) -> None:
-        if bool(self.output) == bool(self.error):
-            ValueError("Must specify either result or error")
-
-
-@dataclass(frozen=True)
-class EvaluationResult:
-    score: Optional[float] = None
-    label: Optional[str] = None
-    explanation: Optional[str] = None
-    metadata: Mapping[str, JSONSerializable] = field(default_factory=lambda: MappingProxyType({}))
-
-    @classmethod
-    def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[EvaluationResult]:
-        if not obj:
-            return None
-        return cls(
-            score=obj.get("score"),
-            label=obj.get("label"),
-            explanation=obj.get("explanation"),
-            metadata=obj.get("metadata") or {},
-        )
-
-    def __post_init__(self) -> None:
-        if self.score is None and not self.label and not self.explanation:
-            ValueError("Must specify one of score, label, or explanation")
-
-
-@dataclass(frozen=True)
-class ExperimentEvaluationRun:
-    experiment_run_id: ExperimentRunId
-    start_time: datetime
-    end_time: datetime
-    name: str
-    annotator_kind: str
-    error: Optional[str] = None
-    result: Optional[EvaluationResult] = None
-    id: Optional[str] = None
-    trace_id: Optional[TraceId] = None
-
-    @classmethod
-    def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentEvaluationRun:
-        return cls(
-            experiment_run_id=obj["experiment_run_id"],
-            start_time=obj["start_time"],
-            end_time=obj["end_time"],
-            name=obj["name"],
-            annotator_kind=obj["annotator_kind"],
-            error=obj.get("error"),
-            result=EvaluationResult.from_dict(obj.get("result")),
-            id=obj.get("id"),
-            trace_id=obj.get("trace_id"),
-        )
-
-    def __post_init__(self) -> None:
-        if bool(self.result) == bool(self.error):
-            ValueError("Must specify either result or error")
-
-
-class _HasName(Protocol):
-    name: str
-
-
-class _HasKind(Protocol):
-    @property
-    def annotator_kind(self) -> str: ...
-
-
-@runtime_checkable
-class CanEvaluate(_HasName, _HasKind, Protocol):
-    def evaluate(
-        self,
-        example: Example,
-        experiment_run: ExperimentRun,
-    ) -> EvaluationResult: ...
-
-
-@runtime_checkable
-class CanAsyncEvaluate(_HasName, _HasKind, Protocol):
-    async def async_evaluate(
-        self,
-        example: Example,
-        experiment_run: ExperimentRun,
-    ) -> EvaluationResult: ...
-
-
-ExperimentEvaluator: TypeAlias = Union[CanEvaluate, CanAsyncEvaluate]
-
-
-# Someday we'll do type checking in unit tests.
-if TYPE_CHECKING:
-
-    class _EvaluatorDummy:
-        annotator_kind: str
-        name: str
-
-        def evaluate(self, _: Example, __: ExperimentRun) -> EvaluationResult:
-            raise NotImplementedError
-
-        async def async_evaluate(self, _: Example, __: ExperimentRun) -> EvaluationResult:
-            raise NotImplementedError
-
-    _: ExperimentEvaluator
-    _ = _EvaluatorDummy()
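
A side note on the validators above: each __post_init__ constructs a ValueError but never raises it, so the mutual-exclusion checks were silent no-ops. A corrected sketch (illustrative only, not code from either release):

from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class _ResultOrError:
    # Illustrative stand-in for the removed ExperimentRun-style validation.
    output: Optional[str] = None
    error: Optional[str] = None

    def __post_init__(self) -> None:
        # The deleted code omitted `raise`, so this check never fired.
        if bool(self.output) == bool(self.error):
            raise ValueError("Must specify either result or error")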