arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (52)
  1. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +12 -6
  2. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +47 -42
  3. phoenix/config.py +21 -0
  4. phoenix/datetime_utils.py +4 -0
  5. phoenix/db/insertion/dataset.py +19 -16
  6. phoenix/db/insertion/evaluation.py +4 -4
  7. phoenix/db/insertion/helpers.py +4 -12
  8. phoenix/db/insertion/span.py +3 -3
  9. phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
  10. phoenix/db/models.py +8 -3
  11. phoenix/experiments/__init__.py +6 -0
  12. phoenix/experiments/evaluators/__init__.py +29 -0
  13. phoenix/experiments/evaluators/base.py +153 -0
  14. phoenix/{datasets → experiments}/evaluators/code_evaluators.py +25 -53
  15. phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +62 -31
  16. phoenix/experiments/evaluators/utils.py +189 -0
  17. phoenix/experiments/functions.py +616 -0
  18. phoenix/{datasets → experiments}/tracing.py +19 -0
  19. phoenix/experiments/types.py +722 -0
  20. phoenix/experiments/utils.py +9 -0
  21. phoenix/server/api/context.py +4 -0
  22. phoenix/server/api/dataloaders/__init__.py +4 -0
  23. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  24. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  25. phoenix/server/api/helpers/dataset_helpers.py +8 -7
  26. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  27. phoenix/server/api/mutations/project_mutations.py +9 -4
  28. phoenix/server/api/routers/v1/__init__.py +1 -1
  29. phoenix/server/api/routers/v1/dataset_examples.py +10 -10
  30. phoenix/server/api/routers/v1/datasets.py +152 -48
  31. phoenix/server/api/routers/v1/evaluations.py +4 -11
  32. phoenix/server/api/routers/v1/experiment_evaluations.py +23 -23
  33. phoenix/server/api/routers/v1/experiment_runs.py +5 -17
  34. phoenix/server/api/routers/v1/experiments.py +5 -5
  35. phoenix/server/api/routers/v1/spans.py +6 -4
  36. phoenix/server/api/types/Experiment.py +12 -0
  37. phoenix/server/api/types/ExperimentRun.py +1 -1
  38. phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
  39. phoenix/server/app.py +4 -0
  40. phoenix/server/static/index.js +712 -588
  41. phoenix/session/client.py +321 -28
  42. phoenix/trace/fixtures.py +6 -6
  43. phoenix/utilities/json.py +8 -8
  44. phoenix/version.py +1 -1
  45. phoenix/datasets/__init__.py +0 -0
  46. phoenix/datasets/evaluators/__init__.py +0 -18
  47. phoenix/datasets/evaluators/_utils.py +0 -13
  48. phoenix/datasets/experiments.py +0 -485
  49. phoenix/datasets/types.py +0 -212
  50. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
  51. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
  52. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
phoenix/session/client.py CHANGED
@@ -1,6 +1,7 @@
 import csv
 import gzip
 import logging
+import re
 import weakref
 from collections import Counter
 from datetime import datetime
@@ -15,6 +16,7 @@ from typing import (
     Literal,
     Mapping,
     Optional,
+    Sequence,
     Tuple,
     Union,
     cast,
@@ -24,6 +26,7 @@ from urllib.parse import quote, urljoin
 import httpx
 import pandas as pd
 import pyarrow as pa
+from httpx import HTTPStatusError, Response
 from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ExportTraceServiceRequest
 from opentelemetry.proto.common.v1.common_pb2 import AnyValue, KeyValue
 from opentelemetry.proto.resource.v1.resource_pb2 import Resource
@@ -38,9 +41,9 @@ from phoenix.config import (
     get_env_port,
     get_env_project_name,
 )
-from phoenix.datasets.types import Dataset, Example
 from phoenix.datetime_utils import normalize_datetime
 from phoenix.db.insertion.dataset import DatasetKeys
+from phoenix.experiments.types import Dataset, Example
 from phoenix.session.data_extractor import DEFAULT_SPAN_LIMIT, TraceDataExtractor
 from phoenix.trace import Evaluations, TraceDataset
 from phoenix.trace.dsl import SpanQuery
@@ -48,6 +51,8 @@ from phoenix.trace.otel import encode_span_to_otlp
 
 logger = logging.getLogger(__name__)
 
+DatasetAction: TypeAlias = Literal["create", "append"]
+
 
 class Client(TraceDataExtractor):
     def __init__(
@@ -88,6 +93,23 @@ class Client(TraceDataExtractor):
         if warn_if_server_not_running:
             self._warn_if_phoenix_is_not_running()
 
+    @property
+    def web_url(self) -> str:
+        """
+        Return the web URL of the Phoenix UI. This is different from the base
+        URL in the cases where there is a proxy like colab
+
+
+        Returns:
+            str: A fully qualified URL to the Phoenix UI.
+        """
+        # Avoid circular import
+        from phoenix.session.session import active_session
+
+        if session := active_session():
+            return session.url
+        return self._base_url
+
     def query_spans(
         self,
         *queries: SpanQuery,
@@ -125,7 +147,10 @@ class Client(TraceDataExtractor):
         end_time = end_time or stop_time
         response = self._client.post(
             url=urljoin(self._base_url, "v1/spans"),
-            params={"project-name": project_name},
+            params={
+                "project_name": project_name,
+                "project-name": project_name,  # for backward-compatibility
+            },
             json={
                 "queries": [q.to_dict() for q in queries],
                 "start_time": _to_iso_format(normalize_datetime(start_time)),
@@ -172,7 +197,10 @@ class Client(TraceDataExtractor):
         project_name = project_name or get_env_project_name()
         response = self._client.get(
             url=urljoin(self._base_url, "v1/evaluations"),
-            params={"project-name": project_name},
+            params={
+                "project_name": project_name,
+                "project-name": project_name,  # for backward-compatibility
+            },
         )
         if response.status_code == 404:
             logger.info("No evaluations found.")
@@ -329,12 +357,12 @@ class Client(TraceDataExtractor):
 
         response = self._client.get(
             urljoin(self._base_url, f"/v1/datasets/{quote(id)}/examples"),
-            params={"version-id": version_id} if version_id else None,
+            params={"version_id": version_id} if version_id else None,
         )
         response.raise_for_status()
         data = response.json()["data"]
-        examples = [
-            Example(
+        examples = {
+            example["id"]: Example(
                 id=example["id"],
                 input=example["input"],
                 output=example["output"],
@@ -342,7 +370,7 @@ class Client(TraceDataExtractor):
                 updated_at=datetime.fromisoformat(example["updated_at"]),
             )
             for example in data["examples"]
-        ]
+        }
         resolved_dataset_id = data["dataset_id"]
         resolved_version_id = data["version_id"]
         return Dataset(
@@ -399,7 +427,7 @@ class Client(TraceDataExtractor):
         url = f"v1/datasets/{dataset_id}/csv"
         response = httpx.get(
             url=urljoin(self._base_url, url),
-            params={"version": dataset_version_id} if dataset_version_id else {},
+            params={"version_id": dataset_version_id} if dataset_version_id else {},
         )
         response.raise_for_status()
         return pd.read_csv(
@@ -408,16 +436,170 @@
         )
 
     def upload_dataset(
+        self,
+        *,
+        dataset_name: str,
+        dataframe: Optional[pd.DataFrame] = None,
+        csv_file_path: Optional[Union[str, Path]] = None,
+        input_keys: Iterable[str] = (),
+        output_keys: Iterable[str] = (),
+        metadata_keys: Iterable[str] = (),
+        inputs: Iterable[Mapping[str, Any]] = (),
+        outputs: Iterable[Mapping[str, Any]] = (),
+        metadata: Iterable[Mapping[str, Any]] = (),
+        dataset_description: Optional[str] = None,
+    ) -> Dataset:
+        """
+        Upload examples as dataset to the Phoenix server. If `dataframe` or
+        `csv_file_path` are provided, must also provide `input_keys` (and
+        optionally with `output_keys` or `metadata_keys` or both), which is a
+        list of strings denoting the column names in the dataframe or the csv
+        file. On the other hand, a sequence of dictionaries can also be provided
+        via `inputs` (and optionally with `outputs` or `metadat` or both), each
+        item of which represents a separate example in the dataset.
+
+        Args:
+            dataset_name: (str): Name of the dataset.
+            dataframe (pd.DataFrame): pandas DataFrame.
+            csv_file_path (str | Path): Location of a CSV text file
+            input_keys (Iterable[str]): List of column names used as input keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            output_keys (Iterable[str]): List of column names used as output keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            metadata_keys (Iterable[str]): List of column names used as metadata keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            inputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            outputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            metadata (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            dataset_description: (Optional[str]): Description of the dataset.
+
+        Returns:
+            A Dataset object with the uploaded examples.
+        """
+        if dataframe is not None or csv_file_path is not None:
+            if dataframe is not None and csv_file_path is not None:
+                raise ValueError(
+                    "Please provide either `dataframe` or `csv_file_path`, but not both"
+                )
+            if list(inputs) or list(outputs) or list(metadata):
+                option = "dataframe" if dataframe is not None else "csv_file_path"
+                raise ValueError(
+                    f"Please provide only either `{option}` or list of dictionaries "
+                    f"via `inputs` (with `outputs` and `metadata`) but not both."
+                )
+            table = dataframe if dataframe is not None else csv_file_path
+            assert table is not None  # for type-checker
+            return self._upload_tabular_dataset(
+                table,
+                dataset_name=dataset_name,
+                input_keys=input_keys,
+                output_keys=output_keys,
+                metadata_keys=metadata_keys,
+                dataset_description=dataset_description,
+            )
+        return self._upload_json_dataset(
+            dataset_name=dataset_name,
+            inputs=inputs,
+            outputs=outputs,
+            metadata=metadata,
+            dataset_description=dataset_description,
+        )
+
+    def append_to_dataset(
+        self,
+        *,
+        dataset_name: str,
+        dataframe: Optional[pd.DataFrame] = None,
+        csv_file_path: Optional[Union[str, Path]] = None,
+        input_keys: Iterable[str] = (),
+        output_keys: Iterable[str] = (),
+        metadata_keys: Iterable[str] = (),
+        inputs: Iterable[Mapping[str, Any]] = (),
+        outputs: Iterable[Mapping[str, Any]] = (),
+        metadata: Iterable[Mapping[str, Any]] = (),
+        dataset_description: Optional[str] = None,
+    ) -> Dataset:
+        """
+        Append examples to dataset on the Phoenix server. If `dataframe` or
+        `csv_file_path` are provided, must also provide `input_keys` (and
+        optionally with `output_keys` or `metadata_keys` or both), which is a
+        list of strings denoting the column names in the dataframe or the csv
+        file. On the other hand, a sequence of dictionaries can also be provided
+        via `inputs` (and optionally with `outputs` or `metadat` or both), each
+        item of which represents a separate example in the dataset.
+
+        Args:
+            dataset_name: (str): Name of the dataset.
+            dataframe (pd.DataFrame): pandas DataFrame.
+            csv_file_path (str | Path): Location of a CSV text file
+            input_keys (Iterable[str]): List of column names used as input keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            output_keys (Iterable[str]): List of column names used as output keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            metadata_keys (Iterable[str]): List of column names used as metadata keys.
+                input_keys, output_keys, metadata_keys must be disjoint, and must
+                exist in CSV column headers.
+            inputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            outputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            metadata (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            dataset_description: (Optional[str]): Description of the dataset.
+
+        Returns:
+            A Dataset object with its examples.
+        """
+        if dataframe is not None or csv_file_path is not None:
+            if dataframe is not None and csv_file_path is not None:
+                raise ValueError(
+                    "Please provide either `dataframe` or `csv_file_path`, but not both"
+                )
+            if list(inputs) or list(outputs) or list(metadata):
+                option = "dataframe" if dataframe is not None else "csv_file_path"
+                raise ValueError(
+                    f"Please provide only either `{option}` or list of dictionaries "
+                    f"via `inputs` (with `outputs` and `metadata`) but not both."
+                )
+            table = dataframe if dataframe is not None else csv_file_path
+            assert table is not None  # for type-checker
+            return self._upload_tabular_dataset(
+                table,
+                dataset_name=dataset_name,
+                input_keys=input_keys,
+                output_keys=output_keys,
+                metadata_keys=metadata_keys,
+                dataset_description=dataset_description,
+                action="append",
+            )
+        return self._upload_json_dataset(
+            dataset_name=dataset_name,
+            inputs=inputs,
+            outputs=outputs,
+            metadata=metadata,
+            dataset_description=dataset_description,
+            action="append",
+        )
+
+    def _upload_tabular_dataset(
         self,
         table: Union[str, Path, pd.DataFrame],
         /,
         *,
-        name: str,
+        dataset_name: str,
         input_keys: Iterable[str],
-        output_keys: Iterable[str],
+        output_keys: Iterable[str] = (),
         metadata_keys: Iterable[str] = (),
-        description: Optional[str] = None,
-        action: Literal["create", "append"] = "create",
+        dataset_description: Optional[str] = None,
+        action: DatasetAction = "create",
     ) -> Dataset:
         """
         Upload examples as dataset to the Phoenix server.
@@ -425,7 +607,7 @@ class Client(TraceDataExtractor):
        Args:
            table (str | Path | pd.DataFrame): Location of a CSV text file, or
                pandas DataFrame.
-           name: (str): Name of the dataset. Required if action=append.
+           dataset_name: (str): Name of the dataset. Required if action=append.
            input_keys (Iterable[str]): List of column names used as input keys.
                input_keys, output_keys, metadata_keys must be disjoint, and must
                exist in CSV column headers.
@@ -435,17 +617,24 @@ class Client(TraceDataExtractor):
            metadata_keys (Iterable[str]): List of column names used as metadata keys.
                input_keys, output_keys, metadata_keys must be disjoint, and must
                exist in CSV column headers.
-           description: (Optional[str]): Description of the dataset.
-           action: (Literal["create", "append"): Create new dataset or append to an
-               existing dataset. If action=append, dataset name is required.
+           dataset_description: (Optional[str]): Description of the dataset.
+           action: (Literal["create", "append"]): Create new dataset or append to an
+               existing one. If action="append" and dataset does not exist, it'll
+               be created.
 
        Returns:
            A Dataset object with the uploaded examples.
        """
        if action not in ("create", "append"):
            raise ValueError(f"Invalid action: {action}")
-       if not name:
+       if not dataset_name:
            raise ValueError("Dataset name must not be blank")
+       input_keys, output_keys, metadata_keys = (
+           (keys,) if isinstance(keys, str) else (keys or ())
+           for keys in (input_keys, output_keys, metadata_keys)
+       )
+       if not any(map(bool, (input_keys, output_keys, metadata_keys))):
+           input_keys, output_keys, metadata_keys = _infer_keys(table)
        keys = DatasetKeys(
            frozenset(input_keys),
            frozenset(output_keys),
@@ -457,20 +646,86 @@ class Client(TraceDataExtractor):
             file = _prepare_csv(Path(table), keys)
         else:
             assert_never(table)
+        print("📤 Uploading dataset...")
         response = self._client.post(
             url=urljoin(self._base_url, "v1/datasets/upload"),
             files={"file": file},
             data={
                 "action": action,
-                "name": name,
-                "description": description,
+                "name": dataset_name,
+                "description": dataset_description,
                 "input_keys[]": sorted(keys.input),
                 "output_keys[]": sorted(keys.output),
                 "metadata_keys[]": sorted(keys.metadata),
             },
             params={"sync": True},
         )
-        response.raise_for_status()
+        return self._process_dataset_upload_response(response)
+
+    def _upload_json_dataset(
+        self,
+        *,
+        dataset_name: str,
+        inputs: Iterable[Mapping[str, Any]],
+        outputs: Iterable[Mapping[str, Any]] = (),
+        metadata: Iterable[Mapping[str, Any]] = (),
+        dataset_description: Optional[str] = None,
+        action: DatasetAction = "create",
+    ) -> Dataset:
+        """
+        Upload examples as dataset to the Phoenix server.
+
+        Args:
+            dataset_name: (str): Name of the dataset
+            inputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            outputs (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            metadata (Iterable[Mapping[str, Any]]): List of dictionaries object each
+                corresponding to an example in the dataset.
+            dataset_description: (Optional[str]): Description of the dataset.
+            action: (Literal["create", "append"]): Create new dataset or append to an
+                existing one. If action="append" and dataset does not exist, it'll
+                be created.
+
+        Returns:
+            A Dataset object with the uploaded examples.
+        """
+        # convert to list to avoid issues with pandas Series
+        inputs, outputs, metadata = list(inputs), list(outputs), list(metadata)
+        if not inputs or not _is_all_dict(inputs):
+            raise ValueError(
+                "`inputs` should be a non-empty sequence containing only dictionary objects"
+            )
+        for name, seq in {"outputs": outputs, "metadata": metadata}.items():
+            if seq and not (len(seq) == len(inputs) and _is_all_dict(seq)):
+                raise ValueError(
+                    f"`{name}` should be a sequence of the same length as `inputs` "
+                    "containing only dictionary objects"
+                )
+        print("📤 Uploading dataset...")
+        response = self._client.post(
+            url=urljoin(self._base_url, "v1/datasets/upload"),
+            headers={"Content-Encoding": "gzip"},
+            json={
+                "action": action,
+                "name": dataset_name,
+                "description": dataset_description,
+                "inputs": inputs,
+                "outputs": outputs,
+                "metadata": metadata,
+            },
+            params={"sync": True},
+        )
+        return self._process_dataset_upload_response(response)
+
+    def _process_dataset_upload_response(self, response: Response) -> Dataset:
+        try:
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if msg := response.text:
+                raise DatasetUploadError(msg) from e
+            raise
         data = response.json()["data"]
         dataset_id = data["dataset_id"]
         response = self._client.get(
@@ -480,11 +735,14 @@
         data = response.json()["data"]
         version_id = data["version_id"]
         examples = data["examples"]
+        print(f"💾 Examples uploaded: {self.web_url}datasets/{dataset_id}/examples")
+        print(f"🗄️ Dataset version ID: {version_id}")
+
         return Dataset(
             id=dataset_id,
             version_id=version_id,
-            examples=[
-                Example(
+            examples={
+                example["id"]: Example(
                     id=example["id"],
                     input=example["input"],
                     output=example["output"],
@@ -492,7 +750,7 @@
                     updated_at=datetime.fromisoformat(example["updated_at"]),
                 )
                 for example in examples
-            ],
+            },
         )
 
 
@@ -502,20 +760,25 @@ FileType: TypeAlias = str
 FileHeaders: TypeAlias = Dict[str, str]
 
 
-def _prepare_csv(
-    path: Path,
-    keys: DatasetKeys,
-) -> Tuple[FileName, FilePointer, FileType, FileHeaders]:
+def _get_csv_column_headers(path: Path) -> Tuple[str, ...]:
     path = path.resolve()
     if not path.is_file():
         raise FileNotFoundError(f"File does not exist: {path}")
     with open(path, "r") as f:
         rows = csv.reader(f)
         try:
-            column_headers = next(rows)
+            column_headers = tuple(next(rows))
             _ = next(rows)
         except StopIteration:
            raise ValueError("csv file has no data")
+    return column_headers
+
+
+def _prepare_csv(
+    path: Path,
+    keys: DatasetKeys,
+) -> Tuple[FileName, FilePointer, FileType, FileHeaders]:
+    column_headers = _get_csv_column_headers(path)
     (header, freq), *_ = Counter(column_headers).most_common(1)
     if freq > 1:
         raise ValueError(f"Duplicated column header in CSV file: {header}")
@@ -545,5 +808,35 @@ def _prepare_pyarrow(
     return "pandas", file, "application/x-pandas-pyarrow", {}
 
 
+_response_header = re.compile(r"(?i)(response|answer)s*$")
+
+
+def _infer_keys(
+    table: Union[str, Path, pd.DataFrame],
+) -> Tuple[Tuple[str, ...], Tuple[str, ...], Tuple[str, ...]]:
+    column_headers = (
+        tuple(table.columns)
+        if isinstance(table, pd.DataFrame)
+        else _get_csv_column_headers(Path(table))
+    )
+    for i, header in enumerate(column_headers):
+        if _response_header.search(header):
+            break
+    else:
+        i = len(column_headers)
+    return (
+        column_headers[:i],
+        column_headers[i : i + 1],
+        column_headers[i + 1 :],
+    )
+
+
 def _to_iso_format(value: Optional[datetime]) -> Optional[str]:
     return value.isoformat() if value else None
+
+
+def _is_all_dict(seq: Sequence[Any]) -> bool:
+    return all(map(lambda obj: isinstance(obj, dict), seq))
+
+
+class DatasetUploadError(Exception): ...
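
For orientation, the sketch below shows how the new keyword-only upload API above might be called. It is an illustrative example, not taken from this diff: the dataset name, column names, and the use of the top-level phoenix.Client export are assumptions.

# Hypothetical usage sketch of the upload_dataset / append_to_dataset API shown above.
# Assumes a Phoenix server is reachable and that Client is exported as phoenix.Client.
import pandas as pd
import phoenix as px

client = px.Client()

# Tabular upload: keys name columns of the dataframe (or CSV file). If no keys are
# passed, _infer_keys() splits the columns around the first "response"/"answer"-like
# header, per the regex added in this diff.
df = pd.DataFrame(
    {
        "question": ["What is Phoenix?"],  # illustrative column names
        "answer": ["An observability library."],
        "source": ["docs"],
    }
)
dataset = client.upload_dataset(
    dataset_name="demo-dataset",  # hypothetical dataset name
    dataframe=df,
    input_keys=("question",),
    output_keys=("answer",),
    metadata_keys=("source",),
    dataset_description="toy example",
)

# JSON upload: parallel sequences of dicts; per the docstring above, append
# creates the dataset if it does not exist yet.
client.append_to_dataset(
    dataset_name="demo-dataset",
    inputs=[{"question": "Is examples now a dict keyed by example id?"}],
    outputs=[{"answer": "Yes, per the Dataset construction above."}],
)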
phoenix/trace/fixtures.py CHANGED
@@ -244,12 +244,12 @@ def send_dataset_fixtures(
         try:
             if i % 2:
                 client.upload_dataset(
-                    fixture.dataframe,
-                    name=fixture.name,
+                    dataset_name=fixture.name,
+                    dataframe=fixture.dataframe,
                     input_keys=fixture.input_keys,
                     output_keys=fixture.output_keys,
                     metadata_keys=fixture.metadata_keys,
-                    description=fixture.description,
+                    dataset_description=fixture.description,
                 )
             else:
                 with NamedTemporaryFile() as tf:
@@ -257,12 +257,12 @@
                     shutil.copyfileobj(fixture.csv, f)
                     f.flush()
                     client.upload_dataset(
-                        tf.name,
-                        name=fixture.name,
+                        dataset_name=fixture.name,
+                        csv_file_path=tf.name,
                         input_keys=fixture.input_keys,
                         output_keys=fixture.output_keys,
                         metadata_keys=fixture.metadata_keys,
-                        description=fixture.description,
+                        dataset_description=fixture.description,
                     )
         except HTTPStatusError as e:
             print(e.response.content.decode())
phoenix/utilities/json.py CHANGED
@@ -2,7 +2,7 @@ import dataclasses
 import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any, Mapping, Sequence, SupportsFloat, Union, get_args, get_origin
+from typing import Any, Mapping, Sequence, Union, get_args, get_origin
 
 import numpy as np
 
@@ -15,10 +15,10 @@ def jsonify(obj: Any) -> Any:
         return jsonify(obj.value)
     if isinstance(obj, (str, int, float, bool)) or obj is None:
         return obj
-    if isinstance(obj, np.ndarray):
+    if isinstance(obj, (list, set, frozenset, Sequence)):
         return [jsonify(v) for v in obj]
-    if isinstance(obj, SupportsFloat):
-        return float(obj)
+    if isinstance(obj, (dict, Mapping)):
+        return {jsonify(k): jsonify(v) for k, v in obj.items()}
     if dataclasses.is_dataclass(obj):
         return {
             k: jsonify(v)
@@ -29,10 +29,6 @@ def jsonify(obj: Any) -> Any:
                 and type(None) in get_args(field)
             )
         }
-    if isinstance(obj, (Sequence, set, frozenset)):
-        return [jsonify(v) for v in obj]
-    if isinstance(obj, Mapping):
-        return {jsonify(k): jsonify(v) for k, v in obj.items()}
     if isinstance(obj, (datetime.date, datetime.datetime, datetime.time)):
         return obj.isoformat()
     if isinstance(obj, datetime.timedelta):
@@ -41,6 +37,10 @@
         return str(obj)
     if isinstance(obj, BaseException):
         return str(obj)
+    if isinstance(obj, np.ndarray):
+        return [jsonify(v) for v in obj]
+    if hasattr(obj, "__float__"):
+        return float(obj)
     if hasattr(obj, "model_dump") and callable(obj.model_dump):
         # pydantic v2
         try:
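
A minimal sketch of the effect of the reordered jsonify() branches above: containers and mappings are now checked before the numpy and __float__ fallbacks, so nested structures are recursed rather than coerced to floats. The behavior is inferred from the diff, not verified against this release.

# Illustrative only; assumes phoenix.utilities.json.jsonify as shown in the diff above.
import numpy as np

from phoenix.utilities.json import jsonify

payload = {"scores": np.array([0.1, 0.9]), "meta": ("a", "b")}
# The dict branch recurses first; the ndarray and the tuple are each converted
# to plain Python lists instead of hitting the float() fallback.
print(jsonify(payload))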
phoenix/version.py CHANGED
@@ -1 +1 @@
-__version__ = "4.4.4rc4"
+__version__ = "4.4.4rc6"
phoenix/datasets/evaluators/__init__.py DELETED
@@ -1,18 +0,0 @@
-from phoenix.datasets.evaluators.code_evaluators import ContainsKeyword, JSONParsable
-from phoenix.datasets.evaluators.llm_evaluators import (
-    CoherenceEvaluator,
-    ConcisenessEvaluator,
-    HelpfulnessEvaluator,
-    LLMCriteriaEvaluator,
-    RelevanceEvaluator,
-)
-
-__all__ = [
-    "ContainsKeyword",
-    "JSONParsable",
-    "CoherenceEvaluator",
-    "ConcisenessEvaluator",
-    "LLMCriteriaEvaluator",
-    "HelpfulnessEvaluator",
-    "RelevanceEvaluator",
-]
phoenix/datasets/evaluators/_utils.py DELETED
@@ -1,13 +0,0 @@
-from phoenix.datasets.types import JSONSerializable
-
-
-def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
-    if isinstance(obj, dict):
-        if len(obj) == 1:
-            key = next(iter(obj.keys()))
-            output = obj[key]
-            assert isinstance(
-                output, (dict, list, str, int, float, bool, type(None))
-            ), "Output must be JSON serializable"
-            return output
-    return obj