arize-phoenix 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (71) hide show
  1. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/METADATA +11 -5
  2. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/RECORD +69 -40
  3. phoenix/__init__.py +3 -1
  4. phoenix/config.py +23 -1
  5. phoenix/core/model_schema.py +14 -37
  6. phoenix/core/model_schema_adapter.py +0 -1
  7. phoenix/core/traces.py +285 -0
  8. phoenix/datasets/dataset.py +14 -21
  9. phoenix/datasets/errors.py +4 -1
  10. phoenix/datasets/schema.py +1 -1
  11. phoenix/datetime_utils.py +87 -0
  12. phoenix/experimental/callbacks/__init__.py +0 -0
  13. phoenix/experimental/callbacks/langchain_tracer.py +228 -0
  14. phoenix/experimental/callbacks/llama_index_trace_callback_handler.py +364 -0
  15. phoenix/experimental/evals/__init__.py +33 -0
  16. phoenix/experimental/evals/functions/__init__.py +4 -0
  17. phoenix/experimental/evals/functions/binary.py +156 -0
  18. phoenix/experimental/evals/functions/common.py +31 -0
  19. phoenix/experimental/evals/functions/generate.py +50 -0
  20. phoenix/experimental/evals/models/__init__.py +4 -0
  21. phoenix/experimental/evals/models/base.py +130 -0
  22. phoenix/experimental/evals/models/openai.py +128 -0
  23. phoenix/experimental/evals/retrievals.py +2 -2
  24. phoenix/experimental/evals/templates/__init__.py +24 -0
  25. phoenix/experimental/evals/templates/default_templates.py +126 -0
  26. phoenix/experimental/evals/templates/template.py +107 -0
  27. phoenix/experimental/evals/utils/__init__.py +0 -0
  28. phoenix/experimental/evals/utils/downloads.py +33 -0
  29. phoenix/experimental/evals/utils/threads.py +27 -0
  30. phoenix/experimental/evals/utils/types.py +9 -0
  31. phoenix/experimental/evals/utils.py +33 -0
  32. phoenix/metrics/binning.py +0 -1
  33. phoenix/metrics/timeseries.py +2 -3
  34. phoenix/server/api/context.py +2 -0
  35. phoenix/server/api/input_types/SpanSort.py +60 -0
  36. phoenix/server/api/schema.py +85 -4
  37. phoenix/server/api/types/DataQualityMetric.py +10 -1
  38. phoenix/server/api/types/Dataset.py +2 -4
  39. phoenix/server/api/types/DatasetInfo.py +10 -0
  40. phoenix/server/api/types/ExportEventsMutation.py +4 -1
  41. phoenix/server/api/types/Functionality.py +15 -0
  42. phoenix/server/api/types/MimeType.py +16 -0
  43. phoenix/server/api/types/Model.py +3 -5
  44. phoenix/server/api/types/SortDir.py +13 -0
  45. phoenix/server/api/types/Span.py +229 -0
  46. phoenix/server/api/types/TimeSeries.py +9 -2
  47. phoenix/server/api/types/pagination.py +2 -0
  48. phoenix/server/app.py +24 -4
  49. phoenix/server/main.py +60 -24
  50. phoenix/server/span_handler.py +39 -0
  51. phoenix/server/static/index.js +956 -479
  52. phoenix/server/thread_server.py +10 -2
  53. phoenix/services.py +39 -16
  54. phoenix/session/session.py +99 -27
  55. phoenix/trace/exporter.py +71 -0
  56. phoenix/trace/filter.py +181 -0
  57. phoenix/trace/fixtures.py +23 -8
  58. phoenix/trace/schemas.py +59 -6
  59. phoenix/trace/semantic_conventions.py +141 -1
  60. phoenix/trace/span_json_decoder.py +60 -6
  61. phoenix/trace/span_json_encoder.py +1 -9
  62. phoenix/trace/trace_dataset.py +100 -8
  63. phoenix/trace/tracer.py +26 -3
  64. phoenix/trace/v1/__init__.py +522 -0
  65. phoenix/trace/v1/trace_pb2.py +52 -0
  66. phoenix/trace/v1/trace_pb2.pyi +351 -0
  67. phoenix/core/dimension_data_type.py +0 -6
  68. phoenix/core/dimension_type.py +0 -9
  69. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/WHEEL +0 -0
  70. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/IP_NOTICE +0 -0
  71. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/LICENSE +0 -0
@@ -13,10 +13,12 @@ class ThreadServer(Server):
13
13
  def __init__(
14
14
  self,
15
15
  app: Starlette,
16
+ host: str,
16
17
  port: int,
17
18
  ) -> None:
18
19
  config = Config(
19
20
  app=app,
21
+ host=host,
20
22
  port=port,
21
23
  # TODO: save logs to file
22
24
  log_level=logging.ERROR,
@@ -32,9 +34,15 @@ class ThreadServer(Server):
32
34
  thread.start()
33
35
  time_limit = time() + 5 # 5 seconds
34
36
  try:
35
- while not self.started and thread.is_alive() and time() < time_limit:
37
+ while (
38
+ time() < time_limit
39
+ and thread.is_alive()
40
+ and not self.should_exit
41
+ and not self.started
42
+ ):
36
43
  sleep(1e-3)
37
- if time() > time_limit:
44
+ if time() >= time_limit and not self.started:
45
+ self.should_exit = True
38
46
  raise RuntimeError("server took too long to start")
39
47
  yield thread
40
48
  finally:
phoenix/services.py CHANGED
@@ -4,11 +4,12 @@ import signal
4
4
  import subprocess
5
5
  import sys
6
6
  from pathlib import Path
7
- from typing import List, Optional
7
+ from time import sleep, time
8
+ from typing import Callable, List, Optional
8
9
 
9
10
  import psutil
10
11
 
11
- import phoenix.config as config
12
+ from phoenix.config import SERVER_DIR, get_pids_path, get_running_pid
12
13
 
13
14
  logger = logging.getLogger(__name__)
14
15
 
@@ -22,6 +23,12 @@ class Service:
22
23
 
23
24
  def __init__(self) -> None:
24
25
  self.child = self.start()
26
+ self._wait_until(
27
+ lambda: get_running_pid() is not None,
28
+ # Not sure why, but the process can take a very long time
29
+ # to get going, e.g. 15+ seconds in Colab.
30
+ up_to_seconds=60,
31
+ )
25
32
 
26
33
  @property
27
34
  def command(self) -> List[str]:
@@ -30,7 +37,7 @@ class Service:
30
37
  def start(self) -> psutil.Popen:
31
38
  """Starts the service."""
32
39
 
33
- if len(os.listdir(config.get_pids_path())) > 0:
40
+ if get_running_pid():
34
41
  # Currently, only one instance of Phoenix can be running at any given time.
35
42
  # Support for multiple concurrently running instances may be supported in the future.
36
43
  logger.warning(
@@ -48,21 +55,18 @@ class Service:
48
55
  text=True,
49
56
  env={**os.environ},
50
57
  )
51
- # TODO: convert to async with timeout because this can block forever
52
- # if there's nothing to read. This is also brittle because it relies
53
- # on a specific line of print output by a third party module (uvicorn).
54
- for line in iter(process.stdout.readline, b""):
55
- if "Uvicorn running on" in str(line):
56
- break
57
58
  return process
58
59
 
59
60
  @property
60
61
  def active(self) -> bool:
61
- return self.child.is_running()
62
+ # Not sure why, but the process can remain in a zombie state
63
+ # indefinitely, e.g. in Colab.
64
+ return self.child.is_running() and self.child.status() != psutil.STATUS_ZOMBIE
62
65
 
63
66
  def stop(self) -> None:
64
67
  """Stops the service."""
65
68
  self.child.terminate()
69
+ self._wait_until(lambda: get_running_pid() is None)
66
70
 
67
71
  @staticmethod
68
72
  def stop_any() -> None:
@@ -70,39 +74,54 @@ class Service:
70
74
  within the current session or if it is being run in a separate process on the
71
75
  same host machine. In either case, the instance will be forcibly stopped.
72
76
  """
73
- pids_path = config.get_pids_path()
74
- for filename in os.listdir(pids_path):
77
+ for file in get_pids_path().iterdir():
78
+ if not file.name.isnumeric():
79
+ continue
75
80
  try:
76
- os.kill(int(filename), signal.SIGKILL)
81
+ os.kill(int(file.name), signal.SIGKILL)
77
82
  except ProcessLookupError:
78
83
  pass
79
- filename_path = os.path.join(pids_path, filename)
80
- os.unlink(filename_path)
84
+ file.unlink(missing_ok=True)
85
+
86
def _wait_until(
    self,
    predicate: Callable[[], bool],
    up_to_seconds: float = 5,
    sleep_seconds: float = 1e-3,
) -> None:
    """Poll ``predicate`` until it holds, the time budget runs out, or the
    service is no longer active.

    Parameters
    ----------
    predicate : Callable[[], bool]
        Zero-argument callable checked before each pause.
    up_to_seconds : float
        Maximum time to keep polling, measured from the moment of the call.
    sleep_seconds : float
        Pause between two consecutive checks.

    The method returns ``None`` in every case; it does not report which of
    the three conditions ended the wait.
    """
    deadline = time() + up_to_seconds
    while True:
        # The checks run in the same order on every pass: predicate first,
        # then the deadline, then the liveness of the service.
        if predicate():
            return
        if time() >= deadline:
            return
        if not self.active:
            return
        sleep(sleep_seconds)
81
95
 
82
96
 
83
97
  class AppService(Service):
84
98
  """Service that controls the phoenix application."""
85
99
 
86
- working_dir = config.SERVER_DIR
100
+ working_dir = SERVER_DIR
87
101
 
88
102
  # Internal references to the name / directory of the dataset(s)
89
103
  __primary_dataset_name: str
90
104
  __reference_dataset_name: Optional[str]
91
105
  __corpus_dataset_name: Optional[str]
106
+ __trace_dataset_name: Optional[str]
92
107
 
93
108
  def __init__(
94
109
  self,
95
110
  export_path: Path,
111
+ host: str,
96
112
  port: int,
97
113
  primary_dataset_name: str,
98
114
  reference_dataset_name: Optional[str],
99
115
  corpus_dataset_name: Optional[str],
116
+ trace_dataset_name: Optional[str],
100
117
  ):
101
118
  self.export_path = export_path
119
+ self.host = host
102
120
  self.port = port
103
121
  self.__primary_dataset_name = primary_dataset_name
104
122
  self.__reference_dataset_name = reference_dataset_name
105
123
  self.__corpus_dataset_name = corpus_dataset_name
124
+ self.__trace_dataset_name = trace_dataset_name
106
125
  super().__init__()
107
126
 
108
127
  @property
@@ -112,6 +131,8 @@ class AppService(Service):
112
131
  "main.py",
113
132
  "--export_path",
114
133
  str(self.export_path),
134
+ "--host",
135
+ str(self.host),
115
136
  "--port",
116
137
  str(self.port),
117
138
  "datasets",
@@ -122,5 +143,7 @@ class AppService(Service):
122
143
  command.extend(["--reference", str(self.__reference_dataset_name)])
123
144
  if self.__corpus_dataset_name is not None:
124
145
  command.extend(["--corpus", str(self.__corpus_dataset_name)])
146
+ if self.__trace_dataset_name is not None:
147
+ command.extend(["--trace", str(self.__trace_dataset_name)])
125
148
  logger.info(f"command: {command}")
126
149
  return command
@@ -1,19 +1,24 @@
1
+ import json
1
2
  import logging
2
3
  from abc import ABC, abstractmethod
3
4
  from collections import UserList
5
+ from datetime import datetime
4
6
  from pathlib import Path
5
7
  from tempfile import TemporaryDirectory
6
8
  from typing import TYPE_CHECKING, Iterable, List, Optional, Set
7
9
 
8
10
  import pandas as pd
9
- from portpicker import pick_unused_port
10
11
 
11
- from phoenix.config import PORT, get_exported_files
12
+ from phoenix.config import get_env_host, get_env_port, get_exported_files
12
13
  from phoenix.core.model_schema_adapter import create_model_from_datasets
13
- from phoenix.datasets.dataset import Dataset
14
+ from phoenix.core.traces import Traces
15
+ from phoenix.datasets.dataset import EMPTY_DATASET, Dataset
14
16
  from phoenix.server.app import create_app
15
17
  from phoenix.server.thread_server import ThreadServer
16
18
  from phoenix.services import AppService
19
+ from phoenix.trace.filter import SpanFilter
20
+ from phoenix.trace.span_json_encoder import span_to_json
21
+ from phoenix.trace.trace_dataset import TraceDataset
17
22
 
18
23
  try:
19
24
  from IPython.display import IFrame # type: ignore
@@ -52,6 +57,9 @@ class ExportedData(_BaseList):
52
57
  class Session(ABC):
53
58
  """Session that maintains a 1-1 shared state with the Phoenix App."""
54
59
 
60
+ trace_dataset: Optional[TraceDataset]
61
+ traces: Optional[Traces]
62
+
55
63
  def __dir__(self) -> List[str]:
56
64
  return ["exports", "view", "url"]
57
65
 
@@ -60,11 +68,14 @@ class Session(ABC):
60
68
  primary_dataset: Dataset,
61
69
  reference_dataset: Optional[Dataset] = None,
62
70
  corpus_dataset: Optional[Dataset] = None,
63
- port: int = PORT,
71
+ trace_dataset: Optional[TraceDataset] = None,
72
+ host: Optional[str] = None,
73
+ port: Optional[int] = None,
64
74
  ):
65
75
  self.primary_dataset = primary_dataset
66
76
  self.reference_dataset = reference_dataset
67
77
  self.corpus_dataset = corpus_dataset
78
+ self.trace_dataset = trace_dataset
68
79
  self.model = create_model_from_datasets(
69
80
  primary_dataset,
70
81
  reference_dataset,
@@ -78,7 +89,13 @@ class Session(ABC):
78
89
  else None
79
90
  )
80
91
 
81
- self.port = port
92
+ self.traces = Traces()
93
+ if trace_dataset:
94
+ for span in trace_dataset.to_spans():
95
+ self.traces.put(span)
96
+
97
+ self.host = host or get_env_host()
98
+ self.port = port or get_env_port()
82
99
  self.temp_dir = TemporaryDirectory()
83
100
  self.export_path = Path(self.temp_dir.name) / "exports"
84
101
  self.export_path.mkdir(parents=True, exist_ok=True)
@@ -122,7 +139,29 @@ class Session(ABC):
122
139
  @property
123
140
  def url(self) -> str:
124
141
  """Returns the url for the phoenix app"""
125
- return _get_url(self.port, self.is_colab)
142
+ return _get_url(self.host, self.port, self.is_colab)
143
+
144
def get_span_dataframe(
    self,
    filter_condition: Optional[str] = None,
    *,
    start_time: Optional[datetime] = None,
    stop_time: Optional[datetime] = None,
    root_spans_only: Optional[bool] = None,
) -> Optional[pd.DataFrame]:
    """Return the session's spans as a flattened dataframe.

    Parameters
    ----------
    filter_condition : str, optional
        Expression handed to ``SpanFilter``; only spans for which the
        resulting predicate is truthy are kept. Ignored when empty.
    start_time, stop_time, root_spans_only
        Forwarded unchanged to ``self.traces.get_spans``.

    Returns
    -------
    pandas.DataFrame or None
        One row per span, indexed by the ``context.span_id`` column (which
        is also kept as a regular column). ``None`` when the session has no
        traces or when no span is selected.
    """
    traces = self.traces
    if traces is None:
        return None
    # Build the predicate before fetching spans so that an invalid filter
    # expression fails first.
    predicate = SpanFilter(filter_condition) if filter_condition else None
    spans = traces.get_spans(
        start_time=start_time,
        stop_time=stop_time,
        root_spans_only=root_spans_only,
    )
    if predicate:
        spans = filter(predicate, spans)
    # Round-trip each span through its JSON encoding to obtain plain dicts.
    rows = [json.loads(span_to_json(span)) for span in spans]
    if not rows:
        return None
    frame = pd.json_normalize(rows)
    return frame.set_index("context.span_id", drop=False)
126
165
 
127
166
 
128
167
  _session: Optional[Session] = None
@@ -134,22 +173,29 @@ class ProcessSession(Session):
134
173
  primary_dataset: Dataset,
135
174
  reference_dataset: Optional[Dataset] = None,
136
175
  corpus_dataset: Optional[Dataset] = None,
176
+ trace_dataset: Optional[TraceDataset] = None,
177
+ host: Optional[str] = None,
137
178
  port: Optional[int] = None,
138
179
  ) -> None:
139
180
  super().__init__(
140
181
  primary_dataset=primary_dataset,
141
182
  reference_dataset=reference_dataset,
142
183
  corpus_dataset=corpus_dataset,
143
- port=port or PORT,
184
+ trace_dataset=trace_dataset,
185
+ host=host,
186
+ port=port,
144
187
  )
145
188
  primary_dataset.to_disc()
146
189
  if isinstance(reference_dataset, Dataset):
147
190
  reference_dataset.to_disc()
148
191
  if isinstance(corpus_dataset, Dataset):
149
192
  corpus_dataset.to_disc()
193
+ if isinstance(trace_dataset, TraceDataset):
194
+ trace_dataset.to_disc()
150
195
  # Initialize an app service that keeps the server running
151
196
  self.app_service = AppService(
152
197
  self.export_path,
198
+ self.host,
153
199
  self.port,
154
200
  self.primary_dataset.name,
155
201
  reference_dataset_name=(
@@ -158,6 +204,9 @@ class ProcessSession(Session):
158
204
  corpus_dataset_name=(
159
205
  self.corpus_dataset.name if self.corpus_dataset is not None else None
160
206
  ),
207
+ trace_dataset_name=(
208
+ self.trace_dataset.name if self.trace_dataset is not None else None
209
+ ),
161
210
  )
162
211
 
163
212
  @property
@@ -175,22 +224,28 @@ class ThreadSession(Session):
175
224
  primary_dataset: Dataset,
176
225
  reference_dataset: Optional[Dataset] = None,
177
226
  corpus_dataset: Optional[Dataset] = None,
227
+ trace_dataset: Optional[TraceDataset] = None,
228
+ host: Optional[str] = None,
178
229
  port: Optional[int] = None,
179
230
  ):
180
231
  super().__init__(
181
232
  primary_dataset=primary_dataset,
182
233
  reference_dataset=reference_dataset,
183
234
  corpus_dataset=corpus_dataset,
184
- port=port or pick_unused_port(),
235
+ trace_dataset=trace_dataset,
236
+ host=host,
237
+ port=port,
185
238
  )
186
239
  # Initialize an app service that keeps the server running
187
240
  self.app = create_app(
188
241
  export_path=self.export_path,
189
242
  model=self.model,
190
243
  corpus=self.corpus,
244
+ traces=self.traces,
191
245
  )
192
246
  self.server = ThreadServer(
193
247
  app=self.app,
248
+ host=self.host,
194
249
  port=self.port,
195
250
  ).run_in_thread()
196
251
  # start the server
@@ -206,11 +261,13 @@ class ThreadSession(Session):
206
261
 
207
262
 
208
263
  def launch_app(
209
- primary: Dataset,
264
+ primary: Optional[Dataset] = None,
210
265
  reference: Optional[Dataset] = None,
211
266
  corpus: Optional[Dataset] = None,
267
+ trace: Optional[TraceDataset] = None,
268
+ host: Optional[str] = None,
212
269
  port: Optional[int] = None,
213
- run_in_thread: Optional[bool] = True,
270
+ run_in_thread: bool = True,
214
271
  ) -> Optional[Session]:
215
272
  """
216
273
  Launches the phoenix application and returns a session to interact with.
@@ -224,8 +281,14 @@ def launch_app(
224
281
  If not provided, drift analysis will not be available.
225
282
  corpus : Dataset, optional
226
283
  The dataset containing corpus for LLM context retrieval.
284
+ trace: TraceDataset, optional
285
+ **Experimental** The trace dataset containing the trace data.
286
+ host: str, optional
287
+ The host on which the server runs. It can also be set using environment
288
+ variable `PHOENIX_HOST`, otherwise it defaults to `127.0.0.1`.
227
289
  port: int, optional
228
- The port on which the server listens.
290
+ The port on which the server listens. It can also be set using environment
291
+ variable `PHOENIX_PORT`, otherwise it defaults to 6060.
229
292
  run_in_thread: bool, optional, default=True
230
293
  Whether the server should run in a Thread or Process.
231
294
 
@@ -243,23 +306,32 @@ def launch_app(
243
306
  """
244
307
  global _session
245
308
 
309
+ # Stopgap solution to allow the app to run without a primary dataset
310
+ if primary is None:
311
+ # Dummy dataset
312
+ # TODO: pass through the lack of a primary dataset to the app
313
+ primary = EMPTY_DATASET
314
+
315
+ if _session is not None and _session.active:
316
+ logger.warning(
317
+ "Existing running Phoenix instance detected! Shutting "
318
+ "it down and starting a new instance..."
319
+ )
320
+ _session.end()
321
+
246
322
  if run_in_thread:
247
- if _session is not None and _session.active:
248
- logger.warning(
249
- "Existing running Phoenix instance detected! Shutting "
250
- "it down and starting a new instance..."
251
- )
252
- _session.end()
253
- _session = ThreadSession(primary, reference, corpus, port=port)
323
+ _session = ThreadSession(primary, reference, corpus, trace, host=host, port=port)
254
324
  # TODO: catch exceptions from thread
255
- if not _session.active:
256
- logger.error(
257
- "💥 Phoenix failed to start. Please try again or file an issue "
258
- "with us at https://github.com/Arize-ai/phoenix"
259
- )
260
- return None
261
325
  else:
262
- _session = ProcessSession(primary, reference, port=port)
326
+ _session = ProcessSession(primary, reference, corpus, trace, host=host, port=port)
327
+
328
+ if not _session.active:
329
+ logger.error(
330
+ f"💥 Phoenix failed to start. Please try again (making sure that "
331
+ f"port {port} is not occupied by another process) or file an issue "
332
+ f"with us at https://github.com/Arize-ai/phoenix"
333
+ )
334
+ return None
263
335
 
264
336
  print(f"🌍 To view the Phoenix app in your browser, visit {_session.url}")
265
337
  print("📺 To view the Phoenix app in a notebook, run `px.active_session().view()`")
@@ -288,14 +360,14 @@ def close_app() -> None:
288
360
  logger.info("Session closed")
289
361
 
290
362
 
291
- def _get_url(port: int, is_colab: bool) -> str:
363
+ def _get_url(host: str, port: int, is_colab: bool) -> str:
292
364
  """Determines the IFrame URL based on whether this is in a Colab or in a local notebook"""
293
365
  if is_colab:
294
366
  from google.colab.output import eval_js # type: ignore
295
367
 
296
368
  return str(eval_js(f"google.colab.kernel.proxyPort({port}, {{'cache': true}})"))
297
369
 
298
- return f"http://localhost:{port}/"
370
+ return f"http://{host}:{port}/"
299
371
 
300
372
 
301
373
  def _is_colab() -> bool:
@@ -0,0 +1,71 @@
1
+ import gzip
2
+ import logging
3
+ import weakref
4
+ from queue import SimpleQueue
5
+ from threading import Thread
6
+ from types import MethodType
7
+ from typing import Optional
8
+
9
+ from requests import Session
10
+
11
+ from phoenix.config import get_env_host, get_env_port
12
+ from phoenix.trace.schemas import Span
13
+ from phoenix.trace.v1 import encode
14
+
15
+ logger = logging.getLogger(__name__)
16
+ logger.addHandler(logging.NullHandler())
17
+
18
+
19
class NoOpExporter:
    """Exporter that discards every span it is given."""

    def export(self, span: Span) -> None:
        """Accept ``span`` and do nothing with it."""
        return None
22
+
23
+
24
class HttpExporter:
    """Exporter that posts spans over HTTP from a background daemon thread.

    ``export`` only enqueues the span. A consumer thread takes spans off the
    queue, encodes each with ``phoenix.trace.v1.encode``, serializes and
    gzip-compresses the result, and POSTs it to
    ``http://{host}:{port}/v1/spans``.

    The consumer thread holds only a weak proxy to the exporter, so the
    thread does not keep the exporter alive. When the exporter is collected,
    finalizers close the HTTP session and put ``None`` on the queue as the
    termination sentinel.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ) -> None:
        """
        Parameters
        ----------
        host : str, optional
            Target host; falls back to ``get_env_host()`` when falsy.
        port : int, optional
            Target port; falls back to ``get_env_port()`` when falsy.
        """
        self._host = host or get_env_host()
        self._port = port or get_env_port()
        self._url = f"http://{self._host}:{self._port}/v1/spans"
        self._session = Session()
        weakref.finalize(self, self._session.close)
        self._session.headers.update(
            {
                "content-type": "application/x-protobuf",
                "content-encoding": "gzip",
            }
        )
        self._queue: "SimpleQueue[Optional[Span]]" = SimpleQueue()
        # Putting `None` as the sentinel value for queue termination.
        weakref.finalize(self, self._queue.put, None)
        self._start_consumer()

    def export(self, span: Span) -> None:
        """Enqueue ``span`` for delivery by the consumer thread."""
        self._queue.put(span)

    def _start_consumer(self) -> None:
        """Start the daemon thread that drains the queue.

        The method is bound to a weak proxy of ``self`` so that the running
        thread does not hold a strong reference to the exporter.
        """
        Thread(
            target=MethodType(
                self.__class__._consume_spans,
                weakref.proxy(self),
            ),
            daemon=True,
        ).start()

    def _consume_spans(self) -> None:
        """Send queued spans until the ``None`` sentinel is received.

        ``self`` is a weak proxy here. If the exporter is collected while
        this thread is between two queue reads, touching the proxy raises
        ``ReferenceError``; that is treated as a normal shutdown rather than
        left to surface as an unhandled exception in the thread.
        """
        try:
            # Compare against the sentinel explicitly instead of relying on
            # the truthiness of a span object.
            while (span := self._queue.get()) is not None:
                self._send(span)
        except ReferenceError:
            return

    def _send(self, span: Span) -> None:
        """Encode, compress and POST a single span; failures are logged.

        Encoding and compression run inside the ``try`` as well, so a span
        that cannot be encoded is logged and skipped instead of terminating
        the consumer thread.
        """
        try:
            pb_span = encode(span)
            serialized = pb_span.SerializeToString()
            data = gzip.compress(serialized)
            self._session.post(self._url, data=data)
        except Exception as e:
            logger.exception(e)
@@ -0,0 +1,181 @@
1
+ import ast
2
+ from typing import Any, Iterator, Mapping, Tuple, cast
3
+
4
+ from phoenix.trace import semantic_conventions
5
+ from phoenix.trace.schemas import Span
6
+
7
+
8
class SpanFilter:
    """Callable predicate over spans built from a textual condition.

    The condition is parsed as a single Python expression, checked by
    ``_validate_expression``, rewritten by ``_Translator`` and compiled once
    at construction time. Calling the instance evaluates the compiled
    expression against one span.
    """

    def __init__(self, condition: str) -> None:
        """Parse, validate, translate and compile ``condition``.

        Errors raised by parsing, validation or translation propagate to the
        caller.
        """
        parsed = ast.parse(condition, mode="eval")
        self._root = parsed
        _validate_expression(parsed, condition)
        translator = _Translator(condition)
        self._translated = translator.visit(parsed)
        # Nodes inserted by the translator carry no line/column information.
        ast.fix_missing_locations(self._translated)
        self._compiled = compile(self._translated, filename="", mode="eval")

    def __call__(self, span: Span) -> bool:
        """Evaluate the compiled condition with ``span`` bound to the name
        ``span`` and a fresh ``_Missing`` instance bound to ``_MISSING``."""
        namespace = {"span": span, "_MISSING": _Missing()}
        outcome = eval(self._compiled, {}, namespace)
        return cast(bool, outcome)
18
+
19
+
20
def _replace_none_with_missing(
    value: ast.expr,
    as_str: bool = False,
) -> ast.IfExp:
    """Wrap ``value`` so that a ``None`` result is replaced by ``_MISSING``.

    The generated expression is equivalent to
    ``_MISSING if (_MAYBE := value) is None else _MAYBE``;
    with ``as_str=True`` the final branch becomes ``str(_MAYBE)``.
    """
    maybe_value = ast.Name(id="_MAYBE", ctx=ast.Load())
    # `(_MAYBE := value) is None` -- the walrus lets `value` be evaluated once.
    bind_and_test = ast.Compare(
        left=ast.NamedExpr(
            target=ast.Name(id="_MAYBE", ctx=ast.Store()),
            value=value,
        ),
        ops=[ast.Is()],
        comparators=[ast.Constant(value=None)],
    )
    if as_str:
        fallback = _as_str(maybe_value)
    else:
        fallback = maybe_value
    return ast.IfExp(
        test=bind_and_test,
        body=ast.Name(id="_MISSING", ctx=ast.Load()),
        orelse=fallback,
    )
39
+
40
+
41
def _as_str(value: ast.expr) -> ast.Call:
    """Return the AST of ``str(value)`` for the given expression node."""
    str_builtin = ast.Name(id="str", ctx=ast.Load())
    return ast.Call(func=str_builtin, args=[value], keywords=[])
44
+
45
+
46
def _ast_replacement(expression: str) -> ast.expr:
    """Parse ``expression`` and wrap it with the None-to-``_MISSING`` guard.

    The expressions listed below are additionally converted with ``str(...)``
    when their value is not ``None``.
    """
    stringified = (
        "span.status_code",
        "span.span_kind",
        "span.parent_id",
        "span.context.span_id",
        "span.context.trace_id",
    )
    parsed_body = ast.parse(expression, mode="eval").body
    return _replace_none_with_missing(parsed_body, expression in stringified)
55
+
56
+
57
def _allowed_replacements() -> Iterator[Tuple[str, ast.expr]]:
    """Yield ``(source text, replacement AST)`` pairs for every field name
    that may be referenced in a filter condition.

    Each field is yielded under its short name and under its qualified
    spellings; all spellings of one field share the same replacement node.
    """
    # Fields read directly off the span: `name` and `span.name`, etc.
    for field in ("name", "status_code", "span_kind", "parent_id"):
        node = _ast_replacement("span." + field)
        yield field, node
        yield "span." + field, node

    # Fields read off the span context: `span_id`, `context.span_id`
    # and `span.context.span_id`, etc.
    for field in ("span_id", "trace_id"):
        node = _ast_replacement("span.context." + field)
        yield field, node
        yield "context." + field, node
        yield "span.context." + field, node

    # Attribute keys taken from the upper-case constants of
    # `semantic_conventions` whose names start with one of the prefixes
    # below; each distinct key is emitted once, in first-seen order.
    prefixes = ("RETRIEVAL", "EMBEDDING", "LLM", "TOOL")
    attribute_keys = dict.fromkeys(
        getattr(semantic_conventions, variable_name)
        for variable_name in dir(semantic_conventions)
        if variable_name.isupper() and variable_name.startswith(prefixes)
    )
    for field_name in attribute_keys:
        node = _ast_replacement(f"span.attributes.get('{field_name}')")
        yield field_name, node
        yield "attributes." + field_name, node
        yield "span.attributes." + field_name, node
85
+
86
+
87
class _Translator(ast.NodeTransformer):
    """Rewrite a validated filter expression into one evaluated against a
    ``span`` variable.

    Names and attribute chains are looked up by their exact source text in
    ``_allowed_fields`` and replaced by the mapped AST; anything not in the
    mapping is rejected. ``None`` constants become the ``_MISSING`` name.
    """

    # Source text of every permitted field reference -> replacement AST.
    # NOTE(review): the same node object is returned for every occurrence
    # of a field, so replacement nodes are shared between translated trees.
    _allowed_fields: Mapping[str, ast.expr] = dict(_allowed_replacements())

    def __init__(self, source: str) -> None:
        # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
        # In Python 3.9, we can use `ast.unparse(node)` instead.
        self._source = source

    def _replace(self, node: ast.AST) -> Any:
        """Return the replacement for ``node`` or raise ``SyntaxError``."""
        segment: str = cast(str, ast.get_source_segment(self._source, node))
        replacement = self._allowed_fields.get(segment)
        if not replacement:
            raise SyntaxError(f"invalid expression: {segment}")  # TODO: add details
        return replacement

    def visit_Attribute(self, node: ast.Attribute) -> Any:
        """Replace a whole attribute chain, e.g. ``span.context.span_id``."""
        return self._replace(node)

    def visit_Name(self, node: ast.Name) -> Any:
        """Replace a bare name, e.g. ``span_kind``."""
        return self._replace(node)

    def visit_Constant(self, node: ast.Constant) -> Any:
        """Map the ``None`` literal to ``_MISSING``; keep other constants."""
        if node.value is None:
            return ast.Name(id="_MISSING", ctx=ast.Load())
        return node
109
+
110
+
111
def _validate_expression(expression: ast.Expression, source: str) -> None:
    """Reject filter expressions that use anything outside a small whitelist.

    The top-level node must be a boolean operation or a comparison; every
    other node reached by ``ast.walk`` must be one of the permitted node
    types listed below.

    Raises
    ------
    SyntaxError
        If ``expression`` is not an ``ast.Expression`` or contains a node
        that is not permitted. The message quotes the offending source text.
    """
    # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
    # In Python 3.9, we can use `ast.unparse(node)` instead.
    if not isinstance(expression, ast.Expression):
        raise SyntaxError(f"invalid expression: {source}")  # TODO: add details

    permitted_roots = (ast.BoolOp, ast.Compare)
    permitted_descendants = (
        ast.Attribute,
        ast.BinOp,
        ast.BoolOp,
        ast.Compare,
        ast.Constant,
        ast.Load,
        ast.Name,
        ast.Tuple,
        ast.List,
        ast.UnaryOp,
        ast.boolop,
        ast.cmpop,
        ast.operator,
        ast.unaryop,
    )
    for position, node in enumerate(ast.walk(expression.body)):
        # `ast.walk` yields the starting node first, so position 0 is the root.
        permitted = permitted_roots if position == 0 else permitted_descendants
        if not isinstance(node, permitted):
            source_segment = cast(str, ast.get_source_segment(source, node))
            raise SyntaxError(f"invalid expression: {source_segment}")  # TODO: add details
142
+
143
+
144
class _Missing:
    """Falsifies all comparisons except those with self.

    Stand-in for an absent value: ordering comparisons, ``!=`` and membership
    tests are always false; ``==`` is true only against another ``_Missing``.
    It also behaves as an empty, already exhausted iterable, converts to the
    empty string and converts to NaN as a float.
    """

    def _always_false(self, other: Any) -> bool:
        return False

    # Every ordering comparison, inequality and `in` test shares one answer.
    __lt__ = _always_false
    __le__ = _always_false
    __gt__ = _always_false
    __ge__ = _always_false
    __ne__ = _always_false
    __contains__ = _always_false

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, _Missing)

    def __len__(self) -> int:
        return 0

    def __iter__(self) -> Any:
        # The instance is its own iterator.
        return self

    def __next__(self) -> Any:
        raise StopIteration()

    def __str__(self) -> str:
        return ""

    def __float__(self) -> float:
        return float("nan")