arize-phoenix 4.21.0__py3-none-any.whl → 4.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic; see the registry's advisory page for more details.

Files changed (28)
  1. {arize_phoenix-4.21.0.dist-info → arize_phoenix-4.22.1.dist-info}/METADATA +1 -1
  2. {arize_phoenix-4.21.0.dist-info → arize_phoenix-4.22.1.dist-info}/RECORD +25 -25
  3. phoenix/config.py +59 -1
  4. phoenix/db/migrations/future_versions/README.md +4 -0
  5. phoenix/db/migrations/future_versions/cd164e83824f_users_and_tokens.py +292 -0
  6. phoenix/db/migrations/versions/.gitignore +1 -0
  7. phoenix/db/models.py +61 -0
  8. phoenix/experiments/functions.py +4 -4
  9. phoenix/experiments/types.py +3 -3
  10. phoenix/server/api/context.py +0 -6
  11. phoenix/server/api/dataloaders/__init__.py +0 -9
  12. phoenix/server/api/routers/v1/experiment_runs.py +7 -2
  13. phoenix/server/api/types/Evaluation.py +1 -26
  14. phoenix/server/api/types/Project.py +1 -60
  15. phoenix/server/api/types/Span.py +1 -9
  16. phoenix/server/app.py +0 -11
  17. phoenix/server/dml_event_handler.py +0 -3
  18. phoenix/server/static/.vite/manifest.json +9 -9
  19. phoenix/server/static/assets/{components-D2V-mOGq.js → components-BC3-LP_a.js} +44 -44
  20. phoenix/server/static/assets/{index-B52Z3aZG.js → index-BjJvafYL.js} +1 -1
  21. phoenix/server/static/assets/{pages-CChOjmat.js → pages--n2933VW.js} +214 -195
  22. phoenix/version.py +1 -1
  23. phoenix/server/api/dataloaders/evaluation_summaries.py +0 -149
  24. phoenix/server/api/dataloaders/span_evaluations.py +0 -35
  25. phoenix/server/api/dataloaders/trace_evaluations.py +0 -35
  26. {arize_phoenix-4.21.0.dist-info → arize_phoenix-4.22.1.dist-info}/WHEEL +0 -0
  27. {arize_phoenix-4.21.0.dist-info → arize_phoenix-4.22.1.dist-info}/licenses/IP_NOTICE +0 -0
  28. {arize_phoenix-4.21.0.dist-info → arize_phoenix-4.22.1.dist-info}/licenses/LICENSE +0 -0
phoenix/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "4.21.0"
1
+ __version__ = "4.22.1"
@@ -1,149 +0,0 @@
1
- from collections import defaultdict
2
- from datetime import datetime
3
- from typing import (
4
- Any,
5
- DefaultDict,
6
- List,
7
- Literal,
8
- Optional,
9
- Tuple,
10
- )
11
-
12
- import pandas as pd
13
- from aioitertools.itertools import groupby
14
- from cachetools import LFUCache, TTLCache
15
- from sqlalchemy import Select, func, or_, select
16
- from strawberry.dataloader import AbstractCache, DataLoader
17
- from typing_extensions import TypeAlias, assert_never
18
-
19
- from phoenix.db import models
20
- from phoenix.server.api.dataloaders.cache import TwoTierCache
21
- from phoenix.server.api.input_types.TimeRange import TimeRange
22
- from phoenix.server.api.types.EvaluationSummary import EvaluationSummary
23
- from phoenix.server.types import DbSessionFactory
24
- from phoenix.trace.dsl import SpanFilter
25
-
26
- Kind: TypeAlias = Literal["span", "trace"]
27
- ProjectRowId: TypeAlias = int
28
- TimeInterval: TypeAlias = Tuple[Optional[datetime], Optional[datetime]]
29
- FilterCondition: TypeAlias = Optional[str]
30
- EvalName: TypeAlias = str
31
-
32
- Segment: TypeAlias = Tuple[Kind, ProjectRowId, TimeInterval, FilterCondition]
33
- Param: TypeAlias = EvalName
34
-
35
- Key: TypeAlias = Tuple[Kind, ProjectRowId, Optional[TimeRange], FilterCondition, EvalName]
36
- Result: TypeAlias = Optional[EvaluationSummary]
37
- ResultPosition: TypeAlias = int
38
- DEFAULT_VALUE: Result = None
39
-
40
-
41
- def _cache_key_fn(key: Key) -> Tuple[Segment, Param]:
42
- kind, project_rowid, time_range, filter_condition, eval_name = key
43
- interval = (
44
- (time_range.start, time_range.end) if isinstance(time_range, TimeRange) else (None, None)
45
- )
46
- return (kind, project_rowid, interval, filter_condition), eval_name
47
-
48
-
49
- _Section: TypeAlias = Tuple[ProjectRowId, EvalName, Kind]
50
- _SubKey: TypeAlias = Tuple[TimeInterval, FilterCondition]
51
-
52
-
53
- class EvaluationSummaryCache(
54
- TwoTierCache[Key, Result, _Section, _SubKey],
55
- ):
56
- def __init__(self) -> None:
57
- super().__init__(
58
- # TTL=3600 (1-hour) because time intervals are always moving forward, but
59
- # interval endpoints are rounded down to the hour by the UI, so anything
60
- # older than an hour most likely won't be a cache-hit anyway.
61
- main_cache=TTLCache(maxsize=64 * 32 * 2, ttl=3600),
62
- sub_cache_factory=lambda: LFUCache(maxsize=2 * 2),
63
- )
64
-
65
- def invalidate_project(self, project_rowid: ProjectRowId) -> None:
66
- for section in self._cache.keys():
67
- if section[0] == project_rowid:
68
- del self._cache[section]
69
-
70
- def _cache_key(self, key: Key) -> Tuple[_Section, _SubKey]:
71
- (kind, project_rowid, interval, filter_condition), eval_name = _cache_key_fn(key)
72
- return (project_rowid, eval_name, kind), (interval, filter_condition)
73
-
74
-
75
- class EvaluationSummaryDataLoader(DataLoader[Key, Result]):
76
- def __init__(
77
- self,
78
- db: DbSessionFactory,
79
- cache_map: Optional[AbstractCache[Key, Result]] = None,
80
- ) -> None:
81
- super().__init__(
82
- load_fn=self._load_fn,
83
- cache_key_fn=_cache_key_fn,
84
- cache_map=cache_map,
85
- )
86
- self._db = db
87
-
88
- async def _load_fn(self, keys: List[Key]) -> List[Result]:
89
- results: List[Result] = [DEFAULT_VALUE] * len(keys)
90
- arguments: DefaultDict[
91
- Segment,
92
- DefaultDict[Param, List[ResultPosition]],
93
- ] = defaultdict(lambda: defaultdict(list))
94
- for position, key in enumerate(keys):
95
- segment, param = _cache_key_fn(key)
96
- arguments[segment][param].append(position)
97
- for segment, params in arguments.items():
98
- stmt = _get_stmt(segment, *params.keys())
99
- async with self._db() as session:
100
- data = await session.stream(stmt)
101
- async for eval_name, group in groupby(data, lambda row: row.name):
102
- summary = EvaluationSummary(pd.DataFrame(group))
103
- for position in params[eval_name]:
104
- results[position] = summary
105
- return results
106
-
107
-
108
- def _get_stmt(
109
- segment: Segment,
110
- *eval_names: Param,
111
- ) -> Select[Any]:
112
- kind, project_rowid, (start_time, end_time), filter_condition = segment
113
- stmt = select()
114
- if kind == "span":
115
- msa = models.SpanAnnotation
116
- name_column, label_column, score_column = msa.name, msa.label, msa.score
117
- annotator_kind_column = msa.annotator_kind
118
- time_column = models.Span.start_time
119
- stmt = stmt.join(models.Span).join_from(models.Span, models.Trace)
120
- if filter_condition:
121
- sf = SpanFilter(filter_condition)
122
- stmt = sf(stmt)
123
- elif kind == "trace":
124
- mta = models.TraceAnnotation
125
- name_column, label_column, score_column = mta.name, mta.label, mta.score
126
- annotator_kind_column = mta.annotator_kind
127
- time_column = models.Trace.start_time
128
- stmt = stmt.join(models.Trace)
129
- else:
130
- assert_never(kind)
131
- stmt = stmt.add_columns(
132
- name_column,
133
- label_column,
134
- func.count().label("record_count"),
135
- func.count(label_column).label("label_count"),
136
- func.count(score_column).label("score_count"),
137
- func.sum(score_column).label("score_sum"),
138
- )
139
- stmt = stmt.group_by(name_column, label_column)
140
- stmt = stmt.order_by(name_column, label_column)
141
- stmt = stmt.where(models.Trace.project_rowid == project_rowid)
142
- stmt = stmt.where(annotator_kind_column == "LLM")
143
- stmt = stmt.where(or_(score_column.is_not(None), label_column.is_not(None)))
144
- stmt = stmt.where(name_column.in_(eval_names))
145
- if start_time:
146
- stmt = stmt.where(start_time <= time_column)
147
- if end_time:
148
- stmt = stmt.where(time_column < end_time)
149
- return stmt
@@ -1,35 +0,0 @@
1
- from collections import defaultdict
2
- from typing import (
3
- DefaultDict,
4
- List,
5
- )
6
-
7
- from sqlalchemy import select
8
- from strawberry.dataloader import DataLoader
9
- from typing_extensions import TypeAlias
10
-
11
- from phoenix.db import models
12
- from phoenix.server.api.types.Evaluation import SpanEvaluation
13
- from phoenix.server.types import DbSessionFactory
14
-
15
- Key: TypeAlias = int
16
- Result: TypeAlias = List[SpanEvaluation]
17
-
18
-
19
- class SpanEvaluationsDataLoader(DataLoader[Key, Result]):
20
- def __init__(self, db: DbSessionFactory) -> None:
21
- super().__init__(load_fn=self._load_fn)
22
- self._db = db
23
-
24
- async def _load_fn(self, keys: List[Key]) -> List[Result]:
25
- span_evaluations_by_id: DefaultDict[Key, Result] = defaultdict(list)
26
- msa = models.SpanAnnotation
27
- async with self._db() as session:
28
- data = await session.stream_scalars(
29
- select(msa).where(msa.span_rowid.in_(keys)).where(msa.annotator_kind == "LLM")
30
- )
31
- async for span_evaluation in data:
32
- span_evaluations_by_id[span_evaluation.span_rowid].append(
33
- SpanEvaluation.from_sql_span_annotation(span_evaluation)
34
- )
35
- return [span_evaluations_by_id[key] for key in keys]
@@ -1,35 +0,0 @@
1
- from collections import defaultdict
2
- from typing import (
3
- DefaultDict,
4
- List,
5
- )
6
-
7
- from sqlalchemy import select
8
- from strawberry.dataloader import DataLoader
9
- from typing_extensions import TypeAlias
10
-
11
- from phoenix.db import models
12
- from phoenix.server.api.types.Evaluation import TraceEvaluation
13
- from phoenix.server.types import DbSessionFactory
14
-
15
- Key: TypeAlias = int
16
- Result: TypeAlias = List[TraceEvaluation]
17
-
18
-
19
- class TraceEvaluationsDataLoader(DataLoader[Key, Result]):
20
- def __init__(self, db: DbSessionFactory) -> None:
21
- super().__init__(load_fn=self._load_fn)
22
- self._db = db
23
-
24
- async def _load_fn(self, keys: List[Key]) -> List[Result]:
25
- trace_evaluations_by_id: DefaultDict[Key, Result] = defaultdict(list)
26
- mta = models.TraceAnnotation
27
- async with self._db() as session:
28
- data = await session.stream_scalars(
29
- select(mta).where(mta.trace_rowid.in_(keys)).where(mta.annotator_kind == "LLM")
30
- )
31
- async for trace_evaluation in data:
32
- trace_evaluations_by_id[trace_evaluation.trace_rowid].append(
33
- TraceEvaluation.from_sql_trace_annotation(trace_evaluation)
34
- )
35
- return [trace_evaluations_by_id[key] for key in keys]