arize-phoenix 0.0.30__py3-none-any.whl → 0.0.31rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arize-phoenix
3
- Version: 0.0.30
3
+ Version: 0.0.31rc2
4
4
  Summary: ML Observability in your notebook
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -41,6 +41,9 @@ Requires-Dist: pytest; extra == 'dev'
41
41
  Requires-Dist: pytest-cov; extra == 'dev'
42
42
  Requires-Dist: pytest-lazy-fixture; extra == 'dev'
43
43
  Requires-Dist: strawberry-graphql[debug-server]==0.178.0; extra == 'dev'
44
+ Provides-Extra: experimental
45
+ Requires-Dist: openai; extra == 'experimental'
46
+ Requires-Dist: tenacity; extra == 'experimental'
44
47
  Description-Content-Type: text/markdown
45
48
 
46
49
  <p align="center">
@@ -1,4 +1,4 @@
1
- phoenix/__init__.py,sha256=2qq5YT2m8nKOG5SGFwBiQNE-v7748SVHl9VCK6stMhc,1114
1
+ phoenix/__init__.py,sha256=sfBQ2lmi-mb29hU-W-ZqstA9EALGX5n--2dKGf1PSJE,1192
2
2
  phoenix/config.py,sha256=tjNn9oqDxQmeO85sCchLlTsDiRJ6AoK0CTt_Uc_hrKM,1442
3
3
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
4
  phoenix/services.py,sha256=BlQF7lTQwhXUHBZBjZOoTnDM2Qni_hziUXsfp1Hux4Q,3978
@@ -15,6 +15,9 @@ phoenix/datasets/errors.py,sha256=-Iyk8rsvP_KX-P4gOqjm26slkDq1-9CohK07_LkrYCI,81
15
15
  phoenix/datasets/fixtures.py,sha256=0_PacL3dw49zulKpFpPdhvxJxeGmHTguqIyf2VXkBkk,19158
16
16
  phoenix/datasets/schema.py,sha256=HlM0f-pLFul2sYyHZM-Av8OFxLFkn57dkK_BWbMzyJY,6668
17
17
  phoenix/datasets/validation.py,sha256=dZ9lCFUV0EY7HCkQkQBrs-GLAEIZdpOqUxwD5l4dp88,8294
18
+ phoenix/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ phoenix/experimental/evals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ phoenix/experimental/evals/retrievals.py,sha256=3PBv2yFIcxdn_y-L6We554TsozqCDjTVBE8017RB2mY,3793
18
21
  phoenix/metrics/README.md,sha256=5gekqTU-5gGdMwvcfNp2Wlu8p1ul9kGY_jq0XXQusoI,1964
19
22
  phoenix/metrics/__init__.py,sha256=sLp7td1GIt_0Z8dPUyP4L0-_4x9c871yAaGX30oMsvg,2433
20
23
  phoenix/metrics/binning.py,sha256=CXPPcAkRmmR__IG36a6UGs5RBtgXXPuWQbafPtuG1ww,12787
@@ -84,12 +87,12 @@ phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZV
84
87
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
85
88
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
86
89
  phoenix/server/static/index.html,sha256=xPZZH-y4dWlbDutPEV1k0rhmWJtIV-Db9aYP-dEc7wM,703
87
- phoenix/server/static/index.js,sha256=Z7vMbIgbNLvLrx-FNUc84R8qvqfFm7XhQ2fCKZ3LvLU,2534545
90
+ phoenix/server/static/index.js,sha256=SJnIFu7ufB_k38YBCI7D1btTSq4mmE7WkUf1iOBxiGw,2573278
88
91
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
89
92
  phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
93
  phoenix/session/session.py,sha256=WwGH7qStR4kAhmrRsw35BwXvCQDnEWpGm0crjyrWTvs,9519
91
- arize_phoenix-0.0.30.dist-info/METADATA,sha256=zvzU09aJcBiVSBcrxJnY0OBRX6UBq_gyoshQtEp4S3s,10849
92
- arize_phoenix-0.0.30.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
93
- arize_phoenix-0.0.30.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
94
- arize_phoenix-0.0.30.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
95
- arize_phoenix-0.0.30.dist-info/RECORD,,
94
+ arize_phoenix-0.0.31rc2.dist-info/METADATA,sha256=iRpUMDo-rwHxcjDEUL4JY3HRi0Zma4Yz9nNWu0Z22hI,10977
95
+ arize_phoenix-0.0.31rc2.dist-info/WHEEL,sha256=hKi7AIIx6qfnsRbr087vpeJnrVUuDokDHZacPPMW7-Y,87
96
+ arize_phoenix-0.0.31rc2.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
97
+ arize_phoenix-0.0.31rc2.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
98
+ arize_phoenix-0.0.31rc2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.18.0
2
+ Generator: hatchling 1.12.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
phoenix/__init__.py CHANGED
@@ -2,8 +2,9 @@ from .datasets.dataset import Dataset
2
2
  from .datasets.fixtures import ExampleDatasets, load_example
3
3
  from .datasets.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
4
4
  from .session.session import Session, active_session, close_app, launch_app
5
+ from .trace.fixtures import load_example_traces
5
6
 
6
- __version__ = "0.0.30"
7
+ __version__ = "0.0.31rc2"
7
8
 
8
9
  # module level doc-string
9
10
  __doc__ = """
@@ -32,4 +33,5 @@ __all__ = [
32
33
  "close_app",
33
34
  "launch_app",
34
35
  "Session",
36
+ "load_example_traces",
35
37
  ]
File without changes
File without changes
@@ -0,0 +1,91 @@
1
+ """
2
+ Helper functions for evaluating the retrieval step of retrieval-augmented generation.
3
+ """
4
+
5
+ from typing import List, Optional
6
+
7
+ import openai
8
+ from tenacity import (
9
+ retry,
10
+ stop_after_attempt,
11
+ wait_random_exponential,
12
+ )
13
+
14
# System message sent with every relevance-classification request. It pins the
# model to a one-word answer so the reply can be mapped to a boolean.
_EVALUATION_SYSTEM_MESSAGE = (
    "You will be given a query and a reference text. "
    "You must determine whether the reference text contains an answer to the input query. "
    'Your response must be single word, either "relevant" or "irrelevant", '
    "and should not contain any text or characters aside from that word. "
    '"irrelevant" means that the reference text does not contain an answer to the query. '
    '"relevant" means the reference text contains an answer to the query.'
)
# User-message template, filled with str.format using the `query` and `reference`
# fields. The trailing space after the final colon is part of the prompt.
_QUERY_CONTEXT_PROMPT_TEMPLATE = """# Query: {query}

# Reference: {reference}

# Answer ("relevant" or "irrelevant"): """
27
+
28
+
29
def compute_precisions_at_k(
    relevance_classifications: List[Optional[bool]],
) -> List[Optional[float]]:
    """Compute precision@k for k = 1, 2, ..., n over a ranked list of relevance labels.

    Args:
        relevance_classifications (List[Optional[bool]]): Relevance classifications for a
            set of retrieved documents, sorted by order of retrieval (the first element is
            the classification for the first retrieved document, and so on). ``None``
            entries mean the classification for that document is unknown.

    Returns:
        List[Optional[float]]: One entry per input element, where n is the length of the
            input list. Entry k-1 is the fraction of relevant documents among the
            classified (non-None) documents in the first k positions. Unknown entries are
            left out of both numerator and denominator. An entry is ``None`` while no
            classified document has been seen yet.
    """
    precisions_at_k: List[Optional[float]] = []
    num_relevant_classifications = 0
    num_non_none_classifications = 0
    for relevance_classification in relevance_classifications:
        # Only real booleans are counted; None (or any other non-bool value) is
        # treated as "unknown" and leaves both counters untouched.
        if isinstance(relevance_classification, bool):
            num_non_none_classifications += 1
            num_relevant_classifications += int(relevance_classification)
        # Appended on every iteration, classified or not, so the output always has
        # the same length as the input.
        precisions_at_k.append(
            num_relevant_classifications / num_non_none_classifications
            if num_non_none_classifications > 0
            else None
        )
    return precisions_at_k
62
+
63
+
64
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def classify_relevance(query: str, document: str, model_name: str) -> Optional[bool]:
    """Ask an OpenAI chat model whether a document contains an answer to a query.

    The call is wrapped in a tenacity ``retry`` decorator configured with
    ``wait_random_exponential(min=1, max=60)`` and ``stop_after_attempt(6)``, so a
    failing request is re-attempted with randomized exponential backoff before giving up.

    Args:
        query (str): The query text.
        document (str): The document text.
        model_name (str): The name of the OpenAI API model to use for the classification.

    Returns:
        Optional[bool]: True if the model answers "relevant", False if it answers
            "irrelevant", or None if the LLM produces an unparseable output.
    """
    prompt = _QUERY_CONTEXT_PROMPT_TEMPLATE.format(
        query=query,
        reference=document,
    )
    response = openai.ChatCompletion.create(
        messages=[
            {"role": "system", "content": _EVALUATION_SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ],
        model=model_name,
    )
    raw_response_text = str(response["choices"][0]["message"]["content"]).strip()
    # Models often echo the label with different casing, surrounding quotes (the
    # system message itself shows the words in quotes) or a trailing period.
    # Normalize those away so such replies are not discarded as unparseable.
    normalized_response_text = raw_response_text.strip("\"'`.!").strip().lower()
    relevance_classification = {"relevant": True, "irrelevant": False}.get(
        normalized_response_text
    )
    return relevance_classification