arize-phoenix 0.0.48__tar.gz → 0.0.50rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.
Files changed (152)
  1. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/PKG-INFO +5 -4
  2. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/README.md +4 -3
  3. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/__init__.py +1 -1
  4. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/__init__.py +2 -1
  5. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/__init__.py +4 -0
  6. arize_phoenix-0.0.48/src/phoenix/experimental/evals/functions/binary.py → arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/classify.py +81 -86
  7. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/openai.py +1 -1
  8. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/index.js +406 -396
  9. arize_phoenix-0.0.48/src/phoenix/experimental/evals/functions/__init__.py +0 -4
  10. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/.gitignore +0 -0
  11. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/IP_NOTICE +0 -0
  12. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/LICENSE +0 -0
  13. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/pyproject.toml +0 -0
  14. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/config.py +0 -0
  15. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/__init__.py +0 -0
  16. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/embedding_dimension.py +0 -0
  17. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/model.py +0 -0
  18. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/model_schema.py +0 -0
  19. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/model_schema_adapter.py +0 -0
  20. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/core/traces.py +0 -0
  21. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/__init__.py +0 -0
  22. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/dataset.py +0 -0
  23. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/errors.py +0 -0
  24. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/fixtures.py +0 -0
  25. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/schema.py +0 -0
  26. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datasets/validation.py +0 -0
  27. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/datetime_utils.py +0 -0
  28. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/__init__.py +0 -0
  29. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/functions/generate.py +0 -0
  30. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/functions/processing.py +0 -0
  31. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/__init__.py +0 -0
  32. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/base.py +0 -0
  33. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/bedrock.py +0 -0
  34. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/vertexai.py +0 -0
  35. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/retrievals.py +0 -0
  36. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/templates/__init__.py +0 -0
  37. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/templates/default_templates.py +0 -0
  38. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/templates/template.py +0 -0
  39. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
  40. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/utils/downloads.py +0 -0
  41. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/utils/threads.py +0 -0
  42. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/utils/types.py +0 -0
  43. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/utils.py +0 -0
  44. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/README.md +0 -0
  45. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/__init__.py +0 -0
  46. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/binning.py +0 -0
  47. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/metrics.py +0 -0
  48. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/mixins.py +0 -0
  49. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/timeseries.py +0 -0
  50. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/metrics/wrappers.py +0 -0
  51. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/pointcloud/__init__.py +0 -0
  52. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/pointcloud/clustering.py +0 -0
  53. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/pointcloud/pointcloud.py +0 -0
  54. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/pointcloud/projectors.py +0 -0
  55. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  56. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/py.typed +0 -0
  57. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/__init__.py +0 -0
  58. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/__init__.py +0 -0
  59. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/context.py +0 -0
  60. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/helpers.py +0 -0
  61. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  62. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  63. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  64. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  65. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  66. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  67. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  68. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  69. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  70. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/input_types/__init__.py +0 -0
  71. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/interceptor.py +0 -0
  72. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/schema.py +0 -0
  73. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Cluster.py +0 -0
  74. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  75. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Dataset.py +0 -0
  76. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
  77. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DatasetRole.py +0 -0
  78. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  79. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Dimension.py +0 -0
  80. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  81. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  82. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DimensionType.py +0 -0
  83. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  84. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  85. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  86. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Event.py +0 -0
  87. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  88. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
  89. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  90. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Functionality.py +0 -0
  91. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/MimeType.py +0 -0
  92. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Model.py +0 -0
  93. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/NumericRange.py +0 -0
  94. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  95. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  96. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Retrieval.py +0 -0
  97. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  98. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Segments.py +0 -0
  99. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/SortDir.py +0 -0
  100. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/Span.py +0 -0
  101. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  102. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  103. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  104. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  105. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/__init__.py +0 -0
  106. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/node.py +0 -0
  107. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/api/types/pagination.py +0 -0
  108. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/app.py +0 -0
  109. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/main.py +0 -0
  110. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/span_handler.py +0 -0
  111. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  112. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  113. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  114. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  115. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  116. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  117. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  118. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  119. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/favicon.ico +0 -0
  120. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/index.css +0 -0
  121. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/static/modernizr.js +0 -0
  122. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/templates/__init__.py +0 -0
  123. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/templates/index.html +0 -0
  124. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/server/thread_server.py +0 -0
  125. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/services.py +0 -0
  126. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/session/__init__.py +0 -0
  127. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/session/session.py +0 -0
  128. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/__init__.py +0 -0
  129. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/exporter.py +0 -0
  130. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/filter.py +0 -0
  131. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/fixtures.py +0 -0
  132. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/langchain/__init__.py +0 -0
  133. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  134. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/langchain/tracer.py +0 -0
  135. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/llama_index/__init__.py +0 -0
  136. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/llama_index/callback.py +0 -0
  137. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
  138. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/openai/__init__.py +0 -0
  139. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/openai/instrumentor.py +0 -0
  140. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/schemas.py +0 -0
  141. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/semantic_conventions.py +0 -0
  142. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/span_json_decoder.py +0 -0
  143. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/span_json_encoder.py +0 -0
  144. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/trace_dataset.py +0 -0
  145. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/tracer.py +0 -0
  146. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/utils.py +0 -0
  147. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/v1/__init__.py +0 -0
  148. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/v1/trace_pb2.py +0 -0
  149. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/trace/v1/trace_pb2.pyi +0 -0
  150. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/utilities/__init__.py +0 -0
  151. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/utilities/error_handling.py +0 -0
  152. {arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/utilities/logging.py +0 -0
{arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version: 0.0.48
+Version: 0.0.50rc0
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -196,6 +196,7 @@ Launch Phoenix in a notebook and view the traces of your LangChain application i
 ```python
 import phoenix as px
 import pandas as pd
+import numpy as np
 
 # Launch phoenix
 session = px.launch_app()
@@ -219,7 +220,7 @@ documents_df = pd.read_parquet(
     "http://storage.googleapis.com/arize-assets/phoenix/datasets/unstructured/llm/context-retrieval/langchain-pinecone/database.parquet"
 )
 knn_retriever = KNNRetriever(
-    index=np.stack(df["text_vector"]),
+    index=np.stack(documents_df["text_vector"]),
     texts=documents_df["text"].tolist(),
     embeddings=OpenAIEmbeddings(),
 )
@@ -270,7 +271,7 @@ from phoenix.experimental.evals import (
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
     OpenAIModel,
     download_benchmark_dataset,
-    llm_eval_binary,
+    llm_classify,
 )
 from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
 
@@ -291,7 +292,7 @@ model = OpenAIModel(
     temperature=0.0,
 )
 rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
-df["eval_relevance"] = llm_eval_binary(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
+df["eval_relevance"] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
 #Golden dataset has True/False map to -> "irrelevant" / "relevant"
 #we can then scikit compare to output of template - same format
 y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
{arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/README.md

@@ -141,6 +141,7 @@ Launch Phoenix in a notebook and view the traces of your LangChain application i
 ```python
 import phoenix as px
 import pandas as pd
+import numpy as np
 
 # Launch phoenix
 session = px.launch_app()
@@ -164,7 +165,7 @@ documents_df = pd.read_parquet(
     "http://storage.googleapis.com/arize-assets/phoenix/datasets/unstructured/llm/context-retrieval/langchain-pinecone/database.parquet"
 )
 knn_retriever = KNNRetriever(
-    index=np.stack(df["text_vector"]),
+    index=np.stack(documents_df["text_vector"]),
     texts=documents_df["text"].tolist(),
     embeddings=OpenAIEmbeddings(),
 )
@@ -215,7 +216,7 @@ from phoenix.experimental.evals import (
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
     OpenAIModel,
     download_benchmark_dataset,
-    llm_eval_binary,
+    llm_classify,
 )
 from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
 
@@ -236,7 +237,7 @@ model = OpenAIModel(
     temperature=0.0,
 )
 rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
-df["eval_relevance"] = llm_eval_binary(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
+df["eval_relevance"] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
 #Golden dataset has True/False map to -> "irrelevant" / "relevant"
 #we can then scikit compare to output of template - same format
 y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
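For context on the renamed entry point above, here is a minimal sketch of how the new `llm_classify` call can be exercised on a small in-memory dataframe. It assumes an OpenAI API key is configured and that `RAG_RELEVANCY_PROMPT_TEMPLATE_STR` is importable as in the README; the toy dataframe, the `gpt-4` model name, and the column names `query`/`reference` are illustrative assumptions, not part of the package.

```python
# Sketch of the llm_classify API introduced in 0.0.50rc0 (assumes an OpenAI API key is set).
import pandas as pd

from phoenix.experimental.evals import (
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    OpenAIModel,
    llm_classify,
)

# Toy dataframe; column names must match the template's variables (assumed to be
# "query" and "reference" here).
df = pd.DataFrame(
    {
        "query": ["What is Arize Phoenix?"],
        "reference": ["Phoenix is an ML observability library that runs in your notebook."],
    }
)

model = OpenAIModel(model_name="gpt-4", temperature=0.0)  # model_name is an illustrative choice
rails = list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())

# Returns one label per row, drawn from `rails`, or "NOT_PARSABLE" if the output cannot be snapped.
df["eval_relevance"] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
print(df["eval_relevance"].tolist())
```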
{arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/__init__.py

@@ -5,7 +5,7 @@ from .session.session import Session, active_session, close_app, launch_app
 from .trace.fixtures import load_example_traces
 from .trace.trace_dataset import TraceDataset
 
-__version__ = "0.0.48"
+__version__ = "0.0.50rc"
 
 # module level doc-string
 __doc__ = """
{arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/__init__.py

@@ -1,4 +1,4 @@
-from .functions import llm_eval_binary, llm_generate, run_relevance_eval
+from .functions import llm_classify, llm_eval_binary, llm_generate, run_relevance_eval
 from .models import OpenAIModel, VertexAIModel
 from .retrievals import compute_precisions_at_k
 from .templates import (
@@ -18,6 +18,7 @@ from .utils.downloads import download_benchmark_dataset
 __all__ = [
     "compute_precisions_at_k",
     "download_benchmark_dataset",
+    "llm_classify",
     "llm_eval_binary",
     "llm_generate",
     "OpenAIModel",
arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/__init__.py (new file)

@@ -0,0 +1,4 @@
+from .classify import llm_classify, llm_eval_binary, run_relevance_eval
+from .generate import llm_generate
+
+__all__ = ["llm_classify", "llm_eval_binary", "run_relevance_eval", "llm_generate"]
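A quick sanity check of the import surface after the `binary.py` → `classify.py` rename (a sketch, assuming arize-phoenix 0.0.50rc0 is installed): the old names keep working through the re-exports shown above, alongside the new `llm_classify`.

```python
# Old and new entry points are both reachable after the rename (illustrative sketch).
from phoenix.experimental.evals import llm_classify, llm_eval_binary  # package-level re-export
from phoenix.experimental.evals.functions import llm_classify as classify_fn  # functions/__init__.py
from phoenix.experimental.evals.functions.classify import run_relevance_eval  # renamed module

# The re-exports point at the same function object.
assert llm_classify is classify_fn
```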
arize_phoenix-0.0.48/src/phoenix/experimental/evals/functions/binary.py → arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/classify.py

@@ -1,5 +1,6 @@
 import logging
-from typing import Any, Iterable, List, Optional, Set, Union, cast
+import warnings
+from typing import Any, Iterable, List, Optional, Union, cast
 
 import pandas as pd
 
@@ -22,7 +23,7 @@ OPENINFERENCE_QUERY_COLUMN_NAME = "attributes." + INPUT_VALUE
 OPENINFERENCE_DOCUMENT_COLUMN_NAME = "attributes." + RETRIEVAL_DOCUMENTS
 
 
-def llm_eval_binary(
+def llm_classify(
     dataframe: pd.DataFrame,
     model: BaseEvalModel,
     template: Union[PromptTemplate, str],
@@ -30,7 +31,7 @@ def llm_eval_binary(
     system_instruction: Optional[str] = None,
     verbose: bool = False,
 ) -> List[str]:
-    """Runs binary classifications using an LLM.
+    """Classifies each input row of the dataframe using an LLM.
 
     Args:
         dataframe (pandas.DataFrame): A pandas dataframe in which each row represents a record to be
@@ -62,9 +63,62 @@ def llm_eval_binary(
     eval_template = normalize_template(template)
     prompts = map_template(dataframe, eval_template)
     responses = verbose_model.generate(prompts.to_list(), instruction=system_instruction)
-    rails_set = set(rails)
-    printif(verbose, f"Snapping {len(responses)} responses to rails: {rails_set}")
-    return [_snap_to_rail(response, rails_set, verbose=verbose) for response in responses]
+    printif(verbose, f"Snapping {len(responses)} responses to rails: {rails}")
+    return [_snap_to_rail(response, rails, verbose=verbose) for response in responses]
+
+
+def llm_eval_binary(
+    dataframe: pd.DataFrame,
+    model: BaseEvalModel,
+    template: Union[PromptTemplate, str],
+    rails: List[str],
+    system_instruction: Optional[str] = None,
+    verbose: bool = False,
+) -> List[str]:
+    """Performs a binary classification on the rows of the input dataframe using an LLM.
+
+    Args:
+        dataframe (pandas.DataFrame): A pandas dataframe in which each row represents a record to be
+        classified. All template variable names must appear as column names in the dataframe (extra
+        columns unrelated to the template are permitted).
+
+        template (Union[PromptTemplate, str]): The prompt template as either an instance of
+        PromptTemplate or a string. If the latter, the variable names should be surrounded by
+        curly braces so that a call to `.format` can be made to substitute variable values.
+
+        model (BaseEvalModel): An LLM model class.
+
+        rails (List[str]): A list of strings representing the possible output classes of the model's
+        predictions.
+
+        system_instruction (Optional[str], optional): An optional system message.
+
+        verbose (bool, optional): If True, prints detailed info to stdout such as model invocation
+        parameters and details about retries and snapping to rails. Default False.
+
+    Returns:
+        List[str]: A list of strings representing the predicted class for each record in the
+        dataframe. The list should have the same length as the input dataframe and its values should
+        be the entries in the rails argument or "NOT_PARSABLE" if the model's prediction could not
+        be parsed.
+    """
+
+    warnings.warn(
+        "This function will soon be deprecated. "
+        "Use llm_classify instead, which has the same function signature "
+        "and provides support for multi-class classification "
+        "in addition to binary classification.",
+        category=DeprecationWarning,
+        stacklevel=2,
+    )
+    return llm_classify(
+        dataframe=dataframe,
+        model=model,
+        template=template,
+        rails=rails,
+        system_instruction=system_instruction,
+        verbose=verbose,
+    )
 
 
 def run_relevance_eval(
@@ -161,7 +215,7 @@ def run_relevance_eval(
             indexes.append(index)
             expanded_queries.append(query)
             expanded_documents.append(document)
-    predictions = llm_eval_binary(
+    predictions = llm_classify(
         dataframe=pd.DataFrame(
             {
                 query_column_name: expanded_queries,
@@ -188,92 +242,33 @@ def _get_contents_from_openinference_documents(documents: Iterable[Any]) -> List
     return [doc.get(DOCUMENT_CONTENT) if isinstance(doc, dict) else None for doc in documents]
 
 
-def _snap_to_rail(string: str, rails: Set[str], verbose: bool = False) -> str:
+def _snap_to_rail(raw_string: str, rails: List[str], verbose: bool = False) -> str:
     """
-    Snaps a string to the nearest rail, or returns None if the string cannot be snapped to a
-    rail.
+    Snaps a string to the nearest rail, or returns None if the string cannot be
+    snapped to a rail.
 
     Args:
-        string (str): An input to be snapped to a rail.
+        raw_string (str): An input to be snapped to a rail.
 
-        rails (Set[str]): The target set of strings to snap to.
+        rails (List[str]): The target set of strings to snap to.
 
     Returns:
-        str: A string from the rails argument or None if the input string could not be snapped.
+        str: A string from the rails argument or "UNPARSABLE" if the input
+        string could not be snapped.
     """
 
-    processed_string = string.strip()
-    rails_list = list(rails)
-    rail = _extract_rail(processed_string, rails_list[0], rails_list[1])
-    if not rail:
-        printif(verbose, f"- Cannot snap {repr(string)} to rails: {rails}")
-        logger.warning(
-            f"LLM output cannot be snapped to rails {list(rails)}, returning {NOT_PARSABLE}. "
-            f'Output: "{string}"'
-        )
+    snap_string = raw_string.lower()
+    rails = list(set(rails))
+    rails = [rail.lower() for rail in rails]
+    rails.sort(key=len, reverse=True)
+    found_rails = set()
+    for rail in rails:
+        if rail in snap_string:
+            found_rails.add(rail)
+            snap_string = snap_string.replace(rail, "")
+    if len(found_rails) != 1:
+        printif(verbose, f"- Cannot snap {repr(raw_string)} to rails")
         return NOT_PARSABLE
-    else:
-        printif(verbose, f"- Snapped {repr(string)} to rail: {rail}")
+    rail = list(found_rails)[0]
+    printif(verbose, f"- Snapped {repr(raw_string)} to rail: {rail}")
     return rail
-
-
-def _extract_rail(string: str, positive_rail: str, negative_rail: str) -> Optional[str]:
-    """
-    Extracts the right rails text from the llm output. If the rails have overlapping characters,
-    (e.x. "regular" and "irregular"), it also ensures that the correct rail is returned.
-
-    Args:
-        string (str): An input to be snapped to a rail.
-
-        positive_rail (str): The positive rail (e.x. toxic)
-
-        negative_rail (str): The negative rail. (e.x. non-toxic)
-
-    Returns:
-        str: A string from the rails or None if the input string could not be extracted.
-
-    Examples:
-        given: positive_rail = "irregular", negative_rail = "regular"
-
-        string = "irregular"
-        Output: "irregular"
-
-        string = "regular"
-        Output: "regular"
-
-        string = "regular,:....random"
-        Output: "regular"
-
-        string = "regular..irregular" - contains both rails
-        Output: None
-
-        string = "Irregular"
-        Output: "irregular"
-    """
-
-    # Convert the inputs to lowercase for case-insensitive matching
-    string = string.lower()
-    positive_rail = positive_rail.lower()
-    negative_rail = negative_rail.lower()
-
-    positive_pos, negative_pos = string.find(positive_rail), string.find(negative_rail)
-
-    # If both positive and negative rails are in the string
-    if positive_pos != -1 and negative_pos != -1:
-        # If either one is a substring of the other, return the longer one
-        # e.x. "regular" and "irregular"
-        if positive_pos < negative_pos < positive_pos + len(
-            positive_rail
-        ) or negative_pos < positive_pos < negative_pos + len(negative_rail):
-            # Return the longer of the rails since it means the LLM returned the longer one
-            return max(positive_rail, negative_rail, key=len)
-        else:
-            # If both rails values are in the string, we cannot determine which to return
-            return None
-    # If only positive is in string
-    elif positive_pos != -1:
-        return positive_rail
-    # If only negative is in the string
-    elif negative_pos != -1:
-        return negative_rail
-    return None
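To make the behavioral change in `_snap_to_rail` concrete, the standalone sketch below mirrors the new snapping logic added above (with `NOT_PARSABLE` hard-coded here for illustration; the package defines it elsewhere) and shows why it handles more than two rails as well as overlapping rail names.

```python
# Standalone illustration of the new rail-snapping behavior in classify.py (sketch only).
from typing import List

NOT_PARSABLE = "NOT_PARSABLE"  # hard-coded for this sketch


def snap_to_rail(raw_string: str, rails: List[str]) -> str:
    snap_string = raw_string.lower()
    # Deduplicate and check longer rails first so "irrelevant" is not swallowed by "relevant".
    rails = sorted({rail.lower() for rail in rails}, key=len, reverse=True)
    found_rails = set()
    for rail in rails:
        if rail in snap_string:
            found_rails.add(rail)
            snap_string = snap_string.replace(rail, "")
    # Exactly one rail must remain; anything else is unparsable.
    if len(found_rails) != 1:
        return NOT_PARSABLE
    return found_rails.pop()


# Binary rails with overlapping names still resolve correctly.
print(snap_to_rail("The document is irrelevant.", ["relevant", "irrelevant"]))  # irrelevant
# More than two rails now work (the removed _extract_rail only compared two).
print(snap_to_rail("label: neutral", ["positive", "negative", "neutral"]))      # neutral
# Ambiguous outputs containing multiple rails fall back to NOT_PARSABLE.
print(snap_to_rail("relevant or irrelevant", ["relevant", "irrelevant"]))       # NOT_PARSABLE
```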
{arize_phoenix-0.0.48 → arize_phoenix-0.0.50rc0}/src/phoenix/experimental/evals/models/openai.py

@@ -56,7 +56,7 @@ class OpenAIModel(BaseEvalModel):
     """Batch size to use when passing multiple documents to generate."""
     request_timeout: Optional[Union[float, Tuple[float, float]]] = None
     """Timeout for requests to OpenAI completion API. Default is 600 seconds."""
-    max_retries: int = 6
+    max_retries: int = 20
     """Maximum number of retries to make when generating."""
     retry_min_seconds: int = 10
     """Minimum number of seconds to wait when retrying."""