arize-phoenix 2.0.0__tar.gz → 2.2.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic.

Files changed (177)
  1. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/PKG-INFO +5 -1
  2. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/pyproject.toml +7 -3
  3. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/__init__.py +2 -2
  4. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/evals.py +29 -8
  5. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/traces.py +45 -34
  6. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/__init__.py +4 -1
  7. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/evaluators.py +85 -8
  8. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/functions/classify.py +16 -41
  9. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/functions/executor.py +1 -0
  10. arize_phoenix-2.2.0rc0/src/phoenix/experimental/evals/models/anthropic.py +171 -0
  11. arize_phoenix-2.2.0rc0/src/phoenix/experimental/evals/models/vertex.py +155 -0
  12. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/templates/__init__.py +2 -0
  13. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/templates/default_templates.py +12 -0
  14. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/utils/__init__.py +64 -2
  15. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/schema.py +24 -0
  16. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/app.py +6 -5
  17. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/main.py +6 -7
  18. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/span_handler.py +7 -7
  19. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/index.js +586 -499
  20. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/templates/index.html +5 -1
  21. arize_phoenix-2.2.0rc0/src/phoenix/server/trace_handler.py +56 -0
  22. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/session/session.py +2 -1
  23. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/exporter.py +4 -3
  24. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/langchain/tracer.py +14 -4
  25. arize_phoenix-2.2.0rc0/src/phoenix/trace/otel.py +409 -0
  26. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/semantic_conventions.py +2 -0
  27. arize_phoenix-2.2.0rc0/src/phoenix/trace/v1/__init__.py +5 -0
  28. arize_phoenix-2.2.0rc0/src/phoenix/version.py +1 -0
  29. arize_phoenix-2.0.0/src/phoenix/trace/v1/__init__.py +0 -9
  30. arize_phoenix-2.0.0/src/phoenix/trace/v1/trace_pb2.py +0 -54
  31. arize_phoenix-2.0.0/src/phoenix/trace/v1/trace_pb2.pyi +0 -361
  32. arize_phoenix-2.0.0/src/phoenix/trace/v1/utils.py +0 -538
  33. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/.gitignore +0 -0
  34. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/IP_NOTICE +0 -0
  35. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/LICENSE +0 -0
  36. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/README.md +0 -0
  37. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/config.py +0 -0
  38. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/__init__.py +0 -0
  39. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/embedding_dimension.py +0 -0
  40. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/model.py +0 -0
  41. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/model_schema.py +0 -0
  42. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/model_schema_adapter.py +0 -0
  43. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/__init__.py +0 -0
  44. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/dataset.py +0 -0
  45. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/errors.py +0 -0
  46. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/fixtures.py +0 -0
  47. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/schema.py +0 -0
  48. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datasets/validation.py +0 -0
  49. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/datetime_utils.py +0 -0
  50. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/exceptions.py +0 -0
  51. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/__init__.py +0 -0
  52. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/functions/__init__.py +0 -0
  53. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/functions/generate.py +0 -0
  54. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/functions/processing.py +0 -0
  55. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/__init__.py +0 -0
  56. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/base.py +0 -0
  57. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/bedrock.py +0 -0
  58. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/litellm.py +0 -0
  59. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/openai.py +0 -0
  60. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/rate_limiters.py +0 -0
  61. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/models/vertexai.py +0 -0
  62. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/retrievals.py +0 -0
  63. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/templates/template.py +0 -0
  64. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/utils/threads.py +0 -0
  65. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/README.md +0 -0
  66. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/__init__.py +0 -0
  67. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/binning.py +0 -0
  68. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/metrics.py +0 -0
  69. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/mixins.py +0 -0
  70. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  71. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/timeseries.py +0 -0
  72. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/metrics/wrappers.py +0 -0
  73. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/pointcloud/__init__.py +0 -0
  74. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/pointcloud/clustering.py +0 -0
  75. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/pointcloud/pointcloud.py +0 -0
  76. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/pointcloud/projectors.py +0 -0
  77. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  78. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/py.typed +0 -0
  79. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/__init__.py +0 -0
  80. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/__init__.py +0 -0
  81. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/context.py +0 -0
  82. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/helpers.py +0 -0
  83. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  84. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  85. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  86. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  87. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  88. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  89. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  90. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  91. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  92. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/input_types/__init__.py +0 -0
  93. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/interceptor.py +0 -0
  94. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Cluster.py +0 -0
  95. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  96. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Dataset.py +0 -0
  97. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
  98. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DatasetRole.py +0 -0
  99. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  100. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Dimension.py +0 -0
  101. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  102. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  103. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DimensionType.py +0 -0
  104. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  105. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  106. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  107. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  108. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  109. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Evaluation.py +0 -0
  110. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  111. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Event.py +0 -0
  112. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  113. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
  114. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  115. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Functionality.py +0 -0
  116. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/MimeType.py +0 -0
  117. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Model.py +0 -0
  118. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/NumericRange.py +0 -0
  119. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  120. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  121. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Retrieval.py +0 -0
  122. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  123. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Segments.py +0 -0
  124. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/SortDir.py +0 -0
  125. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/Span.py +0 -0
  126. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  127. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  128. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  129. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  130. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/__init__.py +0 -0
  131. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/node.py +0 -0
  132. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/api/types/pagination.py +0 -0
  133. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/evaluation_handler.py +0 -0
  134. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  135. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  136. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  137. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  138. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  139. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  140. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  141. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  142. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/favicon.ico +0 -0
  143. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/index.css +0 -0
  144. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/static/modernizr.js +0 -0
  145. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/templates/__init__.py +0 -0
  146. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/server/thread_server.py +0 -0
  147. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/services.py +0 -0
  148. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/session/__init__.py +0 -0
  149. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/session/evaluation.py +0 -0
  150. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/__init__.py +0 -0
  151. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/dsl/__init__.py +0 -0
  152. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/dsl/filter.py +0 -0
  153. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/dsl/helpers.py +0 -0
  154. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/dsl/missing.py +0 -0
  155. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/dsl/query.py +0 -0
  156. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/evaluation_conventions.py +0 -0
  157. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/fixtures.py +0 -0
  158. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/langchain/__init__.py +0 -0
  159. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  160. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/llama_index/__init__.py +0 -0
  161. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/llama_index/callback.py +0 -0
  162. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
  163. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/llama_index/streaming.py +0 -0
  164. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/openai/__init__.py +0 -0
  165. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/openai/instrumentor.py +0 -0
  166. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/schemas.py +0 -0
  167. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/span_evaluations.py +0 -0
  168. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/span_json_decoder.py +0 -0
  169. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/span_json_encoder.py +0 -0
  170. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/trace_dataset.py +0 -0
  171. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/tracer.py +0 -0
  172. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/utils.py +0 -0
  173. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  174. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  175. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/utilities/__init__.py +0 -0
  176. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/utilities/error_handling.py +0 -0
  177. {arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/utilities/logging.py +0 -0
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: arize-phoenix
- Version: 2.0.0
+ Version: 2.2.0rc0
  Summary: ML Observability in your notebook
  Project-URL: Documentation, https://docs.arize.com/phoenix/
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -20,6 +20,8 @@ Requires-Dist: ddsketch
  Requires-Dist: hdbscan<1.0.0,>=0.8.33
  Requires-Dist: jinja2
  Requires-Dist: numpy
+ Requires-Dist: opentelemetry-proto
+ Requires-Dist: opentelemetry-sdk
  Requires-Dist: pandas
  Requires-Dist: protobuf<5.0,>=3.20
  Requires-Dist: psutil
@@ -36,8 +38,10 @@ Requires-Dist: umap-learn
  Requires-Dist: uvicorn
  Requires-Dist: wrapt
  Provides-Extra: dev
+ Requires-Dist: anthropic; extra == 'dev'
  Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
  Requires-Dist: gcsfs; extra == 'dev'
+ Requires-Dist: google-cloud-aiplatform>=1.3; extra == 'dev'
  Requires-Dist: hatch; extra == 'dev'
  Requires-Dist: jupyter; extra == 'dev'
  Requires-Dist: langchain>=0.0.334; extra == 'dev'
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/pyproject.toml

@@ -40,6 +40,8 @@ dependencies = [
  "ddsketch",
  "tqdm",
  "requests",
+ "opentelemetry-sdk",
+ "opentelemetry-proto",
  ]
  dynamic = ["version"]

@@ -60,7 +62,9 @@ dev = [
  "arize[AutoEmbeddings, LLM_Evaluation]",
  "llama-index>=0.9.14",
  "langchain>=0.0.334",
- "litellm>=1.0.3"
+ "litellm>=1.0.3",
+ "google-cloud-aiplatform>=1.3",
+ "anthropic",
  ]
  experimental = [
  "tenacity",
@@ -75,7 +79,7 @@ Issues = "https://github.com/Arize-ai/phoenix/issues"
  Source = "https://github.com/Arize-ai/phoenix"

  [tool.hatch.version]
- path = "src/phoenix/__init__.py"
+ path = "src/phoenix/version.py"

  [build-system]
  requires = ["hatchling"]
@@ -242,7 +246,6 @@ dependencies = [

  [tool.hatch.envs.proto.scripts]
  recompile = """
- python -m grpc_tools.protoc -I src/phoenix/proto --python_out=src/phoenix --mypy_out=src/phoenix src/phoenix/proto/trace/v1/trace.proto &&
  python -m grpc_tools.protoc -I src/phoenix/proto --python_out=src/phoenix --mypy_out=src/phoenix src/phoenix/proto/trace/v1/evaluation.proto
  """

@@ -288,6 +291,7 @@ module = [
  "langchain.*",
  "litellm",
  "nest_asyncio",
+ "opentelemetry.*",
  ]
  ignore_missing_imports = true
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/__init__.py

@@ -5,8 +5,7 @@ from .session.evaluation import log_evaluations
  from .session.session import NotebookEnvironment, Session, active_session, close_app, launch_app
  from .trace.fixtures import load_example_traces
  from .trace.trace_dataset import TraceDataset
-
- __version__ = "2.0.0"
+ from .version import __version__

  # module level doc-string
  __doc__ = """
@@ -25,6 +24,7 @@ Here are just a few of the things that phoenix does well:
  """

  __all__ = [
+ "__version__",
  "Dataset",
  "EmbeddingColumnNames",
  "RetrievalEmbeddingColumnNames",
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/evals.py

@@ -1,6 +1,7 @@
  import logging
  import weakref
  from collections import defaultdict
+ from datetime import datetime, timezone
  from queue import SimpleQueue
  from threading import RLock, Thread
  from types import MethodType
@@ -46,6 +47,7 @@ class Evals:
  self._document_evaluations_by_name: DefaultDict[
  EvaluationName, DefaultDict[SpanID, Dict[DocumentPosition, pb.Evaluation]]
  ] = defaultdict(lambda: defaultdict(dict))
+ self._last_updated_at: Optional[datetime] = None
  self._start_consumer()

  def put(self, evaluation: pb.Evaluation) -> None:
@@ -92,10 +94,16 @@
  )
  else:
  assert_never(subject_id_kind)
+ self._last_updated_at = datetime.now(timezone.utc)
+
+ @property
+ def last_updated_at(self) -> Optional[datetime]:
+ return self._last_updated_at

  def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
  with self._lock:
- return self._evaluations_by_span_id[span_id].get(name)
+ span_evaluations = self._evaluations_by_span_id.get(span_id)
+ return span_evaluations.get(name) if span_evaluations else None

  def get_span_evaluation_names(self) -> List[EvaluationName]:
  with self._lock:
@@ -108,28 +116,36 @@
  with self._lock:
  if span_id is None:
  return list(self._document_evaluations_by_name)
- return list(self._document_evaluations_by_span_id[span_id])
+ document_evaluations = self._document_evaluations_by_span_id.get(span_id)
+ return list(document_evaluations) if document_evaluations else []

  def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
  with self._lock:
- return tuple(self._span_evaluation_labels[name])
+ labels = self._span_evaluation_labels.get(name)
+ return tuple(labels) if labels else ()

  def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
  with self._lock:
- return tuple(self._span_evaluations_by_name[name].keys())
+ span_evaluations = self._span_evaluations_by_name.get(name)
+ return tuple(span_evaluations.keys()) if span_evaluations else ()

  def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
  with self._lock:
- return list(self._evaluations_by_span_id[span_id].values())
+ evaluations = self._evaluations_by_span_id.get(span_id)
+ return list(evaluations.values()) if evaluations else []

  def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
  with self._lock:
- return tuple(self._document_evaluations_by_name[name].keys())
+ document_evaluations = self._document_evaluations_by_name.get(name)
+ return tuple(document_evaluations.keys()) if document_evaluations else ()

  def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
  all_evaluations: List[pb.Evaluation] = []
  with self._lock:
- for evaluations in self._document_evaluations_by_span_id[span_id].values():
+ document_evaluations = self._document_evaluations_by_span_id.get(span_id)
+ if not document_evaluations:
+ return all_evaluations
+ for evaluations in document_evaluations.values():
  all_evaluations.extend(evaluations.values())
  return all_evaluations

@@ -144,7 +160,12 @@
  # of one trillion, we would not want to create a result that large.
  scores: List[float] = [np.nan] * num_documents
  with self._lock:
- evaluations = self._document_evaluations_by_span_id[span_id][evaluation_name]
+ document_evaluations = self._document_evaluations_by_span_id.get(span_id)
+ if not document_evaluations:
+ return scores
+ evaluations = document_evaluations.get(evaluation_name)
+ if not evaluations:
+ return scores
  for document_position, evaluation in evaluations.items():
  result = evaluation.result
  if result.HasField("score") and document_position < num_documents:
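The accessor changes above replace direct `defaultdict[key]` indexing with `.get()`. A likely motivation (my reading, not stated in the diff) is that indexing a `defaultdict` inserts an empty entry as a side effect of every read of an unknown span ID, while `.get()` leaves the mapping untouched. A minimal illustration with placeholder keys:

    from collections import defaultdict

    evaluations_by_span_id = defaultdict(dict)

    _ = evaluations_by_span_id["unknown-span-id"]       # [] inserts {} as a side effect
    print(len(evaluations_by_span_id))                  # 1

    _ = evaluations_by_span_id.get("another-span-id")   # .get() does not insert anything
    print(len(evaluations_by_span_id))                  # still 1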
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/core/traces.py

@@ -13,20 +13,21 @@ from typing import (
  Iterator,
  List,
  Optional,
+ Set,
  SupportsFloat,
  Tuple,
- Union,
  cast,
  )

+ import opentelemetry.proto.trace.v1.trace_pb2 as otlp
  from ddsketch import DDSketch
  from sortedcontainers import SortedKeyList
  from typing_extensions import TypeAlias
  from wrapt import ObjectProxy

- import phoenix.trace.v1 as pb
  from phoenix.datetime_utils import right_open_time_range
  from phoenix.trace import semantic_conventions
+ from phoenix.trace.otel import decode
  from phoenix.trace.schemas import (
  ATTRIBUTE_PREFIX,
  COMPUTED_PREFIX,
@@ -34,9 +35,10 @@ from phoenix.trace.schemas import (
  Span,
  SpanAttributes,
  SpanID,
+ SpanStatusCode,
  TraceID,
  )
- from phoenix.trace.v1.utils import decode, encode
+ from phoenix.trace.semantic_conventions import RETRIEVAL_DOCUMENTS

  END_OF_QUEUE = None # sentinel value for queue termination

@@ -74,15 +76,15 @@ class ReadableSpan(ObjectProxy): # type: ignore
  are ingested, and would need to be re-computed on the fly.
  """

- __wrapped__: pb.Span
-
- def __init__(self, span: pb.Span) -> None:
+ def __init__(self, otlp_span: otlp.Span) -> None:
+ span = decode(otlp_span)
  super().__init__(span)
+ self._self_otlp_span = otlp_span
  self._self_computed_values: Dict[str, SupportsFloat] = {}

  @property
  def span(self) -> Span:
- span = decode(self.__wrapped__)
+ span = decode(self._self_otlp_span)
  span.attributes.update(cast(SpanAttributes, self._self_computed_values))
  # TODO: compute latency rank percent (which can change depending on how
  # many spans already ingested).
@@ -96,9 +98,7 @@ class ReadableSpan(ObjectProxy): # type: ignore
  return getattr(self.__wrapped__.context, suffix_key, None)
  if key.startswith(ATTRIBUTE_PREFIX):
  suffix_key = key[len(ATTRIBUTE_PREFIX) :]
- if suffix_key not in self.__wrapped__.attributes:
- return None
- return self.__wrapped__.attributes[suffix_key]
+ return self.__wrapped__.attributes.get(suffix_key)
  return getattr(self.__wrapped__, key, None)

  def __setitem__(self, key: str, value: Any) -> None:
@@ -113,21 +113,21 @@ ChildSpanID: TypeAlias = SpanID

  class Traces:
  def __init__(self) -> None:
- self._queue: "SimpleQueue[Optional[pb.Span]]" = SimpleQueue()
+ self._queue: "SimpleQueue[Optional[otlp.Span]]" = SimpleQueue()
  # Putting `None` as the sentinel value for queue termination.
  weakref.finalize(self, self._queue.put, END_OF_QUEUE)
  self._lock = RLock()
  self._spans: Dict[SpanID, ReadableSpan] = {}
  self._parent_span_ids: Dict[SpanID, ParentSpanID] = {}
- self._traces: Dict[TraceID, List[SpanID]] = defaultdict(list)
- self._child_span_ids: DefaultDict[SpanID, List[ChildSpanID]] = defaultdict(list)
- self._orphan_spans: DefaultDict[ParentSpanID, List[pb.Span]] = defaultdict(list)
+ self._traces: DefaultDict[TraceID, List[SpanID]] = defaultdict(list)
+ self._child_span_ids: DefaultDict[SpanID, Set[ChildSpanID]] = defaultdict(set)
+ self._orphan_spans: DefaultDict[ParentSpanID, List[otlp.Span]] = defaultdict(list)
  self._num_documents: DefaultDict[SpanID, int] = defaultdict(int)
  self._start_time_sorted_span_ids: SortedKeyList[SpanID] = SortedKeyList(
- key=lambda span_id: self._spans[span_id].start_time.ToDatetime(timezone.utc),
+ key=lambda span_id: self._spans[span_id].start_time,
  )
  self._start_time_sorted_root_span_ids: SortedKeyList[SpanID] = SortedKeyList(
- key=lambda span_id: self._spans[span_id].start_time.ToDatetime(timezone.utc),
+ key=lambda span_id: self._spans[span_id].start_time,
  )
  self._latency_sorted_root_span_ids: SortedKeyList[SpanID] = SortedKeyList(
  key=lambda span_id: self._spans[span_id][ComputedAttributes.LATENCY_MS.value],
@@ -136,15 +136,18 @@ class Traces:
  self._min_start_time: Optional[datetime] = None
  self._max_start_time: Optional[datetime] = None
  self._token_count_total: int = 0
+ self._last_updated_at: Optional[datetime] = None
  self._start_consumer()

- def put(self, span: Optional[Union[Span, pb.Span]] = None) -> None:
- self._queue.put(encode(span) if isinstance(span, Span) else span)
+ def put(self, span: Optional[otlp.Span] = None) -> None:
+ self._queue.put(span)

  def get_trace(self, trace_id: TraceID) -> Iterator[Span]:
  with self._lock:
  # make a copy because source data can mutate during iteration
- span_ids = tuple(self._traces[trace_id])
+ if not (trace := self._traces.get(trace_id)):
+ return
+ span_ids = tuple(trace)
  for span_id in span_ids:
  if span := self[span_id]:
  yield span
@@ -194,7 +197,7 @@

  def get_num_documents(self, span_id: SpanID) -> int:
  with self._lock:
- return self._num_documents[span_id]
+ return self._num_documents.get(span_id) or 0

  def latency_rank_percent(self, latency_ms: float) -> Optional[float]:
  """
@@ -221,11 +224,17 @@
  def get_descendant_span_ids(self, span_id: SpanID) -> Iterator[SpanID]:
  with self._lock:
  # make a copy because source data can mutate during iteration
- span_ids = tuple(self._child_span_ids[span_id])
+ if not (child_span_ids := self._child_span_ids.get(span_id)):
+ return
+ span_ids = tuple(child_span_ids)
  for child_span_id in span_ids:
  yield child_span_id
  yield from self.get_descendant_span_ids(child_span_id)

+ @property
+ def last_updated_at(self) -> Optional[datetime]:
+ return self._last_updated_at
+
  @property
  def span_count(self) -> int:
  """Total number of spans (excluding orphan spans if any)"""
@@ -259,24 +268,24 @@
  with self._lock:
  self._process_span(item)

- def _process_span(self, span: pb.Span) -> None:
- span_id = SpanID(span.context.span_id)
+ def _process_span(self, span: otlp.Span) -> None:
+ new_span = ReadableSpan(span)
+ span_id = new_span.context.span_id
  existing_span = self._spans.get(span_id)
- if existing_span and existing_span.HasField("end_time"):
+ if existing_span and existing_span.end_time:
  # Reject updates if span has ended.
  return
- is_root_span = not span.HasField("parent_span_id")
+ is_root_span = not new_span.parent_id
  if not is_root_span:
- parent_span_id = SpanID(span.parent_span_id.value)
+ parent_span_id = new_span.parent_id
  if parent_span_id not in self._spans:
  # Span can't be processed before its parent.
  self._orphan_spans[parent_span_id].append(span)
  return
- self._child_span_ids[parent_span_id].append(span_id)
+ self._child_span_ids[parent_span_id].add(span_id)
  self._parent_span_ids[span_id] = parent_span_id
- new_span = ReadableSpan(span)
- start_time = span.start_time.ToDatetime(timezone.utc)
- end_time = span.end_time.ToDatetime(timezone.utc) if span.HasField("end_time") else None
+ start_time = new_span.start_time
+ end_time = new_span.end_time
  if end_time:
  new_span[ComputedAttributes.LATENCY_MS.value] = latency = (
  end_time - start_time
@@ -287,7 +296,7 @@
  if is_root_span and end_time:
  self._latency_sorted_root_span_ids.add(span_id)
  if not existing_span:
- trace_id = TraceID(span.context.trace_id)
+ trace_id = new_span.context.trace_id
  self._traces[trace_id].append(span_id)
  if is_root_span:
  self._start_time_sorted_root_span_ids.add(span_id)
@@ -303,7 +312,7 @@
  else max(self._max_start_time, start_time)
  )
  new_span[ComputedAttributes.ERROR_COUNT.value] = int(
- span.status.code is pb.Span.Status.Code.ERROR
+ new_span.status_code is SpanStatusCode.ERROR
  )
  # Update cumulative values for span's ancestors.
  for attribute_name, cumulative_attribute_name in (
@@ -336,14 +345,16 @@
  self._token_count_total -= existing_span[LLM_TOKEN_COUNT_TOTAL] or 0
  self._token_count_total += new_span[LLM_TOKEN_COUNT_TOTAL] or 0
  # Update number of documents
- num_documents_update = len(span.retrieval.documents)
+ num_documents_update = len(new_span.attributes.get(RETRIEVAL_DOCUMENTS) or ())
  if existing_span:
- num_documents_update -= len(existing_span.retrieval.documents)
+ num_documents_update -= len(existing_span.attributes.get(RETRIEVAL_DOCUMENTS) or ())
  if num_documents_update:
  self._num_documents[span_id] += num_documents_update
  # Process previously orphaned spans, if any.
  for orphan_span in self._orphan_spans.pop(span_id, ()):
  self._process_span(orphan_span)
+ # Update last updated timestamp
+ self._last_updated_at = datetime.now(timezone.utc)

  def _add_value_to_span_ancestors(
  self,
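The ingestion path above now queues OTLP protobuf spans directly (the new trace_handler.py and trace/otel.py files in the list receive and decode them). For context, this is roughly what such an otlp.Span looks like when built by hand; the field names come from opentelemetry-proto, while the identifiers and timestamps below are made-up placeholders:

    import time

    import opentelemetry.proto.trace.v1.trace_pb2 as otlp

    now_ns = time.time_ns()
    otlp_span = otlp.Span(
        trace_id=bytes.fromhex("0123456789abcdef0123456789abcdef"),  # 16 bytes
        span_id=bytes.fromhex("0123456789abcdef"),                   # 8 bytes
        name="llm-call",
        start_time_unix_nano=now_ns,
        end_time_unix_nano=now_ns + 1_000_000,  # ended 1 ms later
    )
    # Traces.put(otlp_span) would enqueue it for the consumer thread to decode.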
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/__init__.py

@@ -1,4 +1,4 @@
- from .evaluators import LLMEvaluator
+ from .evaluators import InvalidEvalCriteriaError, LLMEvaluator
  from .functions import llm_classify, llm_generate, run_relevance_eval
  from .models import BedrockModel, LiteLLMModel, OpenAIModel, VertexAIModel
  from .retrievals import compute_precisions_at_k
@@ -16,11 +16,13 @@ from .templates import (
  TOXICITY_PROMPT_RAILS_MAP,
  TOXICITY_PROMPT_TEMPLATE,
  ClassificationTemplate,
+ EvalCriteria,
  PromptTemplate,
  )
  from .utils import NOT_PARSABLE, download_benchmark_dataset

  __all__ = [
+ "EvalCriteria",
  "compute_precisions_at_k",
  "download_benchmark_dataset",
  "llm_classify",
@@ -46,4 +48,5 @@ __all__ = [
  "QA_PROMPT_TEMPLATE",
  "NOT_PARSABLE",
  "run_relevance_eval",
+ "InvalidEvalCriteriaError",
  ]
{arize_phoenix-2.0.0 → arize_phoenix-2.2.0rc0}/src/phoenix/experimental/evals/evaluators.py

@@ -1,15 +1,26 @@
  from typing import List, Mapping, Optional, Tuple

+ from phoenix.exceptions import PhoenixException
  from phoenix.experimental.evals.models import set_verbosity
- from phoenix.experimental.evals.utils import parse_openai_function_call, snap_to_rail
+ from phoenix.experimental.evals.templates.default_templates import (
+ EvalCriteria,
+ )
+ from phoenix.experimental.evals.utils import (
+ NOT_PARSABLE,
+ openai_function_call_kwargs,
+ parse_openai_function_call,
+ snap_to_rail,
+ )
  from phoenix.utilities.logging import printif

- from .models import BaseEvalModel
+ from .models import BaseEvalModel, OpenAIModel
  from .templates import ClassificationTemplate, PromptOptions, PromptTemplate

  Record = Mapping[str, str]

- NOT_PARSABLE = "NOT_PARSABLE"
+
+ class InvalidEvalCriteriaError(PhoenixException):
+ pass


  class LLMEvaluator:
@@ -35,6 +46,7 @@ class LLMEvaluator:
  self,
  record: Record,
  provide_explanation: bool = False,
+ use_function_calling_if_available: bool = True,
  verbose: bool = False,
  ) -> Tuple[str, Optional[str]]:
  """
@@ -46,27 +58,53 @@
  provide_explanation (bool, optional): Whether to provide an
  explanation.

+ use_function_calling_if_available (bool, optional): If True, use
+ function calling (if available) as a means to constrain the LLM
+ outputs. With function calling, the LLM is instructed to provide its
+ response as a structured JSON object, which is easier to parse.
+
+ use_function_calling_if_available (bool, optional): If True, use
+ function calling (if available) as a means to constrain the LLM
+ outputs. With function calling, the LLM is instructed to provide its
+ response as a structured JSON object, which is easier to parse.
+
  verbose (bool, optional): Whether to print verbose output.

  Returns:
  Tuple[str, Optional[str]]: The label and explanation (if provided).
  """
+ use_openai_function_call = (
+ use_function_calling_if_available
+ and isinstance(self._model, OpenAIModel)
+ and self._model.supports_function_calling
+ )
  prompt = self._template.format(
  record, options=PromptOptions(provide_explanation=provide_explanation)
  )
  with set_verbosity(self._model, verbose) as verbose_model:
- unparsed_output = verbose_model(prompt)
+ unparsed_output = verbose_model(
+ prompt,
+ **(
+ openai_function_call_kwargs(self._template.rails, provide_explanation)
+ if use_openai_function_call
+ else {}
+ ),
+ )
  label, explanation = _extract_label_and_explanation(
  unparsed_output=unparsed_output,
  template=self._template,
- use_openai_function_call=False,
  provide_explanation=provide_explanation,
+ use_openai_function_call=use_openai_function_call,
  verbose=verbose,
  )
  return label, explanation

  async def aevaluate(
- self, record: Record, provide_explanation: bool = False, verbose: bool = False
+ self,
+ record: Record,
+ provide_explanation: bool = False,
+ use_function_calling_if_available: bool = True,
+ verbose: bool = False,
  ) -> Tuple[str, Optional[str]]:
  """
  Evaluates a single record.
@@ -77,25 +115,64 @@
  provide_explanation (bool, optional): Whether to provide an
  explanation.

+ use_function_calling_if_available (bool, optional): If True, use
+ function calling (if available) as a means to constrain the LLM
+ outputs. With function calling, the LLM is instructed to provide its
+ response as a structured JSON object, which is easier to parse.
+
  verbose (bool, optional): Whether to print verbose output.

  Returns:
  Tuple[str, Optional[str]]: The label and explanation (if provided).
  """
+ use_openai_function_call = (
+ use_function_calling_if_available
+ and isinstance(self._model, OpenAIModel)
+ and self._model.supports_function_calling
+ )
  prompt = self._template.format(
  record, options=PromptOptions(provide_explanation=provide_explanation)
  )
  with set_verbosity(self._model, verbose) as verbose_model:
- unparsed_output = await verbose_model._async_generate(prompt)
+ unparsed_output = await verbose_model._async_generate(
+ prompt,
+ **(
+ openai_function_call_kwargs(self._template.rails, provide_explanation)
+ if use_openai_function_call
+ else {}
+ ),
+ )
  label, explanation = _extract_label_and_explanation(
  unparsed_output=unparsed_output,
  template=self._template,
- use_openai_function_call=False,
  provide_explanation=provide_explanation,
+ use_openai_function_call=use_openai_function_call,
  verbose=verbose,
  )
  return label, explanation

+ @classmethod
+ def from_criteria(
+ cls,
+ criteria: EvalCriteria,
+ model: BaseEvalModel,
+ ) -> "LLMEvaluator":
+ """
+ Instantiates an LLMEvaluator from an eval criteria.
+
+ Args:
+ criteria (EvalCriteria): The eval criteria.
+
+ model (BaseEvalModel): The model to use for evaluation.
+
+ Returns:
+ LLMEvaluator: The instantiate evaluator.
+ """
+ return cls(
+ model=model,
+ template=criteria.value,
+ )
+

  class MapReducer:
  """