fiddler-evals 0.1.1.dev14__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {fiddler_evals-0.1.1.dev14/fiddler_evals.egg-info → fiddler_evals-0.2.0}/PKG-INFO +47 -24
  2. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/PUBLIC.md +46 -22
  3. fiddler_evals-0.2.0/fiddler_evals/VERSION +1 -0
  4. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/__init__.py +0 -2
  5. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/connection.py +58 -95
  6. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/application.py +82 -77
  7. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/dataset.py +347 -331
  8. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/experiment.py +291 -284
  9. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/project.py +67 -60
  10. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/__init__.py +0 -2
  11. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/answer_relevance.py +29 -21
  12. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/base.py +25 -0
  13. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/coherence.py +15 -9
  14. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/conciseness.py +6 -3
  15. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/regex.py +4 -4
  16. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_answer_relevance.py +64 -15
  17. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_coherence.py +58 -76
  18. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_conciseness.py +61 -15
  19. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/exceptions.py +44 -62
  20. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/evaluation.py +64 -62
  21. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/experiment_runner.py +3 -5
  22. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/tests/test_evaluate.py +156 -34
  23. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/tests/constants.py +3 -0
  24. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0/fiddler_evals.egg-info}/PKG-INFO +47 -24
  25. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals.egg-info/SOURCES.txt +0 -2
  26. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals.egg-info/requires.txt +0 -1
  27. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/pyproject.toml +1 -2
  28. fiddler_evals-0.1.1.dev14/fiddler_evals/VERSION +0 -1
  29. fiddler_evals-0.1.1.dev14/fiddler_evals/evaluators/tests/test_toxicity.py +0 -201
  30. fiddler_evals-0.1.1.dev14/fiddler_evals/evaluators/toxicity.py +0 -101
  31. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/MANIFEST.in +0 -0
  32. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/README.md +0 -0
  33. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/configs.py +0 -0
  34. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/conftest.py +0 -0
  35. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/constants.py +0 -0
  36. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/decorators.py +0 -0
  37. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/__init__.py +0 -0
  38. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/base.py +0 -0
  39. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/__init__.py +0 -0
  40. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_application.py +0 -0
  41. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_dataset.py +0 -0
  42. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_dataset_items.py +0 -0
  43. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_experiment.py +0 -0
  44. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_experiment_items.py +0 -0
  45. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_experiment_results.py +0 -0
  46. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/entities/tests/test_project.py +0 -0
  47. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/eval_fn.py +0 -0
  48. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/ftl_prompt_safety.py +0 -0
  49. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/ftl_response_faithfulness.py +0 -0
  50. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/sentiment.py +0 -0
  51. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/__init__.py +0 -0
  52. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_eval_fn.py +0 -0
  53. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_ftl_prompt_safety.py +0 -0
  54. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_ftl_response_faithfulness.py +0 -0
  55. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_regex.py +0 -0
  56. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_sentiment.py +0 -0
  57. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/tests/test_topic_classification.py +0 -0
  58. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/evaluators/topic.py +0 -0
  59. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/__init__.py +0 -0
  60. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/http_client.py +0 -0
  61. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/json_encoder.py +0 -0
  62. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/semver.py +0 -0
  63. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/tests/__init__.py +0 -0
  64. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/tests/test_json_encoder.py +0 -0
  65. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/libs/tests/test_request_client.py +0 -0
  66. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/__init__.py +0 -0
  67. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/application.py +0 -0
  68. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/base.py +0 -0
  69. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/compact.py +0 -0
  70. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/dataset.py +0 -0
  71. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/error.py +0 -0
  72. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/evaluator.py +0 -0
  73. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/experiment.py +0 -0
  74. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/filter_query.py +0 -0
  75. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/project.py +0 -0
  76. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/response.py +0 -0
  77. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/score.py +0 -0
  78. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/pydantic_models/server_info.py +0 -0
  79. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/__init__.py +0 -0
  80. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/executor.py +0 -0
  81. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/experiment_result_publisher.py +0 -0
  82. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/tests/__init__.py +0 -0
  83. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/runner/tests/test_experiment_result_publisher.py +0 -0
  84. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/tests/__init__.py +0 -0
  85. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/tests/test_connection.py +0 -0
  86. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/tests/test_decorators.py +0 -0
  87. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/__init__.py +0 -0
  88. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/environment.py +0 -0
  89. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/pd.py +0 -0
  90. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/tests/__init__.py +0 -0
  91. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/tests/test_environment.py +0 -0
  92. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/utils/tqdm.py +0 -0
  93. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/version.py +0 -0
  94. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals.egg-info/dependency_links.txt +0 -0
  95. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals.egg-info/top_level.txt +0 -0
  96. {fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fiddler-evals
3
- Version: 0.1.1.dev14
3
+ Version: 0.2.0
4
4
  Summary: Python SDK for evaluating LLM Applications
5
5
  Author-email: Fiddler AI <support@fiddler.ai>
6
6
  Maintainer-email: Fiddler AI <support@fiddler.ai>
@@ -15,7 +15,6 @@ Requires-Dist: requests<3
15
15
  Requires-Dist: pydantic>=2.0.0
16
16
  Requires-Dist: tqdm
17
17
  Requires-Dist: typing-extensions<5,>=4.6.0
18
- Requires-Dist: pandas>=1.2.5
19
18
  Requires-Dist: python-decouple
20
19
  Provides-Extra: pandas
21
20
  Requires-Dist: pandas>=1.2.5; extra == "pandas"
@@ -60,7 +59,7 @@ pip install --upgrade --pre fiddler-evals
60
59
  from fiddler_evals import init
61
60
 
62
61
  # Initialize connection
63
- init(url='https://your-org.fiddler.ai', token='your-api-token')
62
+ init(url='https://your-instance.fiddler.ai', token='your-api-token')
64
63
  ```
65
64
 
66
65
  ### 2. Create Project Structure
@@ -101,19 +100,32 @@ dataset.insert(test_cases)
101
100
 
102
101
  ### 4. Use Built-in Evaluators
103
102
 
103
+ **Configure LLM Gateway provider:**
104
+
105
+ Add an LLM provider via UI (**Settings > LLM Gateway**) to use Fiddler's pre-built LLM-as-a-Judge evaluators.
106
+ LLM-as-a-Judge evaluators require a `model` parameter in the format `{provider}/{model}` (e.g., `openai/gpt-4o`) and an optional `credential` parameter for LLM Gateway authentication.
107
+
104
108
  ```python
105
109
  from fiddler_evals.evaluators import (
106
110
  AnswerRelevance, Coherence, Conciseness,
107
- Toxicity, Sentiment, RegexSearch
111
+ Sentiment, RegexSearch
108
112
  )
109
113
 
110
- # Test individual evaluators
111
- relevance_evaluator = AnswerRelevance()
114
+ # Test LLM-as-a-Judge evaluators (require model parameter)
115
+ relevance_evaluator = AnswerRelevance(
116
+ model="openai/gpt-4o", # Required: LLM Gateway model in {provider}/{model} format
117
+ credential="my-openai-cred" # Optional: LLM Gateway credential name
118
+ )
112
119
  score = relevance_evaluator.score(
113
120
  prompt="What is the capital of France?",
114
121
  response="Paris is the capital of France."
115
122
  )
116
123
  print(f"Score: {score.value} - {score.reasoning}")
124
+
125
+ # Test other evaluators (no model parameter needed)
126
+ sentiment_evaluator = Sentiment()
127
+ scores = sentiment_evaluator.score(text="This is a helpful response.")
128
+ print("Sentiments:", [f'{score.name}: {score.value}' for score in scores])
117
129
  ```
118
130
 
119
131
  ### 5. Create Custom Evaluators
@@ -199,8 +211,8 @@ def contains_number_evaluator(output: str) -> float:
199
211
 
200
212
  # Use functions directly in evaluators list
201
213
  evaluators = [
202
- AnswerRelevance(),
203
- Conciseness(),
214
+ AnswerRelevance(model="openai/gpt-4o", credential="my-openai-cred"),
215
+ Conciseness(model="openai/gpt-4o", credential="my-openai-cred"),
204
216
  word_count_evaluator, # Function evaluator
205
217
  contains_number_evaluator, # Function evaluator
206
218
  ]
@@ -231,9 +243,19 @@ def my_llm_task(inputs: dict, extras: dict, metadata: dict) -> dict:
231
243
 
232
244
  # Set up evaluators with different configurations
233
245
  evaluators = [
234
- # Primary evaluation metrics
235
- AnswerRelevance(score_name_prefix="primary"),
236
- Conciseness(score_name_prefix="primary"),
246
+ # LLM-as-a-Judge evaluators (require model parameter)
247
+ AnswerRelevance(
248
+ model="openai/gpt-4o",
249
+ credential="my-openai-cred",
250
+ score_name_prefix="primary"
251
+ ),
252
+ Conciseness(
253
+ model="openai/gpt-4o",
254
+ credential="my-openai-cred",
255
+ score_name_prefix="primary"
256
+ ),
257
+
258
+ # Other evaluators
237
259
  Sentiment(score_name_prefix="primary"),
238
260
 
239
261
  # Custom evaluators with specific mappings
@@ -245,13 +267,13 @@ evaluators = [
245
267
  # Multiple instances of same evaluator for different fields
246
268
  RegexSearch(
247
269
  pattern=r"\d+",
248
- score_name_prefix="validation",
270
+ score_name_prefix="question",
249
271
  score_name="has_number",
250
272
  score_fn_kwargs_mapping={"output": "question"}
251
273
  ),
252
274
  RegexSearch(
253
275
  pattern=r"\d+",
254
- score_name_prefix="validation",
276
+ score_name_prefix="answer",
255
277
  score_name="has_number",
256
278
  score_fn_kwargs_mapping={"output": "answer"}
257
279
  ),
@@ -277,21 +299,22 @@ print(f"Generated {sum(len(result.scores) for result in experiment_result.result
277
299
 
278
300
  # Results in organized score names:
279
301
  # "primary_answer_relevance", "primary_conciseness", "primary_sentiment",
280
- # "quality_politeness", "validation_has_number" (for question), "validation_has_number" (for answer)
302
+ # "quality_politeness", "question_has_number", "answer_has_number"
281
303
  ```
282
304
 
283
305
  ## Built-in Evaluators
284
306
 
285
- | Evaluator | Purpose | Key Parameters |
286
- |-----------|---------|----------------|
287
- | `AnswerRelevance` | Checks if response addresses the question | `prompt`, `response` |
288
- | `Coherence` | Evaluates logical flow and consistency | `response`, `prompt` |
289
- | `Conciseness` | Measures response brevity and clarity | `response` |
290
- | `Toxicity` | Detects harmful or toxic content | `text` |
291
- | `Sentiment` | Analyzes emotional tone | `text` |
292
- | `RegexSearch` | Pattern matching for specific formats | `output`, `pattern` |
293
- | `FTLPromptSafety` | Compute safety scores for prompts | `text` |
294
- | `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | `response`, `context` |
307
+ | Evaluator | Purpose | Constructor Parameters | Score Parameters |
308
+ |-----------|---------|------------------------|------------------|
309
+ | `AnswerRelevance` | Checks if response addresses the question | `model` (required), `credential` (optional) | `prompt`, `response` |
310
+ | `Coherence` | Evaluates logical flow and consistency | `model` (required), `credential` (optional) | `response`, `prompt` (optional) |
311
+ | `Conciseness` | Measures response brevity and clarity | `model` (required), `credential` (optional) | `response` |
312
+ | `Sentiment` | Analyzes emotional tone | - | `text` |
313
+ | `RegexSearch` | Pattern matching for specific formats | `pattern` (required) | `output` |
314
+ | `FTLPromptSafety` | Compute safety scores for prompts | - | `text` |
315
+ | `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | - | `response`, `context` |
316
+
317
+ **Note:** Evaluators marked with `model` and `credential` parameters are LLM-as-a-Judge evaluators that require an LLM Gateway model. The `model` parameter should be in `{provider}/{model}` format (e.g., `openai/gpt-4o`). The `credential` parameter is the name of the LLM Gateway credential for authentication.
295
318
 
296
319
  ## Data Import Options
297
320
 
@@ -38,7 +38,7 @@ pip install --upgrade --pre fiddler-evals
38
38
  from fiddler_evals import init
39
39
 
40
40
  # Initialize connection
41
- init(url='https://your-org.fiddler.ai', token='your-api-token')
41
+ init(url='https://your-instance.fiddler.ai', token='your-api-token')
42
42
  ```
43
43
 
44
44
  ### 2. Create Project Structure
@@ -79,19 +79,32 @@ dataset.insert(test_cases)
79
79
 
80
80
  ### 4. Use Built-in Evaluators
81
81
 
82
+ **Configure LLM Gateway provider:**
83
+
84
+ Add an LLM provider via UI (**Settings > LLM Gateway**) to use Fiddler's pre-built LLM-as-a-Judge evaluators.
85
+ LLM-as-a-Judge evaluators require a `model` parameter in the format `{provider}/{model}` (e.g., `openai/gpt-4o`) and an optional `credential` parameter for LLM Gateway authentication.
86
+
82
87
  ```python
83
88
  from fiddler_evals.evaluators import (
84
89
  AnswerRelevance, Coherence, Conciseness,
85
- Toxicity, Sentiment, RegexSearch
90
+ Sentiment, RegexSearch
86
91
  )
87
92
 
88
- # Test individual evaluators
89
- relevance_evaluator = AnswerRelevance()
93
+ # Test LLM-as-a-Judge evaluators (require model parameter)
94
+ relevance_evaluator = AnswerRelevance(
95
+ model="openai/gpt-4o", # Required: LLM Gateway model in {provider}/{model} format
96
+ credential="my-openai-cred" # Optional: LLM Gateway credential name
97
+ )
90
98
  score = relevance_evaluator.score(
91
99
  prompt="What is the capital of France?",
92
100
  response="Paris is the capital of France."
93
101
  )
94
102
  print(f"Score: {score.value} - {score.reasoning}")
103
+
104
+ # Test other evaluators (no model parameter needed)
105
+ sentiment_evaluator = Sentiment()
106
+ scores = sentiment_evaluator.score(text="This is a helpful response.")
107
+ print("Sentiments:", [f'{score.name}: {score.value}' for score in scores])
95
108
  ```
96
109
 
97
110
  ### 5. Create Custom Evaluators
@@ -177,8 +190,8 @@ def contains_number_evaluator(output: str) -> float:
177
190
 
178
191
  # Use functions directly in evaluators list
179
192
  evaluators = [
180
- AnswerRelevance(),
181
- Conciseness(),
193
+ AnswerRelevance(model="openai/gpt-4o", credential="my-openai-cred"),
194
+ Conciseness(model="openai/gpt-4o", credential="my-openai-cred"),
182
195
  word_count_evaluator, # Function evaluator
183
196
  contains_number_evaluator, # Function evaluator
184
197
  ]
@@ -209,9 +222,19 @@ def my_llm_task(inputs: dict, extras: dict, metadata: dict) -> dict:
209
222
 
210
223
  # Set up evaluators with different configurations
211
224
  evaluators = [
212
- # Primary evaluation metrics
213
- AnswerRelevance(score_name_prefix="primary"),
214
- Conciseness(score_name_prefix="primary"),
225
+ # LLM-as-a-Judge evaluators (require model parameter)
226
+ AnswerRelevance(
227
+ model="openai/gpt-4o",
228
+ credential="my-openai-cred",
229
+ score_name_prefix="primary"
230
+ ),
231
+ Conciseness(
232
+ model="openai/gpt-4o",
233
+ credential="my-openai-cred",
234
+ score_name_prefix="primary"
235
+ ),
236
+
237
+ # Other evaluators
215
238
  Sentiment(score_name_prefix="primary"),
216
239
 
217
240
  # Custom evaluators with specific mappings
@@ -223,13 +246,13 @@ evaluators = [
223
246
  # Multiple instances of same evaluator for different fields
224
247
  RegexSearch(
225
248
  pattern=r"\d+",
226
- score_name_prefix="validation",
249
+ score_name_prefix="question",
227
250
  score_name="has_number",
228
251
  score_fn_kwargs_mapping={"output": "question"}
229
252
  ),
230
253
  RegexSearch(
231
254
  pattern=r"\d+",
232
- score_name_prefix="validation",
255
+ score_name_prefix="answer",
233
256
  score_name="has_number",
234
257
  score_fn_kwargs_mapping={"output": "answer"}
235
258
  ),
@@ -255,21 +278,22 @@ print(f"Generated {sum(len(result.scores) for result in experiment_result.result
255
278
 
256
279
  # Results in organized score names:
257
280
  # "primary_answer_relevance", "primary_conciseness", "primary_sentiment",
258
- # "quality_politeness", "validation_has_number" (for question), "validation_has_number" (for answer)
281
+ # "quality_politeness", "question_has_number", "answer_has_number"
259
282
  ```
260
283
 
261
284
  ## Built-in Evaluators
262
285
 
263
- | Evaluator | Purpose | Key Parameters |
264
- |-----------|---------|----------------|
265
- | `AnswerRelevance` | Checks if response addresses the question | `prompt`, `response` |
266
- | `Coherence` | Evaluates logical flow and consistency | `response`, `prompt` |
267
- | `Conciseness` | Measures response brevity and clarity | `response` |
268
- | `Toxicity` | Detects harmful or toxic content | `text` |
269
- | `Sentiment` | Analyzes emotional tone | `text` |
270
- | `RegexSearch` | Pattern matching for specific formats | `output`, `pattern` |
271
- | `FTLPromptSafety` | Compute safety scores for prompts | `text` |
272
- | `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | `response`, `context` |
286
+ | Evaluator | Purpose | Constructor Parameters | Score Parameters |
287
+ |-----------|---------|------------------------|------------------|
288
+ | `AnswerRelevance` | Checks if response addresses the question | `model` (required), `credential` (optional) | `prompt`, `response` |
289
+ | `Coherence` | Evaluates logical flow and consistency | `model` (required), `credential` (optional) | `response`, `prompt` (optional) |
290
+ | `Conciseness` | Measures response brevity and clarity | `model` (required), `credential` (optional) | `response` |
291
+ | `Sentiment` | Analyzes emotional tone | - | `text` |
292
+ | `RegexSearch` | Pattern matching for specific formats | `pattern` (required) | `output` |
293
+ | `FTLPromptSafety` | Compute safety scores for prompts | - | `text` |
294
+ | `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | - | `response`, `context` |
295
+
296
+ **Note:** Evaluators marked with `model` and `credential` parameters are LLM-as-a-Judge evaluators that require an LLM Gateway model. The `model` parameter should be in `{provider}/{model}` format (e.g., `openai/gpt-4o`). The `credential` parameter is the name of the LLM Gateway credential for authentication.
273
297
 
274
298
  ## Data Import Options
275
299
 
@@ -0,0 +1 @@
1
+ 0.2.0
@@ -20,7 +20,6 @@ from fiddler_evals.evaluators import (
20
20
  RegexSearch,
21
21
  Sentiment,
22
22
  TopicClassification,
23
- Toxicity,
24
23
  )
25
24
  from fiddler_evals.evaluators.base import Evaluator
26
25
  from fiddler_evals.evaluators.eval_fn import EvalFn
@@ -55,7 +54,6 @@ __all__ = [
55
54
  "AnswerRelevance",
56
55
  "Coherence",
57
56
  "Conciseness",
58
- "Toxicity",
59
57
  "Sentiment",
60
58
  "RegexSearch",
61
59
  "RegexMatch",
@@ -29,63 +29,30 @@ class Connection:
29
29
  managing connection parameters, authentication tokens, and ensuring proper
30
30
  communication protocols are established.
31
31
 
32
- Attributes
33
- ----------
34
- url : str
35
- Base URL of the Fiddler platform instance
36
- token : str
37
- Authentication token for API access
38
- proxies : dict, optional
39
- Optional proxy configuration for HTTP requests
40
- timeout : float or tuple, optional
41
- HTTP request timeout settings
42
- verify : bool
43
- Whether to verify SSL/TLS certificates
44
- request_headers : dict
45
- HTTP headers including authentication and client info
46
- client : RequestClient
47
- Cached HTTP client instance for making requests
48
- server_info : ServerInfo
49
- Cached server information and metadata
50
- server_version : VersionInfo
51
- Version of the connected Fiddler server
52
- organization_name : str
53
- Name of the connected organization
54
- organization_id : UUID
55
- UUID of the connected organization
56
-
57
- Examples
58
- --------
59
- Creating a basic connection:
60
-
61
- .. code-block:: python
62
-
63
- connection = Connection(
64
- url="https://your-fiddler-instance.com",
65
- token="your-auth-token"
66
- )
67
-
68
- Creating a connection with custom timeout and proxy:
69
-
70
- .. code-block:: python
71
-
72
- connection = Connection(
73
- url="https://your-fiddler-instance.com",
74
- token="your-auth-token",
75
- timeout=(5.0, 30.0), # (connect_timeout, read_timeout)
76
- proxies={"https": "https://proxy.company.com:8080"}
77
- )
78
-
79
- Creating a connection without SSL verification:
80
-
81
- .. code-block:: python
82
-
83
- connection = Connection(
84
- url="https://your-fiddler-instance.com",
85
- token="your-auth-token",
86
- verify=False, # Not recommended for production
87
- validate=False # Skip version compatibility check
88
- )
32
+ Example:
33
+ .. code-block:: python
34
+
35
+ # Creating a basic connection
36
+ connection = Connection(
37
+ url="https://your-instance.fiddler.ai",
38
+ token="your-auth-token"
39
+ )
40
+
41
+ # Creating a connection with custom timeout and proxy
42
+ connection = Connection(
43
+ url="https://your-instance.fiddler.ai",
44
+ token="your-auth-token",
45
+ timeout=(5.0, 30.0), # (connect_timeout, read_timeout)
46
+ proxies={"https": "https://proxy.company.com:8080"}
47
+ )
48
+
49
+ # Creating a connection without SSL verification
50
+ connection = Connection(
51
+ url="https://your-instance.fiddler.ai",
52
+ token="your-auth-token",
53
+ verify=False, # Not recommended for production
54
+ validate=False # Skip version compatibility check
55
+ )
89
56
  """
90
57
 
91
58
  def __init__( # pylint: disable=too-many-arguments
@@ -99,27 +66,17 @@ class Connection:
99
66
  ) -> None:
100
67
  """Initialize a connection to the Fiddler platform.
101
68
 
102
- Parameters
103
- ----------
104
- url : str
105
- The base URL to your Fiddler platform instance
106
- token : str
107
- Authentication token obtained from the Fiddler UI
108
- proxies : dict, optional
109
- Dictionary mapping protocol to proxy URL for HTTP requests
110
- timeout : float or tuple, optional
111
- HTTP request timeout settings (float or tuple of connect/read timeouts)
112
- verify : bool, default True
113
- Whether to verify server's TLS certificate
114
- validate : bool, default True
115
- Whether to validate server/client version compatibility
116
-
117
- Raises
118
- ------
119
- ValueError
120
- If url or token parameters are empty
121
- IncompatibleClient
122
- If server version is incompatible with client version
69
+ Args:
70
+ url: The base URL to your Fiddler platform instance
71
+ token: Authentication token obtained from the Fiddler UI
72
+ proxies: Dictionary mapping protocol to proxy URL for HTTP requests
73
+ timeout: HTTP request timeout settings (float or tuple of connect/read timeouts)
74
+ verify: Whether to verify server's TLS certificate (default: True)
75
+ validate: Whether to validate server/client version compatibility (default: True)
76
+
77
+ Raises:
78
+ ValueError: If url or token parameters are empty
79
+ IncompatibleClient: If server version is incompatible with client version
123
80
  """
124
81
 
125
82
  self.url = url
@@ -363,30 +320,36 @@ def init( # pylint: disable=too-many-arguments
363
320
  Examples:
364
321
  Basic initialization:
365
322
 
366
- import fiddler as fdl
323
+ .. code-block:: python
367
324
 
368
- fdl.init(
369
- url="https://your-fiddler-instance.com",
370
- token="your-auth-token"
371
- )
325
+ import fiddler_evals as fdl
326
+
327
+ fdl.init(
328
+ url="https://your-instance.fiddler.ai",
329
+ token="your-auth-token"
330
+ )
372
331
 
373
332
  Initialization with custom timeout and proxy:
374
333
 
375
- fdl.init(
376
- url="https://your-fiddler-instance.com",
377
- token="your-auth-token",
378
- timeout=(10.0, 60.0), # 10s connect, 60s read timeout
379
- proxies={"https": "https://proxy.company.com:8080"}
380
- )
334
+ .. code-block:: python
335
+
336
+ fdl.init(
337
+ url="https://your-instance.fiddler.ai",
338
+ token="your-auth-token",
339
+ timeout=(10.0, 60.0), # 10s connect, 60s read timeout
340
+ proxies={"https": "https://proxy.company.com:8080"}
341
+ )
381
342
 
382
343
  Initialization for development with relaxed settings:
383
344
 
384
- fdl.init(
385
- url="https://dev-fiddler-instance.com",
386
- token="dev-token",
387
- verify=False, # Skip SSL verification
388
- validate=False, # Skip version compatibility check
389
- )
345
+ .. code-block:: python
346
+
347
+ fdl.init(
348
+ url="https://your-instance.fiddler.ai",
349
+ token="dev-token",
350
+ verify=False, # Skip SSL verification
351
+ validate=False, # Skip version compatibility check
352
+ )
390
353
 
391
354
 
392
355