judgeval 0.0.29__tar.gz → 0.0.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval-0.0.34/.github/pull_request_template.md +31 -0
- judgeval-0.0.34/.github/workflows/ci.yaml +91 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/PKG-INFO +15 -2
- {judgeval-0.0.29 → judgeval-0.0.34}/Pipfile +4 -1
- {judgeval-0.0.29 → judgeval-0.0.34}/Pipfile.lock +1112 -892
- {judgeval-0.0.29 → judgeval-0.0.34}/README.md +12 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/notifications.mdx +107 -15
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/rules.mdx +55 -6
- judgeval-0.0.34/docs/api_reference/judgment_client.mdx +101 -0
- judgeval-0.0.34/docs/api_reference/trace.mdx +173 -0
- judgeval-0.0.34/docs/changelog/2025-04-21.mdx +19 -0
- judgeval-0.0.34/docs/clustering/clustering.mdx +68 -0
- judgeval-0.0.34/docs/evaluation/data_datasets.mdx +288 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/data_examples.mdx +96 -7
- judgeval-0.0.34/docs/evaluation/data_sequences.mdx +80 -0
- judgeval-0.0.34/docs/evaluation/introduction.mdx +224 -0
- judgeval-0.0.34/docs/evaluation/judges.mdx +209 -0
- judgeval-0.0.34/docs/evaluation/scorers/agent/derailment.mdx +54 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/custom_scorers.mdx +185 -0
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/answer_correctness.mdx +31 -1
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/answer_relevancy.mdx +29 -1
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/comparison.mdx +44 -4
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_precision.mdx +33 -1
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_recall.mdx +33 -1
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_relevancy.mdx +31 -1
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/faithfulness.mdx +33 -2
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/groundedness.mdx +1 -1
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/introduction.mdx +48 -25
- judgeval-0.0.34/docs/evaluation/unit_testing.mdx +93 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/getting_started.mdx +166 -188
- judgeval-0.0.34/docs/images/annotation_queue_ui.png +0 -0
- judgeval-0.0.34/docs/images/cluster.png +0 -0
- judgeval-0.0.34/docs/images/cluster_button.png +0 -0
- judgeval-0.0.34/docs/images/dashboard_annotation_queue_button.png +0 -0
- judgeval-0.0.34/docs/mcp_server/mcp_server.mdx +586 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/mint.json +35 -18
- judgeval-0.0.34/docs/monitoring/annotations.mdx +41 -0
- judgeval-0.0.34/docs/monitoring/tracing.mdx +443 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/pyproject.toml +3 -2
- judgeval-0.0.34/src/demo/custom_scorer/main.py +43 -0
- judgeval-0.0.34/src/demo/custom_scorer/scorer.py +44 -0
- judgeval-0.0.34/src/demo/dataset.py +16 -0
- judgeval-0.0.34/src/demo/demo.py +54 -0
- judgeval-0.0.34/src/demo/demo2.py +144 -0
- judgeval-0.0.34/src/demo/new_bot/basic_bot.py +116 -0
- judgeval-0.0.34/src/demo/simple_trace.py +89 -0
- {judgeval-0.0.29/src/demo/new_trace → judgeval-0.0.34/src/demo/simplified_tracing}/example_complex_async.py +53 -35
- judgeval-0.0.34/src/demo/streaming_anthropic_demo.py +82 -0
- judgeval-0.0.34/src/demo/streaming_openai_demo.py +61 -0
- judgeval-0.0.34/src/demo/test.py +51 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/__init__.py +3 -1
- judgeval-0.0.34/src/judgeval/common/s3_storage.py +93 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/tracer.py +901 -177
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/constants.py +5 -3
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/__init__.py +4 -0
- judgeval-0.0.34/src/judgeval/data/custom_example.py +18 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/dataset.py +5 -1
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/eval_dataset_client.py +64 -5
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/example.py +1 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/result.py +7 -6
- judgeval-0.0.34/src/judgeval/data/sequence.py +49 -0
- judgeval-0.0.34/src/judgeval/data/sequence_run.py +44 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/evaluation_run.py +12 -7
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/integrations/langgraph.py +89 -72
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judgment_client.py +86 -145
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/rules.py +4 -7
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/run_evaluation.py +87 -13
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/__init__.py +6 -4
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorer.py +3 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -1
- judgeval-0.0.34/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +21 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/score.py +6 -5
- judgeval-0.0.34/src/judgeval/version_check.py +22 -0
- judgeval-0.0.34/src/test.py +143 -0
- judgeval-0.0.29/.github/workflows/ci.yaml +0 -44
- judgeval-0.0.29/docs/api_reference/judgment_client.mdx +0 -61
- judgeval-0.0.29/docs/api_reference/trace.mdx +0 -82
- judgeval-0.0.29/docs/evaluation/data_datasets.mdx +0 -159
- judgeval-0.0.29/docs/evaluation/introduction.mdx +0 -111
- judgeval-0.0.29/docs/evaluation/judges.mdx +0 -88
- judgeval-0.0.29/docs/evaluation/scorers/hallucination.mdx +0 -54
- judgeval-0.0.29/docs/evaluation/unit_testing.mdx +0 -39
- judgeval-0.0.29/docs/judgment/introduction.mdx +0 -11
- judgeval-0.0.29/docs/monitoring/tracing.mdx +0 -214
- judgeval-0.0.29/src/demo/cookbooks/JNPR_Mist/test.py +0 -21
- judgeval-0.0.29/src/demo/cookbooks/linkd/text2sql.py +0 -14
- judgeval-0.0.29/src/demo/custom_example_demo/osiris_test.py +0 -22
- judgeval-0.0.29/src/demo/custom_example_demo/qodo_scorer.py +0 -78
- judgeval-0.0.29/src/demo/demo.py +0 -21
- judgeval-0.0.29/src/judgeval/data/custom_api_example.py +0 -91
- judgeval-0.0.29/src/judgeval/scorers/base_scorer.py +0 -58
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -169
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
- judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
- judgeval-0.0.29/src/test.py +0 -21
- {judgeval-0.0.29 → judgeval-0.0.34}/.gitignore +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/LICENSE.md +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/README.md +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/platform_notifications.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/development.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/code.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/images.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/markdown.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/navigation.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/reusable-snippets.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/settings.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/execution_order.mdx +0 -0
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/json_correctness.mdx +0 -0
- {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/summarization.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/favicon.svg +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/basic_trace_example.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/checks-passed.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/create_aggressive_scorer.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/create_scorer.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/evaluation_diagram.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/hero-dark.svg +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/hero-light.svg +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/notifications_page.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/online_eval_fault.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/reports_modal.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/trace_ss.png +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/integration/langgraph.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/introduction.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/logo/dark.svg +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/logo/light.svg +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/monitoring/introduction.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/monitoring/production_insights.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/create_dataset.ipynb +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/create_scorer.ipynb +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/demo.ipynb +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/prompt_scorer.ipynb +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/quickstart.ipynb +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/quickstart.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/docs/snippets/snippet-intro.mdx +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/pytest.ini +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/demo/travel_agent.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/clients.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/logger.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/utils.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/mixture_of_judges.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison → judgeval-0.0.34/src/judgeval/scorers/judgeval_scorers}/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/prompt_scorer.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/utils/alerts.py +0 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
## 📝 Summary
|
2
|
+
|
3
|
+
<!-- Provide a brief description of the changes introduced by this PR -->
|
4
|
+
|
5
|
+
## 🎯 Purpose
|
6
|
+
|
7
|
+
<!-- Explain the motivation behind these changes. Why are they necessary? -->
|
8
|
+
|
9
|
+
## 🎥 Demo of Changes
|
10
|
+
|
11
|
+
<!-- Add a short 1-3 minute video describing/demoing the changes -->
|
12
|
+
|
13
|
+
## 🧪 Testing
|
14
|
+
|
15
|
+
<!-- Describe how the changes were tested (unit/manual) -->
|
16
|
+
|
17
|
+
## ✅ Checklist
|
18
|
+
|
19
|
+
- [ ] Self-review
|
20
|
+
- [ ] Video demo of changes
|
21
|
+
- [ ] Unit Tests and CI/CD tests are passing
|
22
|
+
- [ ] Reviewers assigned
|
23
|
+
|
24
|
+
|
25
|
+
## 📌 Linear Issue
|
26
|
+
|
27
|
+
<!-- Reference to associated Linear ticket, e.g., ABC-123 -->
|
28
|
+
|
29
|
+
## ✏️ Additional Notes
|
30
|
+
|
31
|
+
<!-- Any additional information that doesn't fit into the other sections -->
|
@@ -0,0 +1,91 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request_review:
|
5
|
+
types: [submitted]
|
6
|
+
branches:
|
7
|
+
- main
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
run-tests:
|
11
|
+
strategy:
|
12
|
+
fail-fast: false
|
13
|
+
matrix:
|
14
|
+
os: [ubuntu-latest, macos-latest]
|
15
|
+
python-version:
|
16
|
+
- "3.11"
|
17
|
+
name: Test
|
18
|
+
runs-on: ${{ matrix.os }}
|
19
|
+
env:
|
20
|
+
PYTHONPATH: "."
|
21
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
22
|
+
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- name: Checkout code
|
26
|
+
uses: actions/checkout@v4
|
27
|
+
|
28
|
+
- name: Set up Python
|
29
|
+
uses: actions/setup-python@v4
|
30
|
+
with:
|
31
|
+
python-version: ${{ matrix.python-version }}
|
32
|
+
|
33
|
+
- name: Install dependencies
|
34
|
+
run: |
|
35
|
+
pip install pipenv
|
36
|
+
pipenv install --dev
|
37
|
+
|
38
|
+
|
39
|
+
- name: Run tests
|
40
|
+
run: |
|
41
|
+
cd src
|
42
|
+
pipenv run pytest
|
43
|
+
|
44
|
+
run-e2e-tests:
|
45
|
+
if: "!contains(github.actor, '[bot]')" # Exclude if the actor is a bot
|
46
|
+
concurrency:
|
47
|
+
group: e2e-tests
|
48
|
+
strategy:
|
49
|
+
fail-fast: false
|
50
|
+
matrix:
|
51
|
+
os: [ubuntu-latest]
|
52
|
+
python-version:
|
53
|
+
- "3.11"
|
54
|
+
name: E2E Tests
|
55
|
+
runs-on: ${{ matrix.os }}
|
56
|
+
steps:
|
57
|
+
- name: Configure AWS Credentials
|
58
|
+
uses: aws-actions/configure-aws-credentials@v4
|
59
|
+
with:
|
60
|
+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
61
|
+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
62
|
+
aws-region: us-west-1
|
63
|
+
|
64
|
+
- name: Checkout code
|
65
|
+
uses: actions/checkout@v4
|
66
|
+
|
67
|
+
- name: Set up Python
|
68
|
+
uses: actions/setup-python@v4
|
69
|
+
with:
|
70
|
+
python-version: ${{ matrix.python-version }}
|
71
|
+
|
72
|
+
- name: Install judgeval dependencies
|
73
|
+
run: |
|
74
|
+
pip install pipenv
|
75
|
+
pipenv install --dev
|
76
|
+
|
77
|
+
- name: Check if server is running
|
78
|
+
run: |
|
79
|
+
if ! curl -s http://api.judgmentlabs.ai/health > /dev/null; then
|
80
|
+
echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
|
81
|
+
exit 1
|
82
|
+
else
|
83
|
+
echo "Server is running."
|
84
|
+
fi
|
85
|
+
|
86
|
+
- name: Run E2E tests
|
87
|
+
working-directory: src
|
88
|
+
run: |
|
89
|
+
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions/api-keys/judgeval --query SecretString --output text)
|
90
|
+
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
91
|
+
pipenv run pytest ./e2etests
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.0.29
|
3
|
+
Version: 0.0.34
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -12,12 +12,13 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.11
|
13
13
|
Requires-Dist: anthropic
|
14
14
|
Requires-Dist: fastapi
|
15
|
+
Requires-Dist: google-genai
|
15
16
|
Requires-Dist: langchain
|
16
17
|
Requires-Dist: langchain-anthropic
|
17
18
|
Requires-Dist: langchain-core
|
18
19
|
Requires-Dist: langchain-huggingface
|
19
20
|
Requires-Dist: langchain-openai
|
20
|
-
Requires-Dist: litellm
|
21
|
+
Requires-Dist: litellm==1.38.12
|
21
22
|
Requires-Dist: nest-asyncio
|
22
23
|
Requires-Dist: openai
|
23
24
|
Requires-Dist: openpyxl
|
@@ -94,9 +95,21 @@ Create a file named `traces.py` with the following code:
|
|
94
95
|
from judgeval.common.tracer import Tracer, wrap
|
95
96
|
from openai import OpenAI
|
96
97
|
|
98
|
+
# Basic initialization
|
97
99
|
client = wrap(OpenAI())
|
98
100
|
judgment = Tracer(project_name="my_project")
|
99
101
|
|
102
|
+
# Or with S3 storage enabled
|
103
|
+
# NOTE: Make sure AWS creds correspond to an account with write access to the specified S3 bucket
|
104
|
+
judgment = Tracer(
|
105
|
+
project_name="my_project",
|
106
|
+
use_s3=True,
|
107
|
+
s3_bucket_name="my-traces-bucket", # Bucket created automatically if it doesn't exist
|
108
|
+
s3_aws_access_key_id="your-access-key", # Optional: defaults to AWS_ACCESS_KEY_ID env var
|
109
|
+
s3_aws_secret_access_key="your-secret-key", # Optional: defaults to AWS_SECRET_ACCESS_KEY env var
|
110
|
+
s3_region_name="us-west-1" # Optional: defaults to AWS_REGION env var or "us-west-1"
|
111
|
+
)
|
112
|
+
|
100
113
|
@judgment.observe(span_type="tool")
|
101
114
|
def my_tool():
|
102
115
|
return "Hello world!"
|
@@ -4,7 +4,7 @@ verify_ssl = true
|
|
4
4
|
name = "pypi"
|
5
5
|
|
6
6
|
[packages]
|
7
|
-
litellm = "*"
|
7
|
+
litellm = "==1.38.12"
|
8
8
|
python-dotenv = "==1.0.1"
|
9
9
|
fastapi = "*"
|
10
10
|
uvicorn = "*"
|
@@ -24,6 +24,9 @@ langchain-openai = "*"
|
|
24
24
|
langchain-anthropic = "*"
|
25
25
|
langchain-core = "*"
|
26
26
|
langchain-community = "*"
|
27
|
+
langgraph = "*"
|
28
|
+
google-genai = "*"
|
29
|
+
boto3 = "*"
|
27
30
|
|
28
31
|
[dev-packages]
|
29
32
|
pytest = "*"
|