evaldeck 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {evaldeck-0.1.1 → evaldeck-0.1.3}/PKG-INFO +4 -1
  2. {evaldeck-0.1.1 → evaldeck-0.1.3}/pyproject.toml +15 -2
  3. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/config.py +1 -0
  4. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/evaluator.py +13 -0
  5. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/integrations/__init__.py +14 -1
  6. evaldeck-0.1.3/src/evaldeck/integrations/langchain.py +125 -0
  7. evaldeck-0.1.1/.claude/settings.local.json +0 -34
  8. evaldeck-0.1.1/.devcontainer/Dockerfile +0 -30
  9. evaldeck-0.1.1/.devcontainer/devcontainer.json +0 -60
  10. evaldeck-0.1.1/.github/workflows/ci.yaml +0 -65
  11. evaldeck-0.1.1/.github/workflows/docs.yaml +0 -50
  12. evaldeck-0.1.1/.github/workflows/publish.yaml +0 -28
  13. evaldeck-0.1.1/docs/api/config.md +0 -7
  14. evaldeck-0.1.1/docs/api/evalcase.md +0 -29
  15. evaldeck-0.1.1/docs/api/evaluation-result.md +0 -23
  16. evaldeck-0.1.1/docs/api/evaluator.md +0 -15
  17. evaldeck-0.1.1/docs/api/grade-result.md +0 -20
  18. evaldeck-0.1.1/docs/api/graders/base.md +0 -15
  19. evaldeck-0.1.1/docs/api/graders/code.md +0 -71
  20. evaldeck-0.1.1/docs/api/graders/llm.md +0 -17
  21. evaldeck-0.1.1/docs/api/index.md +0 -86
  22. evaldeck-0.1.1/docs/api/metrics.md +0 -48
  23. evaldeck-0.1.1/docs/api/step.md +0 -7
  24. evaldeck-0.1.1/docs/api/trace.md +0 -39
  25. evaldeck-0.1.1/docs/concepts/architecture.md +0 -279
  26. evaldeck-0.1.1/docs/concepts/evaluation-workflow.md +0 -376
  27. evaldeck-0.1.1/docs/concepts/grading-strategies.md +0 -318
  28. evaldeck-0.1.1/docs/concepts/index.md +0 -147
  29. evaldeck-0.1.1/docs/concepts/traces.md +0 -361
  30. evaldeck-0.1.1/docs/contributing/adding-graders.md +0 -331
  31. evaldeck-0.1.1/docs/contributing/adding-integrations.md +0 -245
  32. evaldeck-0.1.1/docs/contributing/adding-metrics.md +0 -299
  33. evaldeck-0.1.1/docs/contributing/code-standards.md +0 -287
  34. evaldeck-0.1.1/docs/contributing/index.md +0 -56
  35. evaldeck-0.1.1/docs/contributing/setup.md +0 -220
  36. evaldeck-0.1.1/docs/examples/basic-usage.md +0 -248
  37. evaldeck-0.1.1/docs/examples/index.md +0 -97
  38. evaldeck-0.1.1/docs/examples/langchain-agent.md +0 -344
  39. evaldeck-0.1.1/docs/examples/llm-judge.md +0 -322
  40. evaldeck-0.1.1/docs/examples/tool-calls.md +0 -232
  41. evaldeck-0.1.1/docs/getting-started/first-evaluation.md +0 -287
  42. evaldeck-0.1.1/docs/getting-started/index.md +0 -74
  43. evaldeck-0.1.1/docs/getting-started/installation.md +0 -177
  44. evaldeck-0.1.1/docs/getting-started/quickstart.md +0 -183
  45. evaldeck-0.1.1/docs/includes/abbreviations.md +0 -8
  46. evaldeck-0.1.1/docs/index.md +0 -146
  47. evaldeck-0.1.1/docs/stylesheets/extra.css +0 -106
  48. evaldeck-0.1.1/docs/user-guide/ci-cd.md +0 -411
  49. evaldeck-0.1.1/docs/user-guide/cli.md +0 -293
  50. evaldeck-0.1.1/docs/user-guide/configuration.md +0 -301
  51. evaldeck-0.1.1/docs/user-guide/graders/code-based.md +0 -367
  52. evaldeck-0.1.1/docs/user-guide/graders/custom.md +0 -339
  53. evaldeck-0.1.1/docs/user-guide/graders/index.md +0 -194
  54. evaldeck-0.1.1/docs/user-guide/graders/llm-based.md +0 -327
  55. evaldeck-0.1.1/docs/user-guide/index.md +0 -176
  56. evaldeck-0.1.1/docs/user-guide/integrations/index.md +0 -221
  57. evaldeck-0.1.1/docs/user-guide/integrations/manual.md +0 -322
  58. evaldeck-0.1.1/docs/user-guide/integrations/opentelemetry.md +0 -226
  59. evaldeck-0.1.1/docs/user-guide/metrics.md +0 -331
  60. evaldeck-0.1.1/docs/user-guide/test-cases.md +0 -447
  61. evaldeck-0.1.1/examples/basic_usage.py +0 -117
  62. evaldeck-0.1.1/examples/langchain_react_agent.py +0 -343
  63. evaldeck-0.1.1/tests/__init__.py +0 -1
  64. evaldeck-0.1.1/tests/conftest.py +0 -52
  65. evaldeck-0.1.1/tests/test_evaluator.py +0 -429
  66. evaldeck-0.1.1/tests/test_graders.py +0 -247
  67. evaldeck-0.1.1/tests/test_trace.py +0 -115
  68. {evaldeck-0.1.1 → evaldeck-0.1.3}/.gitignore +0 -0
  69. {evaldeck-0.1.1 → evaldeck-0.1.3}/.pre-commit-config.yaml +0 -0
  70. {evaldeck-0.1.1 → evaldeck-0.1.3}/CONTRIBUTING.md +0 -0
  71. {evaldeck-0.1.1 → evaldeck-0.1.3}/LICENSE +0 -0
  72. {evaldeck-0.1.1 → evaldeck-0.1.3}/README.md +0 -0
  73. {evaldeck-0.1.1 → evaldeck-0.1.3}/mkdocs.yml +0 -0
  74. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/__init__.py +0 -0
  75. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/cli.py +0 -0
  76. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/graders/__init__.py +0 -0
  77. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/graders/base.py +0 -0
  78. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/graders/code.py +0 -0
  79. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/graders/llm.py +0 -0
  80. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/integrations/opentelemetry.py +0 -0
  81. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/metrics/__init__.py +0 -0
  82. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/metrics/base.py +0 -0
  83. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/metrics/builtin.py +0 -0
  84. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/results.py +0 -0
  85. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/test_case.py +0 -0
  86. {evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/trace.py +0 -0
{evaldeck-0.1.1 → evaldeck-0.1.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evaldeck
-Version: 0.1.1
+Version: 0.1.3
 Summary: The evaluation framework for AI agents. Pytest for agents.
 Project-URL: Homepage, https://github.com/tantra-run/evaldeck-py
 Project-URL: Documentation, https://tantra-run.github.io/evaldeck-py/
@@ -29,6 +29,7 @@ Requires-Dist: rich>=13.0
 Provides-Extra: all
 Requires-Dist: anthropic>=0.18; extra == 'all'
 Requires-Dist: openai>=1.0; extra == 'all'
+Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'all'
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.18; extra == 'anthropic'
 Provides-Extra: dev
@@ -45,6 +46,8 @@ Requires-Dist: mkdocs-autorefs>=0.5; extra == 'docs'
 Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
 Requires-Dist: mkdocs>=1.5; extra == 'docs'
 Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
+Provides-Extra: langchain
+Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'langchain'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0; extra == 'openai'
 Description-Content-Type: text/markdown
{evaldeck-0.1.1 → evaldeck-0.1.3}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "evaldeck"
-version = "0.1.1"
+version = "0.1.3"
 description = "The evaluation framework for AI agents. Pytest for agents."
 readme = "README.md"
 license = "Apache-2.0"
@@ -43,7 +43,8 @@ dependencies = [
 [project.optional-dependencies]
 openai = ["openai>=1.0"]
 anthropic = ["anthropic>=0.18"]
-all = ["evaldeck[openai,anthropic]"]
+langchain = ["openinference-instrumentation-langchain>=0.1"]
+all = ["evaldeck[openai,anthropic,langchain]"]
 dev = [
     "pytest>=7.0",
     "pytest-asyncio>=0.21",
@@ -73,6 +74,18 @@ Issues = "https://github.com/tantra-run/evaldeck-py/issues"
 [tool.hatch.build.targets.wheel]
 packages = ["src/evaldeck"]
 
+[tool.hatch.build.targets.sdist]
+exclude = [
+    "tests/",
+    "docs/",
+    ".github/",
+    ".devcontainer/",
+    ".evaldeck/",
+    "internal/",
+    "examples/",
+    ".claude/",
+]
+
 [tool.ruff]
 target-version = "py310"
 line-length = 100
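The practical effect of the new extra: the OpenInference instrumentation only ships when explicitly requested, so a plain `pip install evaldeck` stays lean while `pip install "evaldeck[langchain]"` (or the `all` extra) pulls in openinference-instrumentation-langchain. This is the same install command the new integration module suggests in its ImportError message below.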
{evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/config.py
@@ -15,6 +15,7 @@ class AgentConfig(BaseModel):
     module: str | None = None
     function: str | None = None
     class_name: str | None = None
+    framework: str | None = None  # "langchain", "crewai", etc.
 
 
 class GraderDefaults(BaseModel):
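For context, the new field slots into the existing Pydantic model, so a `framework:` key in the `agent:` block of evaldeck.yaml validates like any other optional string. A minimal sketch (constructing AgentConfig directly is for illustration only; the field names are exactly those in the diff):

    from evaldeck.config import AgentConfig

    # Mirrors an evaldeck.yaml agent block:
    #   agent:
    #     module: my_agent
    #     function: create_agent
    #     framework: langchain
    cfg = AgentConfig(module="my_agent", function="create_agent", framework="langchain")
    assert cfg.framework == "langchain"  # defaults to None when the key is absent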
{evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/evaluator.py
@@ -571,4 +571,17 @@ class EvaluationRunner:
 
         module = importlib.import_module(agent_config.module)
         func = getattr(module, agent_config.function)
+
+        # Handle framework-specific integration
+        if agent_config.framework:
+            framework = agent_config.framework.lower()
+
+            if framework == "langchain":
+                from evaldeck.integrations.langchain import create_langchain_runner
+
+                return create_langchain_runner(func)
+
+            else:
+                raise ValueError(f"Unknown framework: {agent_config.framework}")
+
         return func
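The upshot of this branch: when `framework: langchain` is configured, the resolved symbol is treated as an agent factory and wrapped, so the loader hands the evaluation loop a runner that yields a Trace rather than the agent's raw output. A hedged sketch of the resulting call path (`create_agent` is a hypothetical user-supplied factory; only `create_langchain_runner` comes from the diff):

    from evaldeck.integrations.langchain import create_langchain_runner

    def create_agent():
        # hypothetical factory named in evaldeck.yaml; would return a LangChain agent
        ...

    runner = create_langchain_runner(create_agent)  # what the loader now returns
    trace = runner("What is 2 + 2?")                # a Trace, ready for evaluator.evaluate()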
{evaldeck-0.1.1 → evaldeck-0.1.3}/src/evaldeck/integrations/__init__.py
@@ -3,7 +3,7 @@
 This module provides the OpenTelemetry/OpenInference adapter for capturing traces
 from any instrumented AI framework (LangChain, CrewAI, LiteLLM, OpenAI, Anthropic, etc.)
 
-Usage:
+Basic usage (manual setup):
    from evaldeck.integrations import EvaldeckSpanProcessor, setup_otel_tracing
    from openinference.instrumentation.langchain import LangChainInstrumentor
 
@@ -14,6 +14,19 @@ Usage:
 
    trace = processor.get_latest_trace()
    result = evaluator.evaluate(trace, test_case)
+
+With framework integration (automatic setup via evaldeck.yaml):
+    # evaldeck.yaml
+    agent:
+      module: my_agent
+      function: create_agent
+      framework: langchain
+
+    # my_agent.py
+    def create_agent():
+        return create_react_agent(llm, tools)
+
+    # Run: evaldeck run
 """
 
 from evaldeck.integrations.opentelemetry import (
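Condensing the manual path above into one sequence, under the assumption (the diff elides the middle of the docstring) that the processor comes from `setup_otel_tracing()`:

    from evaldeck.integrations import setup_otel_tracing
    from openinference.instrumentation.langchain import LangChainInstrumentor

    processor = setup_otel_tracing()      # assumed wiring; not all of these lines are shown in the hunk
    LangChainInstrumentor().instrument()  # instrument LangChain globally

    # ... invoke the agent here ...
    trace = processor.get_latest_trace()  # then: evaluator.evaluate(trace, test_case)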
evaldeck-0.1.3/src/evaldeck/integrations/langchain.py
@@ -0,0 +1,125 @@
+"""LangChain integration for evaldeck.
+
+Provides automatic instrumentation and trace capture for LangChain/LangGraph agents.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable
+
+if TYPE_CHECKING:
+    from evaldeck.trace import Trace
+
+
+class LangChainIntegration:
+    """LangChain/LangGraph integration.
+
+    Automatically sets up OpenTelemetry tracing and provides a wrapper
+    that invokes the agent and returns a Trace.
+    """
+
+    def __init__(self) -> None:
+        self._processor: Any = None
+        self._agent: Any = None
+        self._initialized = False
+
+    def setup(self, agent_factory: Callable[[], Any]) -> None:
+        """Set up instrumentation and create the agent.
+
+        Args:
+            agent_factory: Function that returns the agent instance.
+        """
+        if self._initialized:
+            return
+
+        # Import here to make langchain an optional dependency
+        try:
+            from openinference.instrumentation.langchain import LangChainInstrumentor
+        except ImportError as e:
+            raise ImportError(
+                "LangChain integration requires openinference-instrumentation-langchain. "
+                "Install with: pip install evaldeck[langchain]"
+            ) from e
+
+        from evaldeck.integrations import setup_otel_tracing
+
+        # Set up OTel tracing
+        self._processor = setup_otel_tracing()
+
+        # Instrument LangChain
+        LangChainInstrumentor().instrument()
+
+        # Create the agent
+        self._agent = agent_factory()
+        self._initialized = True
+
+    def run(self, input: str) -> Trace:
+        """Run the agent and return a trace.
+
+        Args:
+            input: The input string to send to the agent.
+
+        Returns:
+            Trace captured from the agent execution.
+        """
+        if not self._initialized:
+            raise RuntimeError("Integration not initialized. Call setup() first.")
+
+        # Reset processor for fresh trace
+        self._processor.reset()
+
+        # Invoke the agent - auto-detect format
+        self._invoke_agent(input)
+
+        # Get and return trace
+        trace = self._processor.get_latest_trace()
+        if trace is None:
+            raise RuntimeError("No trace captured from agent execution")
+
+        return trace
+
+    def _invoke_agent(self, input: str) -> Any:
+        """Invoke the agent with the appropriate format.
+
+        Auto-detects LangGraph vs legacy LangChain format.
+        """
+        # LangGraph style (current)
+        if hasattr(self._agent, "invoke"):
+            # Try LangGraph message format first
+            try:
+                return self._agent.invoke({"messages": [("human", input)]})
+            except (TypeError, KeyError):
+                # Fall back to simple input
+                try:
+                    return self._agent.invoke({"input": input})
+                except (TypeError, KeyError):
+                    return self._agent.invoke(input)
+
+        # Legacy LangChain style
+        if hasattr(self._agent, "run"):
+            return self._agent.run(input)
+
+        # Callable
+        if callable(self._agent):
+            return self._agent(input)
+
+        raise RuntimeError(
+            f"Don't know how to invoke agent of type {type(self._agent)}. "
+            "Agent must have invoke(), run(), or be callable."
+        )
+
+
+def create_langchain_runner(agent_factory: Callable[[], Any]) -> Callable[[str], Trace]:
+    """Create a runner function for LangChain agents.
+
+    This is the main entry point used by evaldeck's EvaluationRunner.
+
+    Args:
+        agent_factory: Function that returns the agent instance.
+
+    Returns:
+        A function that takes input and returns a Trace.
+    """
+    integration = LangChainIntegration()
+    integration.setup(agent_factory)
+    return integration.run
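One way to see the invocation fallback in isolation, with no LangChain installed: an object whose `invoke` rejects the LangGraph message dict gets retried with the `{"input": ...}` shape. The toy class below is illustrative only, and poking `_agent` directly bypasses `setup()` purely to exercise `_invoke_agent`:

    from evaldeck.integrations.langchain import LangChainIntegration

    class ToyAgent:
        def invoke(self, payload):
            # Accept only the {"input": ...} shape; reject the messages format
            if isinstance(payload, dict) and "input" in payload:
                return payload["input"].upper()
            raise TypeError("unsupported payload")

    integration = LangChainIntegration()
    integration._agent = ToyAgent()            # illustration only; real use goes through setup()
    print(integration._invoke_agent("hello"))  # -> "HELLO", via the second fallback branch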
evaldeck-0.1.1/.claude/settings.local.json
@@ -1,34 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(python -m pytest:*)",
-      "Bash(grep:*)",
-      "Bash(python:*)",
-      "Bash(python3:*)",
-      "Bash(PYTHONPATH=src python3:*)",
-      "Bash(PYTHONPATH=src /opt/homebrew/bin/python3.11:*)",
-      "Bash(wc:*)",
-      "WebFetch(domain:docs.langchain.com)",
-      "WebSearch",
-      "WebFetch(domain:arize-ai.github.io)",
-      "WebFetch(domain:docs.arize.com)",
-      "WebFetch(domain:arize.com)",
-      "WebFetch(domain:github.com)",
-      "Bash(ls:*)",
-      "Bash(find:*)",
-      "Bash(pip install:*)",
-      "Bash(pip3 install:*)",
-      "Bash(mkdocs serve --help:*)",
-      "Bash(git init:*)",
-      "Bash(git add:*)",
-      "Bash(git commit:*)",
-      "Bash(git branch:*)",
-      "Bash(git remote add:*)",
-      "Bash(git push:*)",
-      "Bash(ruff check:*)",
-      "Bash(source:*)",
-      "Bash(./venv/bin/python -m pytest:*)",
-      "Bash(uv run pytest:*)"
-    ]
-  }
-}
evaldeck-0.1.1/.devcontainer/Dockerfile
@@ -1,30 +0,0 @@
-FROM mcr.microsoft.com/devcontainers/python:1-3.11-bullseye
-
-# Remove yarn repo (has expired GPG key) and install system dependencies
-RUN rm -f /etc/apt/sources.list.d/yarn.list && \
-    apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
-
-# Set working directory
-WORKDIR /workspaces/evaldeck
-
-# Upgrade pip
-RUN pip install --upgrade pip
-
-# Install Python development tools
-RUN pip install --no-cache-dir \
-    ruff \
-    mypy \
-    pytest \
-    pytest-asyncio \
-    pre-commit \
-    build \
-    twine
-
-# Create directory for evaldeck output
-RUN mkdir -p /workspaces/evaldeck/.evaldeck
-
-# Set Python path
-ENV PYTHONPATH="/workspaces/evaldeck/src:${PYTHONPATH}"
evaldeck-0.1.1/.devcontainer/devcontainer.json
@@ -1,60 +0,0 @@
-{
-  "name": "Evaldeck Development",
-  "build": {
-    "dockerfile": "Dockerfile",
-    "context": ".."
-  },
-  "features": {
-    "ghcr.io/devcontainers/features/git:1": {},
-    "ghcr.io/devcontainers/features/github-cli:1": {}
-  },
-  "customizations": {
-    "vscode": {
-      "extensions": [
-        "ms-python.python",
-        "ms-python.vscode-pylance",
-        "charliermarsh.ruff",
-        "tamasfe.even-better-toml",
-        "redhat.vscode-yaml",
-        "github.copilot"
-      ],
-      "settings": {
-        "python.defaultInterpreterPath": "/usr/local/bin/python",
-        "python.testing.pytestEnabled": true,
-        "python.testing.pytestArgs": [
-          "tests"
-        ],
-        "editor.formatOnSave": true,
-        "editor.codeActionsOnSave": {
-          "source.organizeImports": "explicit",
-          "source.fixAll": "explicit"
-        },
-        "[python]": {
-          "editor.defaultFormatter": "charliermarsh.ruff"
-        },
-        "files.exclude": {
-          "**/__pycache__": true,
-          "**/*.pyc": true,
-          "**/.pytest_cache": true,
-          "**/.mypy_cache": true,
-          "**/.ruff_cache": true,
-          "**/*.egg-info": true
-        }
-      }
-    }
-  },
-  "postCreateCommand": "pip install -e '.[dev,all,docs]' && (git rev-parse --git-dir > /dev/null 2>&1 && pre-commit install || echo 'Skipping pre-commit install (not a git repo)')",
-  "runArgs": [
-    "--env-file",
-    "${localWorkspaceFolder}/.env"
-  ],
-  "forwardPorts": [],
-  "remoteUser": "vscode",
-  "mounts": [
-    "source=${localWorkspaceFolder}/.evaldeck,target=/workspaces/evaldeck/.evaldeck,type=bind,consistency=cached"
-  ],
-  "remoteEnv": {
-    "PYTHONDONTWRITEBYTECODE": "1",
-    "PYTHONUNBUFFERED": "1"
-  }
-}
evaldeck-0.1.1/.github/workflows/ci.yaml
@@ -1,65 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.10", "3.11", "3.12"]
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e ".[dev]"
-
-      - name: Lint with ruff
-        run: |
-          ruff check src/ tests/
-
-      - name: Type check with mypy
-        run: |
-          mypy src/
-
-      - name: Run tests
-        run: |
-          pytest tests/ -v --tb=short
-
-  build:
-    runs-on: ubuntu-latest
-    needs: test
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install build dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-
-      - name: Build package
-        run: |
-          python -m build
-
-      - name: Check package
-        run: |
-          pip install twine
-          twine check dist/*
evaldeck-0.1.1/.github/workflows/docs.yaml
@@ -1,50 +0,0 @@
-name: Deploy Docs
-
-on:
-  push:
-    branches:
-      - main
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
-concurrency:
-  group: "pages"
-  cancel-in-progress: false
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        run: |
-          pip install -e ".[docs]"
-
-      - name: Build docs
-        run: mkdocs build
-
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
-        with:
-          path: site/
-
-  deploy:
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-    runs-on: ubuntu-latest
-    needs: build
-    steps:
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
evaldeck-0.1.1/.github/workflows/publish.yaml
@@ -1,28 +0,0 @@
-name: Publish to PyPI
-
-on:
-  release:
-    types: [published]
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-    environment: pypi
-    permissions:
-      id-token: write  # Required for trusted publishing
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install build tools
-        run: pip install build
-
-      - name: Build package
-        run: python -m build
-
-      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
evaldeck-0.1.1/docs/api/config.md
@@ -1,7 +0,0 @@
-# Configuration
-
-::: evaldeck.config.EvaldeckConfig
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
evaldeck-0.1.1/docs/api/evalcase.md
@@ -1,29 +0,0 @@
-# Test Case Models
-
-::: evaldeck.test_case.EvalCase
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.test_case.ExpectedBehavior
-    options:
-      show_root_heading: true
-      show_source: true
-
----
-
-::: evaldeck.test_case.EvalSuite
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.test_case.GraderConfig
-    options:
-      show_root_heading: true
-      show_source: true
evaldeck-0.1.1/docs/api/evaluation-result.md
@@ -1,23 +0,0 @@
-# Evaluation Results
-
-::: evaldeck.results.EvaluationResult
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.results.SuiteResult
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.results.RunResult
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
evaldeck-0.1.1/docs/api/evaluator.md
@@ -1,15 +0,0 @@
-# Evaluator
-
-::: evaldeck.evaluator.Evaluator
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.evaluator.EvaluationRunner
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
evaldeck-0.1.1/docs/api/grade-result.md
@@ -1,20 +0,0 @@
-# Results
-
-::: evaldeck.results.GradeResult
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.results.GradeStatus
-    options:
-      show_root_heading: true
-
----
-
-::: evaldeck.results.MetricResult
-    options:
-      show_root_heading: true
-      show_source: true
evaldeck-0.1.1/docs/api/graders/base.md
@@ -1,15 +0,0 @@
-# Base Grader
-
-::: evaldeck.graders.BaseGrader
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source
-
----
-
-::: evaldeck.graders.CompositeGrader
-    options:
-      show_root_heading: true
-      show_source: true
-      members_order: source