rakam-eval-sdk 0.2.0rc1__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.0rc1
+Version: 0.2.1
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT
@@ -94,6 +94,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -137,6 +138,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -150,7 +152,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
@@ -80,6 +80,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -123,6 +124,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -136,7 +138,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "rakam-eval-sdk"
-version = "0.2.0rc1"
+version = "0.2.1"
 description = "Evaluation Framework SDK"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -6,7 +6,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Optional
+from typing import Any, Dict, Optional, Sequence
 
 import typer
 from dotenv import load_dotenv
@@ -29,9 +29,64 @@ console = Console()
 PROJECT_ROOT = os.path.abspath(".")
 if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
+list_app = typer.Typer(help="List resources")
+app.add_typer(list_app, name="list")
+metrics_app = typer.Typer(help="Metrics utilities")
+app.add_typer(metrics_app, name="metrics")
 
 
-@app.command()
+@metrics_app.command("list")
+def list_metrics(
+    limit: int = typer.Option(
+        20,
+        "--limit",
+        help="Number of testcases to inspect for metrics",
+    ),
+):
+    """
+    List unique metric names found in evaluation testcases.
+    """
+    client = DeepEvalClient()
+
+    testcases = client.list_evaluation_testcases(
+        limit=limit,
+        offset=0,
+        raise_exception=True,
+    )
+
+    if not testcases:
+        typer.echo("No evaluation testcases found.")
+        return
+
+    metric_names: set[str] = set()
+
+    def collect_metrics(entries: Sequence[Dict] | None):
+        if not entries:
+            return
+        for entry in entries:
+            for metric in entry.get("metrics", []) or []:
+                name = metric.get("name")
+                if name:
+                    metric_names.add(name)
+
+    for tc in testcases:
+
+        collect_metrics(tc.get("result"))
+
+    if not metric_names:
+        typer.echo("No metrics found.")
+        return
+
+    typer.echo(
+        f"📊 Found {len(metric_names)} unique metrics "
+        f"(from latest {limit} testcases)\n"
+    )
+
+    for name in sorted(metric_names):
+        typer.echo(f"- {name}")
+
+
+@list_app.command("eval")
 def list(
     directory: Path = typer.Argument(
         Path("./eval"),
@@ -65,17 +120,10 @@ def list(
         typer.echo(f"No @{TARGET_DECORATOR} functions found.")
 
 
-list_app = typer.Typer(help="List resources")
-app.add_typer(list_app, name="list")
-
-
 @list_app.command("runs")
 def list_runs(
     limit: int = typer.Option(20, help="Max number of runs"),
     offset: int = typer.Option(0, help="Pagination offset"),
-    status: Optional[str] = typer.Option(
-        None, help="Filter by status (running, completed, failed)"
-    ),
 ):
     """
     List evaluation runs (newest first).
@@ -92,19 +140,7 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-    # optional status filtering (client-side for now)
-    if status:
-        runs = [
-            r for r in runs
-            if r.get("result", {}).get("status") == status
-        ]
-
-    typer.echo(
-        f"[id] "
-        f"{'unique_id':<20}"
-        f"{'label':<20}"
-        f"created_at"
-    )
+    typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
@@ -120,12 +156,7 @@ def list_runs(
         except ValueError:
             pass
 
-        typer.echo(
-            f"[{run_id}] "
-            f"{uid:<20} "
-            f"{label:<20} "
-            f"{created_at}"
-        )
+        typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
 @list_app.command("show")
@@ -345,8 +376,7 @@ def _print_and_save(
         return
 
     if out.exists() and not overwrite:
-        typer.echo(
-            f"❌ File already exists: {out} (use --overwrite to replace)")
+        typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
         raise typer.Exit(code=1)
 
     out.parent.mkdir(parents=True, exist_ok=True)
@@ -27,19 +27,19 @@ class DeepEvalClient:
         settings_module: Optional[Any] = None,  # optional external settings
         timeout: int = 30,
     ):
-        settings_url = getattr(settings_module, "EVALFRAMWORK_URL", None)
+        settings_url = getattr(settings_module, "EVALFRAMEWORK_URL", None)
         settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
 
         raw_url = (
             base_url
             or settings_url
-            or os.getenv("EVALFRAMWORK_URL")
+            or os.getenv("EVALFRAMEWORK_URL")
             or "http://localhost:8080"
         )
         self.base_url = raw_url.rstrip("/")
         self.api_token = (
            api_token or settings_token or os.getenv(
-                "EVALFRAMWORK_API_KEY", "")
+                "EVALFRAMEWORK_API_KEY", "")
        )
        self.timeout = timeout
 
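
This hunk renames only the URL variable from `EVALFRAMWORK_URL` to `EVALFRAMEWORK_URL`; the API-key variable keeps the old `EVALFRAMWORK_API_KEY` spelling in this release. The constructor resolves configuration in order: explicit argument, then settings module, then environment variable, then the localhost default. A sketch of that precedence as it stands in 0.2.1, assuming the client is importable from the package root (import path not confirmed):

```python
# Sketch of the resolution order the constructor implements in 0.2.1.
import os

from rakam_eval_sdk import DeepEvalClient  # hypothetical import path

# 1. Explicit arguments win over everything else:
client = DeepEvalClient(base_url="http://api", api_token="123")

# 2. Otherwise the environment is consulted; note the asymmetric spelling:
os.environ["EVALFRAMEWORK_URL"] = "http://api"   # renamed in 0.2.1
os.environ["EVALFRAMWORK_API_KEY"] = "123"       # still the old spelling
client = DeepEvalClient()

# 3. With nothing set, the URL falls back to http://localhost:8080.
```
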
@@ -94,8 +94,7 @@ MetricConfig = Annotated[
 ]
 
 SchemaMetricConfig = Annotated[
-    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
-        discriminator="type")
+    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
 ]
 
 
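
The `SchemaMetricConfig` annotation above is Pydantic's discriminated-union pattern: the value of the `type` field selects which config model validates a payload. A self-contained sketch of the pattern, assuming Pydantic v2 — the field names inside the models here are illustrative, not the SDK's actual schema:

```python
# Minimal sketch of a discriminated union (Pydantic v2); model bodies are illustrative.
from typing import Annotated, List, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class JsonCorrectnessConfig(BaseModel):
    type: Literal["json_correctness"]
    expected_schema: dict  # illustrative field


class FieldsPresenceConfig(BaseModel):
    type: Literal["fields_presence"]
    required_fields: List[str]  # illustrative field


SchemaMetricConfig = Annotated[
    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
]

# The "type" value routes validation to the matching model.
config = TypeAdapter(SchemaMetricConfig).validate_python(
    {"type": "fields_presence", "required_fields": ["id", "label"]}
)
assert isinstance(config, FieldsPresenceConfig)
```
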