eval-protocol 0.2.46.dev2__py3-none-any.whl → 0.2.46.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eval_protocol/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-10T01:09:04-0700",
11
+ "date": "2025-10-10T01:45:32-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "859dce2af419fc26a8634c1254a75a52d9c246d4",
15
- "version": "0.2.46-dev2"
14
+ "full-revisionid": "1757548441eb93afd5dc0428b0218637787cdd80",
15
+ "version": "0.2.46-dev3"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -343,11 +343,11 @@ class FireworksTracingAdapter(BaseAdapter):
343
343
  # Remove None values
344
344
  params = {k: v for k, v in params.items() if v is not None}
345
345
 
346
- # Make request to proxy
346
+ # Make request to proxy (using pointwise for efficiency)
347
347
  if self.project_id:
348
- url = f"{self.base_url}/v1/project_id/{self.project_id}/traces"
348
+ url = f"{self.base_url}/v1/project_id/{self.project_id}/traces/pointwise"
349
349
  else:
350
- url = f"{self.base_url}/v1/traces"
350
+ url = f"{self.base_url}/v1/traces/pointwise"
351
351
 
352
352
  headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
353
353
 
@@ -367,7 +367,7 @@ class FireworksTracingAdapter(BaseAdapter):
367
367
  except Exception: # In case e.response.json() fails
368
368
  error_msg = f"Proxy error: {e.response.text}"
369
369
 
370
- logger.error("Failed to fetch traces from proxy: %s", error_msg)
370
+ logger.error("Failed to fetch traces from proxy (HTTP %s): %s", e.response.status_code, error_msg)
371
371
  return eval_rows
372
372
  except requests.exceptions.RequestException as e:
373
373
  # Non-HTTP errors (network issues, timeouts, etc.)
@@ -16,7 +16,7 @@ from contextlib import asynccontextmanager
16
16
  from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
17
17
  from .auth import AuthProvider, NoAuthProvider
18
18
  from .litellm import handle_chat_completion, proxy_to_litellm
19
- from .langfuse import fetch_langfuse_traces
19
+ from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
20
20
 
21
21
  # Configure logging before any other imports (so all modules inherit this config)
22
22
  log_level = os.getenv("LOG_LEVEL", "INFO").upper()
@@ -267,6 +267,27 @@ def create_app(
267
267
  params=params,
268
268
  )
269
269
 
270
@app.get("/traces/pointwise", response_model=LangfuseTracesResponse)
@app.get("/v1/traces/pointwise", response_model=LangfuseTracesResponse)
@app.get("/project_id/{project_id}/traces/pointwise", response_model=LangfuseTracesResponse)
@app.get("/v1/project_id/{project_id}/traces/pointwise", response_model=LangfuseTracesResponse)
async def pointwise_get_langfuse_trace(
    request: Request,
    params: TracesParams = Depends(get_traces_params),
    project_id: Optional[str] = None,
    config: ProxyConfig = Depends(get_config),
    redis_client: redis.Redis = Depends(get_redis),
    _: None = Depends(require_auth),
) -> LangfuseTracesResponse:
    """
    Route handler for the pointwise trace endpoints (registered in create_app).

    Serves all four path variants above. When the matched route carries a
    ``project_id`` path segment it overrides ``params.project_id``; the actual
    lookup is delegated to ``pointwise_fetch_langfuse_trace``.
    """
    # A project_id taken from the URL path wins over the one in the query params.
    path_has_project = project_id is not None
    if path_has_project:
        params.project_id = project_id

    response = await pointwise_fetch_langfuse_trace(
        config=config,
        redis_client=redis_client,
        request=request,
        params=params,
    )
    return response
290
+
270
291
  # Health
271
292
  @app.get("/health")
272
293
  async def health():
@@ -366,3 +366,161 @@ async def fetch_langfuse_traces(
366
366
  raise
367
367
  except Exception as e:
368
368
  raise HTTPException(status_code=500, detail=f"Error fetching traces from Langfuse: {str(e)}")
369
+
370
+
371
async def pointwise_fetch_langfuse_trace(
    config: ProxyConfig,
    redis_client: redis.Redis,
    request: Request,
    params: TracesParams,
) -> LangfuseTracesResponse:
    """
    Fetch the latest trace of a rollout from Langfuse.

    The rollout is taken from the ``rollout_id:`` tag in ``params.tags``. Its
    expected insertion_ids are read from Redis and only the greatest one is
    queried: insertion_ids are UUID v7 (time-ordered), so the last one contains
    all accumulated information from the pointwise evaluation.

    Args:
        config: Proxy configuration (Langfuse host, per-project keys, default
            project id and the optional ``preprocess_traces_request`` hook).
        redis_client: Redis client used to look up the rollout's insertion_ids.
        request: Incoming request; only forwarded to the preprocessing hook.
        params: Trace query parameters (tags, filters, time window, retries).

    Returns:
        A ``LangfuseTracesResponse`` wrapping exactly one trace.

    Raises:
        HTTPException: 400 if no rollout_id tag was supplied; 404 if the
            project is unknown or the trace could not be fetched within
            ``params.max_retries`` attempts; 500 if Redis holds no
            insertion_ids for the rollout, the Langfuse SDK is not installed,
            or any other error occurs.
    """

    # Preprocess traces request
    if config.preprocess_traces_request:
        params = config.preprocess_traces_request(request, params)

    tags = params.tags
    project_id = params.project_id
    user_id = params.user_id
    session_id = params.session_id
    name = params.name
    environment = params.environment
    version = params.version
    release = params.release
    fields = params.fields
    hours_back = params.hours_back
    from_timestamp = params.from_timestamp
    to_timestamp = params.to_timestamp
    max_retries = params.max_retries

    # Use default project if not specified
    if project_id is None:
        project_id = config.default_project_id

    # Validate project_id
    if project_id not in config.langfuse_keys:
        raise HTTPException(
            status_code=404,
            detail=f"Project ID '{project_id}' not found. Available projects: {list(config.langfuse_keys.keys())}",
        )

    # Extract rollout_id from tags for Redis lookup
    rollout_id = _extract_tag_value(tags, "rollout_id:")

    try:
        # Imported lazily so a missing SDK surfaces as a clean 500 below
        from langfuse import Langfuse

        # Create Langfuse client with the project's keys
        logger.debug(f"Connecting to Langfuse at {config.langfuse_host} for project '{project_id}'")
        project_keys = config.langfuse_keys[project_id]
        langfuse_client = Langfuse(
            public_key=project_keys["public_key"],
            secret_key=project_keys["secret_key"],
            host=config.langfuse_host,
        )

        # Parse datetime strings if provided ("Z" suffix is normalised for fromisoformat)
        from_ts = None
        to_ts = None
        if from_timestamp:
            from_ts = datetime.fromisoformat(from_timestamp.replace("Z", "+00:00"))
        if to_timestamp:
            to_ts = datetime.fromisoformat(to_timestamp.replace("Z", "+00:00"))

        # Determine time window: explicit from/to takes precedence over hours_back
        if from_ts is None and to_ts is None and hours_back:
            to_ts = datetime.now()
            from_ts = to_ts - timedelta(hours=hours_back)

        # Without a rollout there is nothing to look up in Redis. Fail with an
        # explicit client error instead of letting max() choke on an empty set.
        if not rollout_id:
            raise HTTPException(
                status_code=400,
                detail="A 'rollout_id:<id>' tag is required to fetch a pointwise trace.",
            )

        # Get expected insertion_ids from Redis
        expected_ids = get_insertion_ids(redis_client, rollout_id)
        logger.info(f"Pointwise fetch for rollout_id '{rollout_id}', expecting {len(expected_ids)} insertion_ids")
        if not expected_ids:
            logger.warning(f"No expected insertion_ids found in Redis for rollout '{rollout_id}'.")
            raise HTTPException(
                status_code=500,
                detail=f"No expected insertion_ids found in Redis for rollout '{rollout_id}'.",
            )

        # Get the latest (last) insertion_id since UUID v7 is time-ordered
        # NOTE(review): relies on string ordering of the ids matching their time order - confirm
        latest_insertion_id = max(expected_ids)  # UUID v7 max = newest
        id_suffix = latest_insertion_id[-5:]  # only the tail is logged
        logger.info(f"Targeting latest insertion_id (last5): {id_suffix} for rollout '{rollout_id}'")

        for retry in range(max_retries):
            # Fetch trace list targeting the latest insertion_id
            traces = await _fetch_trace_list_with_retry(
                langfuse_client,
                page=1,
                limit=1,  # Only need the one trace
                tags=[f"insertion_id:{latest_insertion_id}"],
                user_id=user_id,
                session_id=session_id,
                name=name,
                environment=environment,
                version=version,
                release=release,
                fields=fields,
                from_ts=from_ts,
                to_ts=to_ts,
                max_retries=max_retries,
            )

            if traces and traces.data:
                # Get the trace info
                trace_info = traces.data[0]
                logger.debug(f"Found trace {trace_info.id} for latest insertion_id {id_suffix}")

                # Fetch full trace details
                trace_full = await _fetch_trace_detail_with_retry(
                    langfuse_client,
                    trace_info.id,
                    max_retries,
                )

                if trace_full:
                    trace_dict = _serialize_trace_to_dict(trace_full)
                    logger.info(
                        f"Successfully fetched latest trace for rollout '{rollout_id}', insertion_id (last5): {id_suffix}"
                    )
                    return LangfuseTracesResponse(
                        project_id=project_id,
                        total_traces=1,
                        traces=[TraceResponse(**trace_dict)],
                    )

            # If not successful and not last retry, back off exponentially and continue
            if retry < max_retries - 1:
                wait_time = 2**retry
                logger.info(
                    f"Pointwise fetch attempt {retry + 1}/{max_retries} failed for rollout '{rollout_id}', insertion_id (last5): {id_suffix}. Retrying in {wait_time}s..."
                )
                await asyncio.sleep(wait_time)

        # After all retries failed
        logger.error(
            f"Failed to fetch latest trace for rollout '{rollout_id}', insertion_id (last5): {id_suffix} after {max_retries} retries"
        )
        raise HTTPException(
            status_code=404,
            detail=f"Failed to fetch latest trace for rollout '{rollout_id}' after {max_retries} retries",
        )

    except ImportError as e:
        raise HTTPException(
            status_code=500, detail="Langfuse SDK not installed. Install with: pip install langfuse"
        ) from e
    except HTTPException:
        # Already carries the intended status code; do not rewrap as a 500
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching latest trace from Langfuse: {str(e)}") from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.46.dev2
3
+ Version: 0.2.46.dev3
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -5,7 +5,7 @@ development/utils/generate_api_key.py,sha256=hHCMFkzW4yxqwcn2ct5diDm-PR9cMX9XP7I
5
5
  development/utils/subprocess_manager.py,sha256=7n7rT9ji7h93i79SMrGS5RNesrnLFjdFON9_eQCmYNE,18937
6
6
  eval_protocol/__init__.py,sha256=bPjRmbNv6MggQz8dq_KSmSKwGSwyLGvuPenS_H8lsoc,3987
7
7
  eval_protocol/__main__.py,sha256=FIW5fo2X2rsPThurSlZEuqvE1u0XNwJW1uoej_OhAAs,161
8
- eval_protocol/_version.py,sha256=gzKeHivAEoFj1hOoUFImQJrTSWPqo5-Ca7KHlw5Au80,503
8
+ eval_protocol/_version.py,sha256=JSNgYjGNXILDIicPuCTagxr-Bfge7f0ZiyNv_OqEVbg,503
9
9
  eval_protocol/auth.py,sha256=Yvpx-DkxcXYvezmcuNl9BFws2A9o9u8iLbIu4mBUUhE,8463
10
10
  eval_protocol/cli.py,sha256=0J7RJKT_4aWYy02yB9Q6gm-RxvlRSt6dxwM1fW-pCpE,22989
11
11
  eval_protocol/common_utils.py,sha256=eRzO_SQNfxBz-vIF9PdTVFAzuAb2zfuCnKSKY_xdKQY,2491
@@ -30,7 +30,7 @@ eval_protocol/adapters/__init__.py,sha256=yLGzY9Gcr6KsIgN36fUc_4HRpnEUL2bkBu7pC-
30
30
  eval_protocol/adapters/base.py,sha256=7-0nj76caW0E2COWnWFIkRamlOCUeK0xWbd_Bpyt-iU,783
31
31
  eval_protocol/adapters/bigquery.py,sha256=vYMc9JTUFX2jyQ70W7TOHBjttGscyJQLdMcEQD36I08,11432
32
32
  eval_protocol/adapters/braintrust.py,sha256=b2BRpecpqszECMPUm5pXjX0BW8Bwyhis39EuIqbH-NY,11422
33
- eval_protocol/adapters/fireworks_tracing.py,sha256=i6EilIwPPUy6oAsyIPqXmhbSvtXeSxBNMZKwV8ULseg,15399
33
+ eval_protocol/adapters/fireworks_tracing.py,sha256=9DwoclXLBsuI-Rr5iJ5I-MXiGmp3iMIUQvLv2CXI9BM,15486
34
34
  eval_protocol/adapters/huggingface.py,sha256=6UJHXKI8wBkf_1eLlkgoucmhQuoVAMRW-NsXiKAwzqs,15228
35
35
  eval_protocol/adapters/langchain.py,sha256=LJWz6KMrX2svTbejdMbzHWb913bWCA1ElsG95FHFxLs,8822
36
36
  eval_protocol/adapters/langfuse.py,sha256=u6vDXHfGITUEqftTBJJiJz0GcvTWXR8kKtouMbgyfU4,22870
@@ -149,9 +149,9 @@ eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md,
149
149
  eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md,sha256=GRSWoBs9xJwMjJXezCfyuT70pvEovEG7J4bie3tyS8o,7061
150
150
  eval_protocol/proxy/__init__.py,sha256=HkIDs5jsSA1gf-Too0ekzKwXOfuRISASe0dIwiEVWMs,416
151
151
  eval_protocol/proxy/proxy_core/__init__.py,sha256=O30kHfOExmPgf4jXuO2l5_nOYhC-jiSVAc6ndlfomH8,265
152
- eval_protocol/proxy/proxy_core/app.py,sha256=5sWByGaru6MDktfzrnyqIOYAoc1CxgRqf6-dj4HFwvI,10898
152
+ eval_protocol/proxy/proxy_core/app.py,sha256=bDruTeGbF9_oVcbyyDOn1FBRwEbML7et_mQGE8aLlCY,11892
153
153
  eval_protocol/proxy/proxy_core/auth.py,sha256=ymuPE0pXegiWCm5L9LrCTmdMU3YQ_eD5YYXEwSgYUjk,463
154
- eval_protocol/proxy/proxy_core/langfuse.py,sha256=4TX84FBgs2v4FHmjxPkUD5K1J2M334BCTqkxJkAmcwI,15011
154
+ eval_protocol/proxy/proxy_core/langfuse.py,sha256=3o38KjwNy4pQ6Cat4AomkTs_kJDzZcM9pnI6Gaa25xE,21396
155
155
  eval_protocol/proxy/proxy_core/litellm.py,sha256=LhYUvK9PywmBrBJLDMYeNjqTE0f-ST35AkdxXVyDg9Y,5949
156
156
  eval_protocol/proxy/proxy_core/main.py,sha256=8f5jxhT-dGLRVdEVrWkvJjHirpV6O4oQiZxHVYAuHHU,266
157
157
  eval_protocol/proxy/proxy_core/models.py,sha256=R0LU_daiZlcuTCYAt5q521fDRsNjMWeeLy0iYpZg-Go,2707
@@ -230,7 +230,7 @@ eval_protocol/utils/show_results_url.py,sha256=PHM6dWtCUiuV5WQgvHegnxY7ofkE4b9wO
230
230
  eval_protocol/utils/static_policy.py,sha256=fiKnOS06EG5OB6p5An_yY_dLAvVboYnC4Sqx5z_v3-g,10716
231
231
  eval_protocol/utils/subprocess_utils.py,sha256=2EcoVNLSlfdxwQn-2pscqjiGpBR4Ho8kfRnmzmew-1w,3504
232
232
  eval_protocol/utils/vite_server.py,sha256=0Tfh1LfTqYpFZxkO2syrF5I0cBEJHFmYXd2N4CWkca8,5051
233
- eval_protocol-0.2.46.dev2.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
233
+ eval_protocol-0.2.46.dev3.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
234
234
  vendor/tau2/__init__.py,sha256=EQMX_v8x-YBV24ia35_nLkf5MrC6aAuT_M5m7IJcl3k,541
235
235
  vendor/tau2/cli.py,sha256=lhJocXCDxEfdv7gIxya5b0w5J5qebpgrg_ZTpjGp_ww,7515
236
236
  vendor/tau2/config.py,sha256=LrkKRGSFH4Cvf9CNO-MttJMvIia0a2zP1uKVnUQi6B8,1278
@@ -330,8 +330,8 @@ vite-app/dist/assets/index-C81y9r9l.js,sha256=7fWlEMpE0hAZHAQ4gHEnv1yNu_STClSwsY
330
330
  vite-app/dist/assets/index-C81y9r9l.js.map,sha256=MkiPbSe9_T6HHqVZOdpIde-LcujdcNWkc4uEEwgfyk8,3862571
331
331
  vite-app/dist/assets/index-DpYZaoAr.css,sha256=v5t6cVU5X1gnABnBT6RBLRTBbrrFbJ92rYFqzaNoUkg,24878
332
332
  vite-app/dist/assets/logo-light-BprIBJQW.png,sha256=rRXC24eqrQO3y--N493THrD48WQVAhSVMHM_iDKy250,21694
333
- eval_protocol-0.2.46.dev2.dist-info/METADATA,sha256=r2beRO4vHNtn1TEakiGJHsFMjMnhW8vApBz0MrbGQCI,7498
334
- eval_protocol-0.2.46.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
335
- eval_protocol-0.2.46.dev2.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
336
- eval_protocol-0.2.46.dev2.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
337
- eval_protocol-0.2.46.dev2.dist-info/RECORD,,
333
+ eval_protocol-0.2.46.dev3.dist-info/METADATA,sha256=gqHozwmc1Anw3jeTU1t1EmDYM3Ybh4XdZEWCEuG9_3Y,7498
334
+ eval_protocol-0.2.46.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
335
+ eval_protocol-0.2.46.dev3.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
336
+ eval_protocol-0.2.46.dev3.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
337
+ eval_protocol-0.2.46.dev3.dist-info/RECORD,,