@alis-build/harness-eval 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -4
- package/dist/adapters/claude-code/index.d.ts +1 -1
- package/dist/adapters/claude-code/index.js +1 -1
- package/dist/{claude-code-ycT0JQZF.js → claude-code-DZ4Vkgp6.js} +35 -6
- package/dist/{claude-code-ycT0JQZF.js.map → claude-code-DZ4Vkgp6.js.map} +1 -1
- package/dist/cli/bin.js +109 -12
- package/dist/cli/bin.js.map +1 -1
- package/dist/config/loader.d.ts +1 -1
- package/dist/config/loader.js +1 -1
- package/dist/{index-6Z17eKZx.d.ts → index-V22PrR0p.d.ts} +2 -1
- package/dist/index.d.ts +270 -152
- package/dist/index.js +124 -5
- package/dist/index.js.map +1 -0
- package/dist/{loader-DTvoVfN0.d.ts → loader-C9yQHUPC.d.ts} +19 -2
- package/dist/{loader-BCnFJ8rm.js → loader-DcI0KfRX.js} +291 -4
- package/dist/loader-DcI0KfRX.js.map +1 -0
- package/dist/{build-DsVJ_UeU.js → projections-BcX7w-f6.js} +486 -243
- package/dist/projections-BcX7w-f6.js.map +1 -0
- package/dist/runner/suite.d.ts +1 -1
- package/dist/runner/suite.js +1 -1
- package/dist/{suite-BoOvK_lq.d.ts → suite-DPJMIEbu.d.ts} +7 -2
- package/dist/{suite-chj0j22j.js → suite-Dlzl-HI0.js} +58 -4
- package/dist/suite-Dlzl-HI0.js.map +1 -0
- package/dist/{types-BQol062t.d.ts → types-CD3TwOtZ.d.ts} +151 -10
- package/package.json +4 -2
- package/schemas/eval-interchange-instances.schema.json +196 -0
- package/schemas/eval-interchange.schema.json +65 -52
- package/schemas/eval-run-envelope.schema.json +182 -425
- package/dist/build-DsVJ_UeU.js.map +0 -1
- package/dist/loader-BCnFJ8rm.js.map +0 -1
- package/dist/suite-chj0j22j.js.map +0 -1
- package/schemas/eval-interchange-agent-trace.schema.json +0 -322
- package/schemas/eval-interchange-proto-instance.schema.json +0 -106
|
@@ -383,10 +383,10 @@
|
|
|
383
383
|
"expectations": {
|
|
384
384
|
"$ref": "#/$defs/__schema55"
|
|
385
385
|
},
|
|
386
|
-
"
|
|
386
|
+
"referenceTrajectory": {
|
|
387
387
|
"$ref": "#/$defs/__schema57"
|
|
388
388
|
},
|
|
389
|
-
"
|
|
389
|
+
"humanRatings": {
|
|
390
390
|
"$ref": "#/$defs/__schema61"
|
|
391
391
|
},
|
|
392
392
|
"cellLabel": {
|
|
@@ -459,39 +459,55 @@
|
|
|
459
459
|
}
|
|
460
460
|
},
|
|
461
461
|
"__schema57": {
|
|
462
|
-
"description": "Reference
|
|
463
|
-
"$ref": "#/$defs/
|
|
462
|
+
"description": "Reference trajectory in Vertex protojson wire format.",
|
|
463
|
+
"$ref": "#/$defs/ProtojsonTrajectory"
|
|
464
|
+
},
|
|
465
|
+
"ProtojsonTrajectory": {
|
|
466
|
+
"type": "object",
|
|
467
|
+
"properties": {
|
|
468
|
+
"toolCalls": {
|
|
469
|
+
"$ref": "#/$defs/__schema58"
|
|
470
|
+
}
|
|
471
|
+
},
|
|
472
|
+
"required": [
|
|
473
|
+
"toolCalls"
|
|
474
|
+
],
|
|
475
|
+
"additionalProperties": false,
|
|
476
|
+
"title": "ProtojsonTrajectory",
|
|
477
|
+
"description": "Vertex Trajectory message wire format."
|
|
464
478
|
},
|
|
465
479
|
"__schema58": {
|
|
466
480
|
"type": "array",
|
|
467
481
|
"items": {
|
|
468
|
-
"$ref": "#/$defs/
|
|
469
|
-
}
|
|
482
|
+
"$ref": "#/$defs/ProtojsonToolCall"
|
|
483
|
+
},
|
|
484
|
+
"description": "Ordered tool calls in the trajectory."
|
|
470
485
|
},
|
|
471
|
-
"
|
|
486
|
+
"ProtojsonToolCall": {
|
|
472
487
|
"type": "object",
|
|
473
488
|
"properties": {
|
|
474
|
-
"
|
|
489
|
+
"toolName": {
|
|
475
490
|
"$ref": "#/$defs/__schema59"
|
|
476
491
|
},
|
|
477
|
-
"
|
|
492
|
+
"toolInput": {
|
|
478
493
|
"$ref": "#/$defs/__schema60"
|
|
479
494
|
}
|
|
480
495
|
},
|
|
481
496
|
"required": [
|
|
482
|
-
"
|
|
483
|
-
"
|
|
497
|
+
"toolName",
|
|
498
|
+
"toolInput"
|
|
484
499
|
],
|
|
485
500
|
"additionalProperties": false,
|
|
486
|
-
"title": "
|
|
487
|
-
"description": "Tool call
|
|
501
|
+
"title": "ProtojsonToolCall",
|
|
502
|
+
"description": "Tool call in Vertex EvaluationService wire format."
|
|
488
503
|
},
|
|
489
504
|
"__schema59": {
|
|
490
505
|
"type": "string",
|
|
491
506
|
"description": "Tool name as emitted by the agent."
|
|
492
507
|
},
|
|
493
508
|
"__schema60": {
|
|
494
|
-
"
|
|
509
|
+
"type": "string",
|
|
510
|
+
"description": "JSON-serialized tool arguments (Vertex wire format)."
|
|
495
511
|
},
|
|
496
512
|
"__schema61": {
|
|
497
513
|
"description": "Human ratings keyed by metric name for judge calibration.",
|
|
@@ -647,26 +663,23 @@
|
|
|
647
663
|
"artifacts": {
|
|
648
664
|
"$ref": "#/$defs/__schema181"
|
|
649
665
|
},
|
|
650
|
-
"
|
|
666
|
+
"evaluationInstance": {
|
|
651
667
|
"$ref": "#/$defs/__schema188"
|
|
652
668
|
},
|
|
653
|
-
"
|
|
654
|
-
"$ref": "#/$defs/
|
|
655
|
-
},
|
|
656
|
-
"latency_in_seconds": {
|
|
657
|
-
"$ref": "#/$defs/__schema226"
|
|
669
|
+
"trajectoryInstances": {
|
|
670
|
+
"$ref": "#/$defs/__schema194"
|
|
658
671
|
},
|
|
659
|
-
"
|
|
660
|
-
"$ref": "#/$defs/
|
|
672
|
+
"harnessMetrics": {
|
|
673
|
+
"$ref": "#/$defs/__schema204"
|
|
661
674
|
},
|
|
662
|
-
"
|
|
663
|
-
"$ref": "#/$defs/
|
|
675
|
+
"latencySeconds": {
|
|
676
|
+
"$ref": "#/$defs/__schema211"
|
|
664
677
|
},
|
|
665
|
-
"
|
|
666
|
-
"$ref": "#/$defs/
|
|
678
|
+
"failure": {
|
|
679
|
+
"$ref": "#/$defs/__schema213"
|
|
667
680
|
},
|
|
668
681
|
"error": {
|
|
669
|
-
"$ref": "#/$defs/
|
|
682
|
+
"$ref": "#/$defs/__schema215"
|
|
670
683
|
}
|
|
671
684
|
},
|
|
672
685
|
"required": [
|
|
@@ -1674,495 +1687,239 @@
|
|
|
1674
1687
|
"type": "string"
|
|
1675
1688
|
},
|
|
1676
1689
|
"__schema188": {
|
|
1677
|
-
"description": "
|
|
1678
|
-
"$ref": "#/$defs/
|
|
1690
|
+
"description": "Vertex EvaluationInstance protojson wire object.",
|
|
1691
|
+
"$ref": "#/$defs/EvaluationInstanceJson"
|
|
1692
|
+
},
|
|
1693
|
+
"EvaluationInstanceJson": {
|
|
1694
|
+
"type": "object",
|
|
1695
|
+
"properties": {
|
|
1696
|
+
"prompt": {
|
|
1697
|
+
"$ref": "#/$defs/__schema189"
|
|
1698
|
+
},
|
|
1699
|
+
"response": {
|
|
1700
|
+
"$ref": "#/$defs/__schema192"
|
|
1701
|
+
},
|
|
1702
|
+
"reference": {
|
|
1703
|
+
"$ref": "#/$defs/__schema193"
|
|
1704
|
+
}
|
|
1705
|
+
},
|
|
1706
|
+
"additionalProperties": false,
|
|
1707
|
+
"title": "EvaluationInstanceJson",
|
|
1708
|
+
"description": "Vertex EvaluationInstance wire format (agentEvalData omitted in v1)."
|
|
1679
1709
|
},
|
|
1680
1710
|
"__schema189": {
|
|
1681
|
-
"
|
|
1682
|
-
"
|
|
1683
|
-
"$ref": "#/$defs/InterchangeToolCall"
|
|
1684
|
-
}
|
|
1711
|
+
"description": "Eval prompt.",
|
|
1712
|
+
"$ref": "#/$defs/InstanceData"
|
|
1685
1713
|
},
|
|
1686
|
-
"
|
|
1714
|
+
"InstanceData": {
|
|
1687
1715
|
"type": "object",
|
|
1688
1716
|
"properties": {
|
|
1689
|
-
"
|
|
1717
|
+
"text": {
|
|
1690
1718
|
"$ref": "#/$defs/__schema190"
|
|
1691
|
-
},
|
|
1692
|
-
"tool_input": {
|
|
1693
|
-
"$ref": "#/$defs/__schema191"
|
|
1694
1719
|
}
|
|
1695
1720
|
},
|
|
1696
|
-
"required": [
|
|
1697
|
-
"tool_name",
|
|
1698
|
-
"tool_input"
|
|
1699
|
-
],
|
|
1700
1721
|
"additionalProperties": false,
|
|
1701
|
-
"title": "
|
|
1702
|
-
"description": "
|
|
1722
|
+
"title": "InstanceData",
|
|
1723
|
+
"description": "EvaluationInstance prompt/response/reference text wrapper."
|
|
1703
1724
|
},
|
|
1704
1725
|
"__schema190": {
|
|
1705
|
-
"
|
|
1706
|
-
"
|
|
1726
|
+
"description": "Plain text instance data.",
|
|
1727
|
+
"$ref": "#/$defs/__schema191"
|
|
1707
1728
|
},
|
|
1708
1729
|
"__schema191": {
|
|
1709
|
-
"type": "string"
|
|
1710
|
-
"description": "JSON-serialized tool arguments (wire format)."
|
|
1730
|
+
"type": "string"
|
|
1711
1731
|
},
|
|
1712
1732
|
"__schema192": {
|
|
1713
|
-
"description": "
|
|
1714
|
-
"$ref": "#/$defs/
|
|
1715
|
-
},
|
|
1716
|
-
"AgentTrace": {
|
|
1717
|
-
"type": "object",
|
|
1718
|
-
"properties": {
|
|
1719
|
-
"agents": {
|
|
1720
|
-
"$ref": "#/$defs/__schema193"
|
|
1721
|
-
},
|
|
1722
|
-
"turns": {
|
|
1723
|
-
"$ref": "#/$defs/__schema206"
|
|
1724
|
-
}
|
|
1725
|
-
},
|
|
1726
|
-
"required": [
|
|
1727
|
-
"agents",
|
|
1728
|
-
"turns"
|
|
1729
|
-
],
|
|
1730
|
-
"additionalProperties": false,
|
|
1731
|
-
"title": "AgentTrace",
|
|
1732
|
-
"description": "Full multi-turn agent execution trace."
|
|
1733
|
+
"description": "Final agent response.",
|
|
1734
|
+
"$ref": "#/$defs/InstanceData"
|
|
1733
1735
|
},
|
|
1734
1736
|
"__schema193": {
|
|
1735
|
-
"
|
|
1736
|
-
"
|
|
1737
|
-
"$ref": "#/$defs/__schema194"
|
|
1738
|
-
},
|
|
1739
|
-
"additionalProperties": {
|
|
1740
|
-
"$ref": "#/$defs/AgentConfig"
|
|
1741
|
-
},
|
|
1742
|
-
"description": "Agent configurations keyed by agent id."
|
|
1737
|
+
"description": "Reference answer text.",
|
|
1738
|
+
"$ref": "#/$defs/InstanceData"
|
|
1743
1739
|
},
|
|
1744
1740
|
"__schema194": {
|
|
1745
|
-
"
|
|
1741
|
+
"description": "Vertex Trajectory*Instance protojson wire objects keyed by metric.",
|
|
1742
|
+
"$ref": "#/$defs/TrajectoryInstancesJson"
|
|
1746
1743
|
},
|
|
1747
|
-
"
|
|
1744
|
+
"TrajectoryInstancesJson": {
|
|
1748
1745
|
"type": "object",
|
|
1749
1746
|
"properties": {
|
|
1750
|
-
"
|
|
1747
|
+
"exactMatch": {
|
|
1751
1748
|
"$ref": "#/$defs/__schema195"
|
|
1752
1749
|
},
|
|
1753
|
-
"
|
|
1754
|
-
"$ref": "#/$defs/__schema196"
|
|
1755
|
-
},
|
|
1756
|
-
"description": {
|
|
1750
|
+
"inOrderMatch": {
|
|
1757
1751
|
"$ref": "#/$defs/__schema198"
|
|
1758
1752
|
},
|
|
1759
|
-
"
|
|
1753
|
+
"anyOrderMatch": {
|
|
1754
|
+
"$ref": "#/$defs/__schema199"
|
|
1755
|
+
},
|
|
1756
|
+
"precision": {
|
|
1760
1757
|
"$ref": "#/$defs/__schema200"
|
|
1761
1758
|
},
|
|
1762
|
-
"
|
|
1763
|
-
"$ref": "#/$defs/
|
|
1759
|
+
"recall": {
|
|
1760
|
+
"$ref": "#/$defs/__schema201"
|
|
1764
1761
|
},
|
|
1765
|
-
"
|
|
1766
|
-
"$ref": "#/$defs/
|
|
1762
|
+
"singleToolUse": {
|
|
1763
|
+
"$ref": "#/$defs/__schema202"
|
|
1767
1764
|
}
|
|
1768
1765
|
},
|
|
1769
|
-
"required": [
|
|
1770
|
-
"agent_id"
|
|
1771
|
-
],
|
|
1772
1766
|
"additionalProperties": false,
|
|
1773
|
-
"title": "
|
|
1774
|
-
"description": "
|
|
1767
|
+
"title": "TrajectoryInstancesJson",
|
|
1768
|
+
"description": "Vertex Trajectory*Instance messages keyed by metric."
|
|
1775
1769
|
},
|
|
1776
1770
|
"__schema195": {
|
|
1777
|
-
"
|
|
1778
|
-
"
|
|
1779
|
-
},
|
|
1780
|
-
"__schema196": {
|
|
1781
|
-
"description": "Agent type or role.",
|
|
1782
|
-
"$ref": "#/$defs/__schema197"
|
|
1783
|
-
},
|
|
1784
|
-
"__schema197": {
|
|
1785
|
-
"type": "string"
|
|
1786
|
-
},
|
|
1787
|
-
"__schema198": {
|
|
1788
|
-
"description": "Human-readable agent description.",
|
|
1789
|
-
"$ref": "#/$defs/__schema199"
|
|
1790
|
-
},
|
|
1791
|
-
"__schema199": {
|
|
1792
|
-
"type": "string"
|
|
1793
|
-
},
|
|
1794
|
-
"__schema200": {
|
|
1795
|
-
"description": "System instruction for the agent.",
|
|
1796
|
-
"$ref": "#/$defs/__schema201"
|
|
1797
|
-
},
|
|
1798
|
-
"__schema201": {
|
|
1799
|
-
"type": "string"
|
|
1771
|
+
"description": "Exact match instance.",
|
|
1772
|
+
"$ref": "#/$defs/TrajectoryPairInstanceJson"
|
|
1800
1773
|
},
|
|
1801
|
-
"
|
|
1802
|
-
"description": "Tools available to this agent.",
|
|
1803
|
-
"$ref": "#/$defs/__schema203"
|
|
1804
|
-
},
|
|
1805
|
-
"__schema203": {
|
|
1806
|
-
"type": "array",
|
|
1807
|
-
"items": {
|
|
1808
|
-
"type": "object",
|
|
1809
|
-
"properties": {
|
|
1810
|
-
"name": {
|
|
1811
|
-
"type": "string",
|
|
1812
|
-
"description": "Tool name."
|
|
1813
|
-
}
|
|
1814
|
-
},
|
|
1815
|
-
"required": [
|
|
1816
|
-
"name"
|
|
1817
|
-
],
|
|
1818
|
-
"additionalProperties": false
|
|
1819
|
-
}
|
|
1820
|
-
},
|
|
1821
|
-
"__schema204": {
|
|
1822
|
-
"description": "Sub-agent identifiers when using multi-agent setups.",
|
|
1823
|
-
"$ref": "#/$defs/__schema205"
|
|
1824
|
-
},
|
|
1825
|
-
"__schema205": {
|
|
1826
|
-
"type": "array",
|
|
1827
|
-
"items": {
|
|
1828
|
-
"type": "string"
|
|
1829
|
-
}
|
|
1830
|
-
},
|
|
1831
|
-
"__schema206": {
|
|
1832
|
-
"type": "array",
|
|
1833
|
-
"items": {
|
|
1834
|
-
"$ref": "#/$defs/ConversationTurn"
|
|
1835
|
-
},
|
|
1836
|
-
"description": "Chronological conversation turns."
|
|
1837
|
-
},
|
|
1838
|
-
"ConversationTurn": {
|
|
1774
|
+
"TrajectoryPairInstanceJson": {
|
|
1839
1775
|
"type": "object",
|
|
1840
1776
|
"properties": {
|
|
1841
|
-
"
|
|
1842
|
-
"$ref": "#/$defs/
|
|
1843
|
-
},
|
|
1844
|
-
"turn_id": {
|
|
1845
|
-
"$ref": "#/$defs/__schema208"
|
|
1846
|
-
},
|
|
1847
|
-
"events": {
|
|
1848
|
-
"$ref": "#/$defs/__schema210"
|
|
1849
|
-
}
|
|
1850
|
-
},
|
|
1851
|
-
"required": [
|
|
1852
|
-
"turn_index",
|
|
1853
|
-
"events"
|
|
1854
|
-
],
|
|
1855
|
-
"additionalProperties": false,
|
|
1856
|
-
"title": "ConversationTurn",
|
|
1857
|
-
"description": "One turn in a multi-turn agent conversation."
|
|
1858
|
-
},
|
|
1859
|
-
"__schema207": {
|
|
1860
|
-
"type": "integer",
|
|
1861
|
-
"minimum": -9007199254740991,
|
|
1862
|
-
"maximum": 9007199254740991,
|
|
1863
|
-
"description": "Zero-based turn index."
|
|
1864
|
-
},
|
|
1865
|
-
"__schema208": {
|
|
1866
|
-
"description": "Optional stable turn identifier.",
|
|
1867
|
-
"$ref": "#/$defs/__schema209"
|
|
1868
|
-
},
|
|
1869
|
-
"__schema209": {
|
|
1870
|
-
"type": "string"
|
|
1871
|
-
},
|
|
1872
|
-
"__schema210": {
|
|
1873
|
-
"type": "array",
|
|
1874
|
-
"items": {
|
|
1875
|
-
"$ref": "#/$defs/AgentEvent"
|
|
1876
|
-
},
|
|
1877
|
-
"description": "Events in chronological order."
|
|
1878
|
-
},
|
|
1879
|
-
"AgentEvent": {
|
|
1880
|
-
"type": "object",
|
|
1881
|
-
"properties": {
|
|
1882
|
-
"author": {
|
|
1883
|
-
"$ref": "#/$defs/__schema211"
|
|
1884
|
-
},
|
|
1885
|
-
"content": {
|
|
1886
|
-
"$ref": "#/$defs/__schema212"
|
|
1887
|
-
},
|
|
1888
|
-
"event_time": {
|
|
1889
|
-
"$ref": "#/$defs/__schema220"
|
|
1890
|
-
},
|
|
1891
|
-
"state_delta": {
|
|
1892
|
-
"$ref": "#/$defs/__schema222"
|
|
1777
|
+
"predictedTrajectory": {
|
|
1778
|
+
"$ref": "#/$defs/__schema196"
|
|
1893
1779
|
},
|
|
1894
|
-
"
|
|
1895
|
-
"$ref": "#/$defs/
|
|
1780
|
+
"referenceTrajectory": {
|
|
1781
|
+
"$ref": "#/$defs/__schema197"
|
|
1896
1782
|
}
|
|
1897
1783
|
},
|
|
1898
1784
|
"required": [
|
|
1899
|
-
"
|
|
1900
|
-
"
|
|
1785
|
+
"predictedTrajectory",
|
|
1786
|
+
"referenceTrajectory"
|
|
1901
1787
|
],
|
|
1902
1788
|
"additionalProperties": false,
|
|
1903
|
-
"title": "
|
|
1904
|
-
"description": "
|
|
1789
|
+
"title": "TrajectoryPairInstanceJson",
|
|
1790
|
+
"description": "Shared shape for Trajectory*Match/Precision/Recall instances."
|
|
1905
1791
|
},
|
|
1906
|
-
"
|
|
1907
|
-
"
|
|
1908
|
-
"
|
|
1909
|
-
},
|
|
1910
|
-
"__schema212": {
|
|
1911
|
-
"type": "object",
|
|
1912
|
-
"properties": {
|
|
1913
|
-
"parts": {
|
|
1914
|
-
"$ref": "#/$defs/__schema213"
|
|
1915
|
-
}
|
|
1916
|
-
},
|
|
1917
|
-
"required": [
|
|
1918
|
-
"parts"
|
|
1919
|
-
],
|
|
1920
|
-
"additionalProperties": false,
|
|
1921
|
-
"description": "Structured event content."
|
|
1922
|
-
},
|
|
1923
|
-
"__schema213": {
|
|
1924
|
-
"type": "array",
|
|
1925
|
-
"items": {
|
|
1926
|
-
"$ref": "#/$defs/ContentPart"
|
|
1927
|
-
},
|
|
1928
|
-
"description": "Content parts for this event."
|
|
1792
|
+
"__schema196": {
|
|
1793
|
+
"description": "Predicted tool-call trajectory.",
|
|
1794
|
+
"$ref": "#/$defs/ProtojsonTrajectory"
|
|
1929
1795
|
},
|
|
1930
|
-
"
|
|
1931
|
-
"
|
|
1932
|
-
"
|
|
1933
|
-
"text": {
|
|
1934
|
-
"$ref": "#/$defs/__schema214"
|
|
1935
|
-
},
|
|
1936
|
-
"function_call": {
|
|
1937
|
-
"$ref": "#/$defs/__schema216"
|
|
1938
|
-
},
|
|
1939
|
-
"function_response": {
|
|
1940
|
-
"$ref": "#/$defs/__schema218"
|
|
1941
|
-
}
|
|
1942
|
-
},
|
|
1943
|
-
"additionalProperties": false,
|
|
1944
|
-
"title": "ContentPart",
|
|
1945
|
-
"description": "One part of agent event content (text, function_call, or function_response)."
|
|
1796
|
+
"__schema197": {
|
|
1797
|
+
"description": "Reference tool-call trajectory.",
|
|
1798
|
+
"$ref": "#/$defs/ProtojsonTrajectory"
|
|
1946
1799
|
},
|
|
1947
|
-
"
|
|
1948
|
-
"description": "
|
|
1949
|
-
"$ref": "#/$defs/
|
|
1800
|
+
"__schema198": {
|
|
1801
|
+
"description": "In-order match instance.",
|
|
1802
|
+
"$ref": "#/$defs/TrajectoryPairInstanceJson"
|
|
1950
1803
|
},
|
|
1951
|
-
"
|
|
1952
|
-
"
|
|
1804
|
+
"__schema199": {
|
|
1805
|
+
"description": "Any-order match instance.",
|
|
1806
|
+
"$ref": "#/$defs/TrajectoryPairInstanceJson"
|
|
1953
1807
|
},
|
|
1954
|
-
"
|
|
1955
|
-
"description": "
|
|
1956
|
-
"$ref": "#/$defs/
|
|
1808
|
+
"__schema200": {
|
|
1809
|
+
"description": "Precision instance.",
|
|
1810
|
+
"$ref": "#/$defs/TrajectoryPairInstanceJson"
|
|
1957
1811
|
},
|
|
1958
|
-
"
|
|
1959
|
-
"
|
|
1960
|
-
"
|
|
1961
|
-
"name": {
|
|
1962
|
-
"type": "string",
|
|
1963
|
-
"description": "Function or tool name."
|
|
1964
|
-
},
|
|
1965
|
-
"args": {
|
|
1966
|
-
"description": "Function arguments."
|
|
1967
|
-
}
|
|
1968
|
-
},
|
|
1969
|
-
"required": [
|
|
1970
|
-
"name",
|
|
1971
|
-
"args"
|
|
1972
|
-
],
|
|
1973
|
-
"additionalProperties": false
|
|
1812
|
+
"__schema201": {
|
|
1813
|
+
"description": "Recall instance.",
|
|
1814
|
+
"$ref": "#/$defs/TrajectoryPairInstanceJson"
|
|
1974
1815
|
},
|
|
1975
|
-
"
|
|
1976
|
-
"description": "
|
|
1977
|
-
"$ref": "#/$defs/
|
|
1816
|
+
"__schema202": {
|
|
1817
|
+
"description": "Single tool use instance.",
|
|
1818
|
+
"$ref": "#/$defs/TrajectorySingleToolUseInstanceJson"
|
|
1978
1819
|
},
|
|
1979
|
-
"
|
|
1820
|
+
"TrajectorySingleToolUseInstanceJson": {
|
|
1980
1821
|
"type": "object",
|
|
1981
1822
|
"properties": {
|
|
1982
|
-
"
|
|
1983
|
-
"
|
|
1984
|
-
"description": "Function or tool name."
|
|
1985
|
-
},
|
|
1986
|
-
"response": {
|
|
1987
|
-
"description": "Function result payload."
|
|
1823
|
+
"predictedTrajectory": {
|
|
1824
|
+
"$ref": "#/$defs/__schema203"
|
|
1988
1825
|
}
|
|
1989
1826
|
},
|
|
1990
1827
|
"required": [
|
|
1991
|
-
"
|
|
1992
|
-
"response"
|
|
1828
|
+
"predictedTrajectory"
|
|
1993
1829
|
],
|
|
1994
|
-
"additionalProperties": false
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
"description": "ISO 8601 timestamp when the event occurred.",
|
|
1998
|
-
"$ref": "#/$defs/__schema221"
|
|
1999
|
-
},
|
|
2000
|
-
"__schema221": {
|
|
2001
|
-
"type": "string"
|
|
2002
|
-
},
|
|
2003
|
-
"__schema222": {
|
|
2004
|
-
"description": "Session state changes associated with this event.",
|
|
2005
|
-
"$ref": "#/$defs/__schema223"
|
|
2006
|
-
},
|
|
2007
|
-
"__schema223": {
|
|
2008
|
-
"type": "object",
|
|
2009
|
-
"propertyNames": {
|
|
2010
|
-
"type": "string"
|
|
2011
|
-
},
|
|
2012
|
-
"additionalProperties": {}
|
|
2013
|
-
},
|
|
2014
|
-
"__schema224": {
|
|
2015
|
-
"description": "Tools available to the agent at event time.",
|
|
2016
|
-
"$ref": "#/$defs/__schema225"
|
|
2017
|
-
},
|
|
2018
|
-
"__schema225": {
|
|
2019
|
-
"type": "array",
|
|
2020
|
-
"items": {
|
|
2021
|
-
"type": "object",
|
|
2022
|
-
"properties": {
|
|
2023
|
-
"name": {
|
|
2024
|
-
"type": "string",
|
|
2025
|
-
"description": "Tool name."
|
|
2026
|
-
}
|
|
2027
|
-
},
|
|
2028
|
-
"required": [
|
|
2029
|
-
"name"
|
|
2030
|
-
],
|
|
2031
|
-
"additionalProperties": false
|
|
2032
|
-
}
|
|
2033
|
-
},
|
|
2034
|
-
"__schema226": {
|
|
2035
|
-
"description": "Session latency in seconds (interchange field).",
|
|
2036
|
-
"$ref": "#/$defs/__schema227"
|
|
2037
|
-
},
|
|
2038
|
-
"__schema227": {
|
|
2039
|
-
"type": "number"
|
|
2040
|
-
},
|
|
2041
|
-
"__schema228": {
|
|
2042
|
-
"description": "1 when the harness run failed, 0 on success.",
|
|
2043
|
-
"$ref": "#/$defs/__schema229"
|
|
1830
|
+
"additionalProperties": false,
|
|
1831
|
+
"title": "TrajectorySingleToolUseInstanceJson",
|
|
1832
|
+
"description": "Vertex TrajectorySingleToolUseInstance wire format."
|
|
2044
1833
|
},
|
|
2045
|
-
"
|
|
2046
|
-
"
|
|
2047
|
-
|
|
2048
|
-
"type": "number",
|
|
2049
|
-
"const": 0
|
|
2050
|
-
},
|
|
2051
|
-
{
|
|
2052
|
-
"type": "number",
|
|
2053
|
-
"const": 1
|
|
2054
|
-
}
|
|
2055
|
-
]
|
|
1834
|
+
"__schema203": {
|
|
1835
|
+
"description": "Predicted tool-call trajectory.",
|
|
1836
|
+
"$ref": "#/$defs/ProtojsonTrajectory"
|
|
2056
1837
|
},
|
|
2057
|
-
"
|
|
2058
|
-
"
|
|
2059
|
-
"$ref": "#/$defs/TrajectoryMetrics"
|
|
1838
|
+
"__schema204": {
|
|
1839
|
+
"$ref": "#/$defs/HarnessMetrics"
|
|
2060
1840
|
},
|
|
2061
|
-
"
|
|
1841
|
+
"HarnessMetrics": {
|
|
2062
1842
|
"type": "object",
|
|
2063
1843
|
"properties": {
|
|
2064
|
-
"
|
|
2065
|
-
"$ref": "#/$defs/
|
|
1844
|
+
"trajectoryExactMatch": {
|
|
1845
|
+
"$ref": "#/$defs/__schema205"
|
|
2066
1846
|
},
|
|
2067
|
-
"
|
|
2068
|
-
"$ref": "#/$defs/
|
|
1847
|
+
"trajectoryInOrderMatch": {
|
|
1848
|
+
"$ref": "#/$defs/__schema206"
|
|
2069
1849
|
},
|
|
2070
|
-
"
|
|
2071
|
-
"$ref": "#/$defs/
|
|
1850
|
+
"trajectoryAnyOrderMatch": {
|
|
1851
|
+
"$ref": "#/$defs/__schema207"
|
|
2072
1852
|
},
|
|
2073
|
-
"
|
|
2074
|
-
"$ref": "#/$defs/
|
|
1853
|
+
"trajectoryPrecision": {
|
|
1854
|
+
"$ref": "#/$defs/__schema208"
|
|
2075
1855
|
},
|
|
2076
|
-
"
|
|
2077
|
-
"$ref": "#/$defs/
|
|
1856
|
+
"trajectoryRecall": {
|
|
1857
|
+
"$ref": "#/$defs/__schema209"
|
|
2078
1858
|
},
|
|
2079
|
-
"
|
|
2080
|
-
"$ref": "#/$defs/
|
|
1859
|
+
"trajectorySingleToolUse": {
|
|
1860
|
+
"$ref": "#/$defs/__schema210"
|
|
2081
1861
|
}
|
|
2082
1862
|
},
|
|
2083
1863
|
"required": [
|
|
2084
|
-
"
|
|
2085
|
-
"
|
|
2086
|
-
"
|
|
2087
|
-
"
|
|
2088
|
-
"
|
|
2089
|
-
"
|
|
1864
|
+
"trajectoryExactMatch",
|
|
1865
|
+
"trajectoryInOrderMatch",
|
|
1866
|
+
"trajectoryAnyOrderMatch",
|
|
1867
|
+
"trajectoryPrecision",
|
|
1868
|
+
"trajectoryRecall",
|
|
1869
|
+
"trajectorySingleToolUse"
|
|
2090
1870
|
],
|
|
2091
1871
|
"additionalProperties": false,
|
|
2092
|
-
"title": "
|
|
2093
|
-
"description": "
|
|
1872
|
+
"title": "HarnessMetrics",
|
|
1873
|
+
"description": "Harness-precomputed trajectory metric scores."
|
|
2094
1874
|
},
|
|
2095
|
-
"
|
|
1875
|
+
"__schema205": {
|
|
2096
1876
|
"type": "number",
|
|
2097
1877
|
"description": "Exact trajectory match score (0 or 1)."
|
|
2098
1878
|
},
|
|
2099
|
-
"
|
|
1879
|
+
"__schema206": {
|
|
2100
1880
|
"type": "number",
|
|
2101
1881
|
"description": "In-order trajectory match score (0 or 1)."
|
|
2102
1882
|
},
|
|
2103
|
-
"
|
|
1883
|
+
"__schema207": {
|
|
2104
1884
|
"type": "number",
|
|
2105
1885
|
"description": "Any-order trajectory match score (0 or 1)."
|
|
2106
1886
|
},
|
|
2107
|
-
"
|
|
1887
|
+
"__schema208": {
|
|
2108
1888
|
"type": "number",
|
|
2109
1889
|
"description": "Trajectory precision (0..1)."
|
|
2110
1890
|
},
|
|
2111
|
-
"
|
|
1891
|
+
"__schema209": {
|
|
2112
1892
|
"type": "number",
|
|
2113
1893
|
"description": "Trajectory recall (0..1)."
|
|
2114
1894
|
},
|
|
2115
|
-
"
|
|
1895
|
+
"__schema210": {
|
|
2116
1896
|
"type": "number",
|
|
2117
1897
|
"description": "Single-tool-use match score (0 or 1)."
|
|
2118
1898
|
},
|
|
2119
|
-
"
|
|
2120
|
-
"description": "
|
|
2121
|
-
"$ref": "#/$defs/
|
|
2122
|
-
},
|
|
2123
|
-
"ToolCallMetrics": {
|
|
2124
|
-
"type": "object",
|
|
2125
|
-
"properties": {
|
|
2126
|
-
"tool_call_valid": {
|
|
2127
|
-
"$ref": "#/$defs/__schema238"
|
|
2128
|
-
},
|
|
2129
|
-
"tool_name_match": {
|
|
2130
|
-
"$ref": "#/$defs/__schema239"
|
|
2131
|
-
},
|
|
2132
|
-
"tool_parameter_key_match": {
|
|
2133
|
-
"$ref": "#/$defs/__schema240"
|
|
2134
|
-
},
|
|
2135
|
-
"tool_parameter_kv_match": {
|
|
2136
|
-
"$ref": "#/$defs/__schema241"
|
|
2137
|
-
}
|
|
2138
|
-
},
|
|
2139
|
-
"required": [
|
|
2140
|
-
"tool_call_valid",
|
|
2141
|
-
"tool_name_match",
|
|
2142
|
-
"tool_parameter_key_match",
|
|
2143
|
-
"tool_parameter_kv_match"
|
|
2144
|
-
],
|
|
2145
|
-
"additionalProperties": false,
|
|
2146
|
-
"title": "ToolCallMetrics",
|
|
2147
|
-
"description": "Tool-call-level metric scores for one repetition."
|
|
2148
|
-
},
|
|
2149
|
-
"__schema238": {
|
|
2150
|
-
"type": "number",
|
|
2151
|
-
"description": "Tool call validity score (0..1)."
|
|
1899
|
+
"__schema211": {
|
|
1900
|
+
"description": "Session latency in seconds.",
|
|
1901
|
+
"$ref": "#/$defs/__schema212"
|
|
2152
1902
|
},
|
|
2153
|
-
"
|
|
2154
|
-
"type": "number"
|
|
2155
|
-
"description": "Tool name match score (0..1)."
|
|
1903
|
+
"__schema212": {
|
|
1904
|
+
"type": "number"
|
|
2156
1905
|
},
|
|
2157
|
-
"
|
|
2158
|
-
"
|
|
2159
|
-
"
|
|
1906
|
+
"__schema213": {
|
|
1907
|
+
"description": "1 when the harness run failed, 0 on success.",
|
|
1908
|
+
"$ref": "#/$defs/__schema214"
|
|
2160
1909
|
},
|
|
2161
|
-
"
|
|
2162
|
-
"
|
|
2163
|
-
|
|
1910
|
+
"__schema214": {
|
|
1911
|
+
"anyOf": [
|
|
1912
|
+
{
|
|
1913
|
+
"type": "number",
|
|
1914
|
+
"const": 0
|
|
1915
|
+
},
|
|
1916
|
+
{
|
|
1917
|
+
"type": "number",
|
|
1918
|
+
"const": 1
|
|
1919
|
+
}
|
|
1920
|
+
]
|
|
2164
1921
|
},
|
|
2165
|
-
"
|
|
1922
|
+
"__schema215": {
|
|
2166
1923
|
"description": "Present when the harness failed without producing a trajectory.",
|
|
2167
1924
|
"$ref": "#/$defs/RepetitionError"
|
|
2168
1925
|
},
|
|
@@ -2170,10 +1927,10 @@
|
|
|
2170
1927
|
"type": "object",
|
|
2171
1928
|
"properties": {
|
|
2172
1929
|
"message": {
|
|
2173
|
-
"$ref": "#/$defs/
|
|
1930
|
+
"$ref": "#/$defs/__schema216"
|
|
2174
1931
|
},
|
|
2175
1932
|
"diagnostics": {
|
|
2176
|
-
"$ref": "#/$defs/
|
|
1933
|
+
"$ref": "#/$defs/__schema217"
|
|
2177
1934
|
}
|
|
2178
1935
|
},
|
|
2179
1936
|
"required": [
|
|
@@ -2183,11 +1940,11 @@
|
|
|
2183
1940
|
"title": "RepetitionError",
|
|
2184
1941
|
"description": "Harness failure for one repetition without a usable TrajectoryView."
|
|
2185
1942
|
},
|
|
2186
|
-
"
|
|
1943
|
+
"__schema216": {
|
|
2187
1944
|
"type": "string",
|
|
2188
1945
|
"description": "Harness failure message for this repetition."
|
|
2189
1946
|
},
|
|
2190
|
-
"
|
|
1947
|
+
"__schema217": {
|
|
2191
1948
|
"description": "Adapter diagnostics when the harness failed before producing a trajectory.",
|
|
2192
1949
|
"$ref": "#/$defs/AdapterDiagnostics"
|
|
2193
1950
|
}
|