@bolt-foundry/gambit 0.8.6-rc.1 → 0.8.6-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/CHANGELOG.md +60 -1
  2. package/esm/gambit/simulator-ui/dist/bundle.js +2033 -964
  3. package/esm/gambit/simulator-ui/dist/bundle.js.map +4 -4
  4. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceConversationSessionStart/parameters_type.d.ts +2 -1
  5. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceConversationSessionStart/parameters_type.d.ts.map +1 -1
  6. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/normalization_ast.d.ts.map +1 -1
  7. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/normalization_ast.js +288 -101
  8. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/param_type.d.ts +84 -45
  9. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/param_type.d.ts.map +1 -1
  10. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/parameters_type.d.ts +3 -2
  11. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/parameters_type.d.ts.map +1 -1
  12. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.d.ts +1 -1
  13. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.d.ts.map +1 -1
  14. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.js +85 -47
  15. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/raw_response_type.d.ts +86 -48
  16. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/raw_response_type.d.ts.map +1 -1
  17. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/resolver_reader.d.ts.map +1 -1
  18. package/esm/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/resolver_reader.js +363 -97
  19. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/normalization_ast.d.ts.map +1 -1
  20. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/normalization_ast.js +154 -76
  21. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.d.ts +1 -1
  22. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.d.ts.map +1 -1
  23. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.js +45 -29
  24. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/raw_response_type.d.ts +46 -30
  25. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/raw_response_type.d.ts.map +1 -1
  26. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/normalization_ast.d.ts.map +1 -1
  27. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/normalization_ast.js +154 -76
  28. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/param_type.d.ts +45 -28
  29. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/param_type.d.ts.map +1 -1
  30. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.d.ts +1 -1
  31. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.d.ts.map +1 -1
  32. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.js +45 -29
  33. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/raw_response_type.d.ts +46 -30
  34. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/raw_response_type.d.ts.map +1 -1
  35. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/resolver_reader.d.ts.map +1 -1
  36. package/esm/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/resolver_reader.js +194 -78
  37. package/esm/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/param_type.d.ts +45 -28
  38. package/esm/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/param_type.d.ts.map +1 -1
  39. package/esm/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/resolver_reader.d.ts.map +1 -1
  40. package/esm/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/resolver_reader.js +194 -78
  41. package/esm/simulator-ui/isograph/components/Query/SimulatorVerifyPage.d.ts.map +1 -1
  42. package/esm/simulator-ui/isograph/components/Query/SimulatorVerifyPage.js +257 -203
  43. package/esm/simulator-ui/isograph/components/WorkspaceConversationRun/WorkbenchConversationRunChat.d.ts.map +1 -1
  44. package/esm/simulator-ui/isograph/components/WorkspaceConversationRun/WorkbenchConversationRunChat.js +8 -3
  45. package/esm/simulator-ui/isograph/entrypoints/EntrypointWorkspaceVerifyLiveWrite.d.ts.map +1 -1
  46. package/esm/simulator-ui/isograph/entrypoints/EntrypointWorkspaceVerifyLiveWrite.js +45 -28
  47. package/esm/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.d.ts +84 -45
  48. package/esm/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.d.ts.map +1 -1
  49. package/esm/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.js +85 -46
  50. package/esm/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreateMutation.d.ts +84 -45
  51. package/esm/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreateMutation.d.ts.map +1 -1
  52. package/esm/simulator-ui/src/AppShell.d.ts.map +1 -1
  53. package/esm/simulator-ui/src/AppShell.js +6 -4
  54. package/esm/simulator-ui/src/verify_unified.d.ts +17 -0
  55. package/esm/simulator-ui/src/verify_unified.d.ts.map +1 -0
  56. package/esm/simulator-ui/src/verify_unified.js +25 -0
  57. package/esm/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.d.ts +1 -1
  58. package/esm/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.d.ts.map +1 -1
  59. package/esm/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.js +1 -1
  60. package/esm/src/server.d.ts.map +1 -1
  61. package/esm/src/server.js +227 -45
  62. package/esm/src/server_isograph_environment.d.ts +11 -4
  63. package/esm/src/server_isograph_environment.d.ts.map +1 -1
  64. package/esm/src/server_isograph_environment.js +5 -0
  65. package/esm/src/simulator_graphql.d.ts +11 -4
  66. package/esm/src/simulator_graphql.d.ts.map +1 -1
  67. package/esm/src/simulator_graphql.js +278 -245
  68. package/package.json +2 -2
  69. package/script/gambit/simulator-ui/dist/bundle.js +2033 -964
  70. package/script/gambit/simulator-ui/dist/bundle.js.map +4 -4
  71. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceConversationSessionStart/parameters_type.d.ts +2 -1
  72. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceConversationSessionStart/parameters_type.d.ts.map +1 -1
  73. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/normalization_ast.d.ts.map +1 -1
  74. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/normalization_ast.js +288 -101
  75. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/param_type.d.ts +84 -45
  76. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/param_type.d.ts.map +1 -1
  77. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/parameters_type.d.ts +3 -2
  78. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/parameters_type.d.ts.map +1 -1
  79. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.d.ts +1 -1
  80. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.d.ts.map +1 -1
  81. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.js +85 -47
  82. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/raw_response_type.d.ts +86 -48
  83. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/raw_response_type.d.ts.map +1 -1
  84. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/resolver_reader.d.ts.map +1 -1
  85. package/script/simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/resolver_reader.js +363 -97
  86. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/normalization_ast.d.ts.map +1 -1
  87. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/normalization_ast.js +154 -76
  88. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.d.ts +1 -1
  89. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.d.ts.map +1 -1
  90. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.js +45 -29
  91. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/raw_response_type.d.ts +46 -30
  92. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/raw_response_type.d.ts.map +1 -1
  93. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/normalization_ast.d.ts.map +1 -1
  94. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/normalization_ast.js +154 -76
  95. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/param_type.d.ts +45 -28
  96. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/param_type.d.ts.map +1 -1
  97. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.d.ts +1 -1
  98. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.d.ts.map +1 -1
  99. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.js +45 -29
  100. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/raw_response_type.d.ts +46 -30
  101. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/raw_response_type.d.ts.map +1 -1
  102. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/resolver_reader.d.ts.map +1 -1
  103. package/script/simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/resolver_reader.js +194 -78
  104. package/script/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/param_type.d.ts +45 -28
  105. package/script/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/param_type.d.ts.map +1 -1
  106. package/script/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/resolver_reader.d.ts.map +1 -1
  107. package/script/simulator-ui/__generated__/__isograph/Workspace/VerifyTab/resolver_reader.js +194 -78
  108. package/script/simulator-ui/isograph/components/Query/SimulatorVerifyPage.d.ts.map +1 -1
  109. package/script/simulator-ui/isograph/components/Query/SimulatorVerifyPage.js +256 -202
  110. package/script/simulator-ui/isograph/components/WorkspaceConversationRun/WorkbenchConversationRunChat.d.ts.map +1 -1
  111. package/script/simulator-ui/isograph/components/WorkspaceConversationRun/WorkbenchConversationRunChat.js +8 -3
  112. package/script/simulator-ui/isograph/entrypoints/EntrypointWorkspaceVerifyLiveWrite.d.ts.map +1 -1
  113. package/script/simulator-ui/isograph/entrypoints/EntrypointWorkspaceVerifyLiveWrite.js +45 -28
  114. package/script/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.d.ts +84 -45
  115. package/script/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.d.ts.map +1 -1
  116. package/script/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreate.js +85 -46
  117. package/script/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreateMutation.d.ts +84 -45
  118. package/script/simulator-ui/mutations/GambitWorkspaceVerifyBatchRunCreateMutation.d.ts.map +1 -1
  119. package/script/simulator-ui/src/AppShell.d.ts.map +1 -1
  120. package/script/simulator-ui/src/AppShell.js +6 -4
  121. package/script/simulator-ui/src/verify_unified.d.ts +17 -0
  122. package/script/simulator-ui/src/verify_unified.d.ts.map +1 -0
  123. package/script/simulator-ui/src/verify_unified.js +29 -0
  124. package/script/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.d.ts +1 -1
  125. package/script/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.d.ts.map +1 -1
  126. package/script/simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.js +1 -1
  127. package/script/src/server.d.ts.map +1 -1
  128. package/script/src/server.js +227 -45
  129. package/script/src/server_isograph_environment.d.ts +11 -4
  130. package/script/src/server_isograph_environment.d.ts.map +1 -1
  131. package/script/src/server_isograph_environment.js +5 -0
  132. package/script/src/simulator_graphql.d.ts +11 -4
  133. package/script/src/simulator_graphql.d.ts.map +1 -1
  134. package/script/src/simulator_graphql.js +278 -245
  135. package/esm/simulator-ui/src/verify_metrics.d.ts +0 -50
  136. package/esm/simulator-ui/src/verify_metrics.d.ts.map +0 -1
  137. package/esm/simulator-ui/src/verify_metrics.js +0 -260
  138. package/script/simulator-ui/src/verify_metrics.d.ts +0 -50
  139. package/script/simulator-ui/src/verify_metrics.d.ts.map +0 -1
  140. package/script/simulator-ui/src/verify_metrics.js +0 -264
@@ -43951,16 +43951,11 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
43951
43951
  workspaceVerifyBatchRunCreate(input: $input) {
43952
43952
  workspace {
43953
43953
  id
43954
- scenarioRuns(first: 50) {
43955
- edges {
43956
- node {
43957
- id
43958
- status
43959
- startedAt
43960
- finishedAt
43961
- error
43962
- }
43963
- }
43954
+ scenarioDecks {
43955
+ id
43956
+ label
43957
+ description
43958
+ path
43964
43959
  }
43965
43960
  verification {
43966
43961
  graderDecks(first: 50) {
@@ -43978,8 +43973,10 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
43978
43973
  node {
43979
43974
  id
43980
43975
  workspaceId
43976
+ scenarioDeckId
43981
43977
  graderId
43982
- scenarioRunId
43978
+ scenarioRuns
43979
+ graderRepeatsPerScenario
43983
43980
  status
43984
43981
  startedAt
43985
43982
  finishedAt
@@ -43987,10 +43984,13 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
43987
43984
  active
43988
43985
  completed
43989
43986
  failed
43990
- requests(first: 50) {
43987
+ scenarioRunsCompleted
43988
+ scenarioRunsFailed
43989
+ requests(first: 200) {
43991
43990
  edges {
43992
43991
  node {
43993
43992
  id
43993
+ scenarioRunId
43994
43994
  status
43995
43995
  runId
43996
43996
  error
@@ -43998,31 +43998,48 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
43998
43998
  }
43999
43999
  }
44000
44000
  metrics {
44001
- sampleSize
44002
- agreementRate
44003
- scoreSpreadMin
44004
- scoreSpreadMedian
44005
- scoreSpreadMax
44006
- instabilityCount
44007
- verdict
44008
- verdictReason
44009
- outliers(first: 25) {
44001
+ scenarioRunCountRequested
44002
+ scenarioRunCountCompleted
44003
+ scenarioRunCountFailed
44004
+ gradeSampleCountRequested
44005
+ gradeSampleCountCompleted
44006
+ gradeSampleCountFailed
44007
+ executionFailureCount
44008
+ gradingFailureCount
44009
+ passRate
44010
+ scoreMin
44011
+ scoreMedian
44012
+ scoreMax
44013
+ scoreMean
44014
+ outlierScenarioRuns(first: 25) {
44010
44015
  edges {
44011
44016
  node {
44012
44017
  key
44013
- label
44014
- sampleSize
44015
- agreementRate
44016
- scoreDelta
44017
- passFlip
44018
- instability
44018
+ scenarioRunId
44019
+ gradeSampleCount
44020
+ completedSampleCount
44021
+ executionFailureCount
44022
+ gradingFailureCount
44023
+ averageScore
44024
+ minScore
44025
+ maxScore
44026
+ failed
44019
44027
  minRunId
44020
44028
  maxRunId
44021
- turnIndex
44022
44029
  messageRefId
44023
44030
  }
44024
44031
  }
44025
44032
  }
44033
+ failureReasons(first: 25) {
44034
+ edges {
44035
+ node {
44036
+ key
44037
+ kind
44038
+ reason
44039
+ count
44040
+ }
44041
+ }
44042
+ }
44026
44043
  }
44027
44044
  }
44028
44045
  }
@@ -44032,8 +44049,10 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
44032
44049
  batch {
44033
44050
  id
44034
44051
  workspaceId
44052
+ scenarioDeckId
44035
44053
  graderId
44036
- scenarioRunId
44054
+ scenarioRuns
44055
+ graderRepeatsPerScenario
44037
44056
  status
44038
44057
  startedAt
44039
44058
  finishedAt
@@ -44041,10 +44060,13 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
44041
44060
  active
44042
44061
  completed
44043
44062
  failed
44044
- requests(first: 50) {
44063
+ scenarioRunsCompleted
44064
+ scenarioRunsFailed
44065
+ requests(first: 200) {
44045
44066
  edges {
44046
44067
  node {
44047
44068
  id
44069
+ scenarioRunId
44048
44070
  status
44049
44071
  runId
44050
44072
  error
@@ -44052,31 +44074,48 @@ var GambitWorkspaceVerifyBatchRunCreateMutation = iso(`
44052
44074
  }
44053
44075
  }
44054
44076
  metrics {
44055
- sampleSize
44056
- agreementRate
44057
- scoreSpreadMin
44058
- scoreSpreadMedian
44059
- scoreSpreadMax
44060
- instabilityCount
44061
- verdict
44062
- verdictReason
44063
- outliers(first: 25) {
44077
+ scenarioRunCountRequested
44078
+ scenarioRunCountCompleted
44079
+ scenarioRunCountFailed
44080
+ gradeSampleCountRequested
44081
+ gradeSampleCountCompleted
44082
+ gradeSampleCountFailed
44083
+ executionFailureCount
44084
+ gradingFailureCount
44085
+ passRate
44086
+ scoreMin
44087
+ scoreMedian
44088
+ scoreMax
44089
+ scoreMean
44090
+ outlierScenarioRuns(first: 25) {
44064
44091
  edges {
44065
44092
  node {
44066
44093
  key
44067
- label
44068
- sampleSize
44069
- agreementRate
44070
- scoreDelta
44071
- passFlip
44072
- instability
44094
+ scenarioRunId
44095
+ gradeSampleCount
44096
+ completedSampleCount
44097
+ executionFailureCount
44098
+ gradingFailureCount
44099
+ averageScore
44100
+ minScore
44101
+ maxScore
44102
+ failed
44073
44103
  minRunId
44074
44104
  maxRunId
44075
- turnIndex
44076
44105
  messageRefId
44077
44106
  }
44078
44107
  }
44079
44108
  }
44109
+ failureReasons(first: 25) {
44110
+ edges {
44111
+ node {
44112
+ key
44113
+ kind
44114
+ reason
44115
+ count
44116
+ }
44117
+ }
44118
+ }
44080
44119
  }
44081
44120
  }
44082
44121
  }
@@ -44122,77 +44161,40 @@ var readerAst20 = [
44122
44161
  },
44123
44162
  {
44124
44163
  kind: "Linked",
44125
- fieldName: "scenarioRuns",
44164
+ fieldName: "scenarioDecks",
44126
44165
  alias: null,
44127
- arguments: [
44128
- [
44129
- "first",
44130
- {
44131
- kind: "Literal",
44132
- value: 50
44133
- }
44134
- ]
44135
- ],
44166
+ arguments: null,
44136
44167
  condition: null,
44137
44168
  isUpdatable: false,
44138
44169
  refetchQueryIndex: null,
44139
44170
  selections: [
44140
44171
  {
44141
- kind: "Linked",
44142
- fieldName: "edges",
44172
+ kind: "Scalar",
44173
+ fieldName: "id",
44143
44174
  alias: null,
44144
44175
  arguments: null,
44145
- condition: null,
44146
- isUpdatable: false,
44147
- refetchQueryIndex: null,
44148
- selections: [
44149
- {
44150
- kind: "Linked",
44151
- fieldName: "node",
44152
- alias: null,
44153
- arguments: null,
44154
- condition: null,
44155
- isUpdatable: false,
44156
- refetchQueryIndex: null,
44157
- selections: [
44158
- {
44159
- kind: "Scalar",
44160
- fieldName: "id",
44161
- alias: null,
44162
- arguments: null,
44163
- isUpdatable: false
44164
- },
44165
- {
44166
- kind: "Scalar",
44167
- fieldName: "status",
44168
- alias: null,
44169
- arguments: null,
44170
- isUpdatable: false
44171
- },
44172
- {
44173
- kind: "Scalar",
44174
- fieldName: "startedAt",
44175
- alias: null,
44176
- arguments: null,
44177
- isUpdatable: false
44178
- },
44179
- {
44180
- kind: "Scalar",
44181
- fieldName: "finishedAt",
44182
- alias: null,
44183
- arguments: null,
44184
- isUpdatable: false
44185
- },
44186
- {
44187
- kind: "Scalar",
44188
- fieldName: "error",
44189
- alias: null,
44190
- arguments: null,
44191
- isUpdatable: false
44192
- }
44193
- ]
44194
- }
44195
- ]
44176
+ isUpdatable: false
44177
+ },
44178
+ {
44179
+ kind: "Scalar",
44180
+ fieldName: "label",
44181
+ alias: null,
44182
+ arguments: null,
44183
+ isUpdatable: false
44184
+ },
44185
+ {
44186
+ kind: "Scalar",
44187
+ fieldName: "description",
44188
+ alias: null,
44189
+ arguments: null,
44190
+ isUpdatable: false
44191
+ },
44192
+ {
44193
+ kind: "Scalar",
44194
+ fieldName: "path",
44195
+ alias: null,
44196
+ arguments: null,
44197
+ isUpdatable: false
44196
44198
  }
44197
44199
  ]
44198
44200
  },
@@ -44323,6 +44325,13 @@ var readerAst20 = [
44323
44325
  arguments: null,
44324
44326
  isUpdatable: false
44325
44327
  },
44328
+ {
44329
+ kind: "Scalar",
44330
+ fieldName: "scenarioDeckId",
44331
+ alias: null,
44332
+ arguments: null,
44333
+ isUpdatable: false
44334
+ },
44326
44335
  {
44327
44336
  kind: "Scalar",
44328
44337
  fieldName: "graderId",
@@ -44332,7 +44341,14 @@ var readerAst20 = [
44332
44341
  },
44333
44342
  {
44334
44343
  kind: "Scalar",
44335
- fieldName: "scenarioRunId",
44344
+ fieldName: "scenarioRuns",
44345
+ alias: null,
44346
+ arguments: null,
44347
+ isUpdatable: false
44348
+ },
44349
+ {
44350
+ kind: "Scalar",
44351
+ fieldName: "graderRepeatsPerScenario",
44336
44352
  alias: null,
44337
44353
  arguments: null,
44338
44354
  isUpdatable: false
@@ -44386,6 +44402,20 @@ var readerAst20 = [
44386
44402
  arguments: null,
44387
44403
  isUpdatable: false
44388
44404
  },
44405
+ {
44406
+ kind: "Scalar",
44407
+ fieldName: "scenarioRunsCompleted",
44408
+ alias: null,
44409
+ arguments: null,
44410
+ isUpdatable: false
44411
+ },
44412
+ {
44413
+ kind: "Scalar",
44414
+ fieldName: "scenarioRunsFailed",
44415
+ alias: null,
44416
+ arguments: null,
44417
+ isUpdatable: false
44418
+ },
44389
44419
  {
44390
44420
  kind: "Linked",
44391
44421
  fieldName: "requests",
@@ -44395,7 +44425,7 @@ var readerAst20 = [
44395
44425
  "first",
44396
44426
  {
44397
44427
  kind: "Literal",
44398
- value: 50
44428
+ value: 200
44399
44429
  }
44400
44430
  ]
44401
44431
  ],
@@ -44428,6 +44458,13 @@ var readerAst20 = [
44428
44458
  arguments: null,
44429
44459
  isUpdatable: false
44430
44460
  },
44461
+ {
44462
+ kind: "Scalar",
44463
+ fieldName: "scenarioRunId",
44464
+ alias: null,
44465
+ arguments: null,
44466
+ isUpdatable: false
44467
+ },
44431
44468
  {
44432
44469
  kind: "Scalar",
44433
44470
  fieldName: "status",
@@ -44466,63 +44503,98 @@ var readerAst20 = [
44466
44503
  selections: [
44467
44504
  {
44468
44505
  kind: "Scalar",
44469
- fieldName: "sampleSize",
44506
+ fieldName: "scenarioRunCountRequested",
44507
+ alias: null,
44508
+ arguments: null,
44509
+ isUpdatable: false
44510
+ },
44511
+ {
44512
+ kind: "Scalar",
44513
+ fieldName: "scenarioRunCountCompleted",
44514
+ alias: null,
44515
+ arguments: null,
44516
+ isUpdatable: false
44517
+ },
44518
+ {
44519
+ kind: "Scalar",
44520
+ fieldName: "scenarioRunCountFailed",
44521
+ alias: null,
44522
+ arguments: null,
44523
+ isUpdatable: false
44524
+ },
44525
+ {
44526
+ kind: "Scalar",
44527
+ fieldName: "gradeSampleCountRequested",
44528
+ alias: null,
44529
+ arguments: null,
44530
+ isUpdatable: false
44531
+ },
44532
+ {
44533
+ kind: "Scalar",
44534
+ fieldName: "gradeSampleCountCompleted",
44535
+ alias: null,
44536
+ arguments: null,
44537
+ isUpdatable: false
44538
+ },
44539
+ {
44540
+ kind: "Scalar",
44541
+ fieldName: "gradeSampleCountFailed",
44470
44542
  alias: null,
44471
44543
  arguments: null,
44472
44544
  isUpdatable: false
44473
44545
  },
44474
44546
  {
44475
44547
  kind: "Scalar",
44476
- fieldName: "agreementRate",
44548
+ fieldName: "executionFailureCount",
44477
44549
  alias: null,
44478
44550
  arguments: null,
44479
44551
  isUpdatable: false
44480
44552
  },
44481
44553
  {
44482
44554
  kind: "Scalar",
44483
- fieldName: "scoreSpreadMin",
44555
+ fieldName: "gradingFailureCount",
44484
44556
  alias: null,
44485
44557
  arguments: null,
44486
44558
  isUpdatable: false
44487
44559
  },
44488
44560
  {
44489
44561
  kind: "Scalar",
44490
- fieldName: "scoreSpreadMedian",
44562
+ fieldName: "passRate",
44491
44563
  alias: null,
44492
44564
  arguments: null,
44493
44565
  isUpdatable: false
44494
44566
  },
44495
44567
  {
44496
44568
  kind: "Scalar",
44497
- fieldName: "scoreSpreadMax",
44569
+ fieldName: "scoreMin",
44498
44570
  alias: null,
44499
44571
  arguments: null,
44500
44572
  isUpdatable: false
44501
44573
  },
44502
44574
  {
44503
44575
  kind: "Scalar",
44504
- fieldName: "instabilityCount",
44576
+ fieldName: "scoreMedian",
44505
44577
  alias: null,
44506
44578
  arguments: null,
44507
44579
  isUpdatable: false
44508
44580
  },
44509
44581
  {
44510
44582
  kind: "Scalar",
44511
- fieldName: "verdict",
44583
+ fieldName: "scoreMax",
44512
44584
  alias: null,
44513
44585
  arguments: null,
44514
44586
  isUpdatable: false
44515
44587
  },
44516
44588
  {
44517
44589
  kind: "Scalar",
44518
- fieldName: "verdictReason",
44590
+ fieldName: "scoreMean",
44519
44591
  alias: null,
44520
44592
  arguments: null,
44521
44593
  isUpdatable: false
44522
44594
  },
44523
44595
  {
44524
44596
  kind: "Linked",
44525
- fieldName: "outliers",
44597
+ fieldName: "outlierScenarioRuns",
44526
44598
  alias: null,
44527
44599
  arguments: [
44528
44600
  [
@@ -44564,63 +44636,77 @@ var readerAst20 = [
44564
44636
  },
44565
44637
  {
44566
44638
  kind: "Scalar",
44567
- fieldName: "label",
44639
+ fieldName: "scenarioRunId",
44568
44640
  alias: null,
44569
44641
  arguments: null,
44570
44642
  isUpdatable: false
44571
44643
  },
44572
44644
  {
44573
44645
  kind: "Scalar",
44574
- fieldName: "sampleSize",
44646
+ fieldName: "gradeSampleCount",
44575
44647
  alias: null,
44576
44648
  arguments: null,
44577
44649
  isUpdatable: false
44578
44650
  },
44579
44651
  {
44580
44652
  kind: "Scalar",
44581
- fieldName: "agreementRate",
44653
+ fieldName: "completedSampleCount",
44582
44654
  alias: null,
44583
44655
  arguments: null,
44584
44656
  isUpdatable: false
44585
44657
  },
44586
44658
  {
44587
44659
  kind: "Scalar",
44588
- fieldName: "scoreDelta",
44660
+ fieldName: "executionFailureCount",
44589
44661
  alias: null,
44590
44662
  arguments: null,
44591
44663
  isUpdatable: false
44592
44664
  },
44593
44665
  {
44594
44666
  kind: "Scalar",
44595
- fieldName: "passFlip",
44667
+ fieldName: "gradingFailureCount",
44596
44668
  alias: null,
44597
44669
  arguments: null,
44598
44670
  isUpdatable: false
44599
44671
  },
44600
44672
  {
44601
44673
  kind: "Scalar",
44602
- fieldName: "instability",
44674
+ fieldName: "averageScore",
44603
44675
  alias: null,
44604
44676
  arguments: null,
44605
44677
  isUpdatable: false
44606
44678
  },
44607
44679
  {
44608
44680
  kind: "Scalar",
44609
- fieldName: "minRunId",
44681
+ fieldName: "minScore",
44610
44682
  alias: null,
44611
44683
  arguments: null,
44612
44684
  isUpdatable: false
44613
44685
  },
44614
44686
  {
44615
44687
  kind: "Scalar",
44616
- fieldName: "maxRunId",
44688
+ fieldName: "maxScore",
44689
+ alias: null,
44690
+ arguments: null,
44691
+ isUpdatable: false
44692
+ },
44693
+ {
44694
+ kind: "Scalar",
44695
+ fieldName: "failed",
44696
+ alias: null,
44697
+ arguments: null,
44698
+ isUpdatable: false
44699
+ },
44700
+ {
44701
+ kind: "Scalar",
44702
+ fieldName: "minRunId",
44617
44703
  alias: null,
44618
44704
  arguments: null,
44619
44705
  isUpdatable: false
44620
44706
  },
44621
44707
  {
44622
44708
  kind: "Scalar",
44623
- fieldName: "turnIndex",
44709
+ fieldName: "maxRunId",
44624
44710
  alias: null,
44625
44711
  arguments: null,
44626
44712
  isUpdatable: false
@@ -44637,65 +44723,148 @@ var readerAst20 = [
44637
44723
  ]
44638
44724
  }
44639
44725
  ]
44640
- }
44641
- ]
44642
- }
44643
- ]
44644
- }
44645
- ]
44646
- }
44647
- ]
44648
- }
44649
- ]
44650
- }
44651
- ]
44652
- },
44653
- {
44654
- kind: "Linked",
44655
- fieldName: "batch",
44656
- alias: null,
44657
- arguments: null,
44658
- condition: null,
44659
- isUpdatable: false,
44660
- refetchQueryIndex: null,
44661
- selections: [
44662
- {
44663
- kind: "Scalar",
44664
- fieldName: "id",
44665
- alias: null,
44666
- arguments: null,
44667
- isUpdatable: false
44668
- },
44669
- {
44670
- kind: "Scalar",
44671
- fieldName: "workspaceId",
44672
- alias: null,
44673
- arguments: null,
44674
- isUpdatable: false
44675
- },
44676
- {
44677
- kind: "Scalar",
44678
- fieldName: "graderId",
44679
- alias: null,
44680
- arguments: null,
44681
- isUpdatable: false
44682
- },
44683
- {
44684
- kind: "Scalar",
44685
- fieldName: "scenarioRunId",
44686
- alias: null,
44687
- arguments: null,
44688
- isUpdatable: false
44689
- },
44690
- {
44691
- kind: "Scalar",
44692
- fieldName: "status",
44693
- alias: null,
44694
- arguments: null,
44695
- isUpdatable: false
44696
- },
44697
- {
44698
- kind: "Scalar",
44726
+ },
44727
+ {
44728
+ kind: "Linked",
44729
+ fieldName: "failureReasons",
44730
+ alias: null,
44731
+ arguments: [
44732
+ [
44733
+ "first",
44734
+ {
44735
+ kind: "Literal",
44736
+ value: 25
44737
+ }
44738
+ ]
44739
+ ],
44740
+ condition: null,
44741
+ isUpdatable: false,
44742
+ refetchQueryIndex: null,
44743
+ selections: [
44744
+ {
44745
+ kind: "Linked",
44746
+ fieldName: "edges",
44747
+ alias: null,
44748
+ arguments: null,
44749
+ condition: null,
44750
+ isUpdatable: false,
44751
+ refetchQueryIndex: null,
44752
+ selections: [
44753
+ {
44754
+ kind: "Linked",
44755
+ fieldName: "node",
44756
+ alias: null,
44757
+ arguments: null,
44758
+ condition: null,
44759
+ isUpdatable: false,
44760
+ refetchQueryIndex: null,
44761
+ selections: [
44762
+ {
44763
+ kind: "Scalar",
44764
+ fieldName: "key",
44765
+ alias: null,
44766
+ arguments: null,
44767
+ isUpdatable: false
44768
+ },
44769
+ {
44770
+ kind: "Scalar",
44771
+ fieldName: "kind",
44772
+ alias: null,
44773
+ arguments: null,
44774
+ isUpdatable: false
44775
+ },
44776
+ {
44777
+ kind: "Scalar",
44778
+ fieldName: "reason",
44779
+ alias: null,
44780
+ arguments: null,
44781
+ isUpdatable: false
44782
+ },
44783
+ {
44784
+ kind: "Scalar",
44785
+ fieldName: "count",
44786
+ alias: null,
44787
+ arguments: null,
44788
+ isUpdatable: false
44789
+ }
44790
+ ]
44791
+ }
44792
+ ]
44793
+ }
44794
+ ]
44795
+ }
44796
+ ]
44797
+ }
44798
+ ]
44799
+ }
44800
+ ]
44801
+ }
44802
+ ]
44803
+ }
44804
+ ]
44805
+ }
44806
+ ]
44807
+ },
44808
+ {
44809
+ kind: "Linked",
44810
+ fieldName: "batch",
44811
+ alias: null,
44812
+ arguments: null,
44813
+ condition: null,
44814
+ isUpdatable: false,
44815
+ refetchQueryIndex: null,
44816
+ selections: [
44817
+ {
44818
+ kind: "Scalar",
44819
+ fieldName: "id",
44820
+ alias: null,
44821
+ arguments: null,
44822
+ isUpdatable: false
44823
+ },
44824
+ {
44825
+ kind: "Scalar",
44826
+ fieldName: "workspaceId",
44827
+ alias: null,
44828
+ arguments: null,
44829
+ isUpdatable: false
44830
+ },
44831
+ {
44832
+ kind: "Scalar",
44833
+ fieldName: "scenarioDeckId",
44834
+ alias: null,
44835
+ arguments: null,
44836
+ isUpdatable: false
44837
+ },
44838
+ {
44839
+ kind: "Scalar",
44840
+ fieldName: "graderId",
44841
+ alias: null,
44842
+ arguments: null,
44843
+ isUpdatable: false
44844
+ },
44845
+ {
44846
+ kind: "Scalar",
44847
+ fieldName: "scenarioRuns",
44848
+ alias: null,
44849
+ arguments: null,
44850
+ isUpdatable: false
44851
+ },
44852
+ {
44853
+ kind: "Scalar",
44854
+ fieldName: "graderRepeatsPerScenario",
44855
+ alias: null,
44856
+ arguments: null,
44857
+ isUpdatable: false
44858
+ },
44859
+ {
44860
+ kind: "Scalar",
44861
+ fieldName: "status",
44862
+ alias: null,
44863
+ arguments: null,
44864
+ isUpdatable: false
44865
+ },
44866
+ {
44867
+ kind: "Scalar",
44699
44868
  fieldName: "startedAt",
44700
44869
  alias: null,
44701
44870
  arguments: null,
@@ -44736,6 +44905,20 @@ var readerAst20 = [
44736
44905
  arguments: null,
44737
44906
  isUpdatable: false
44738
44907
  },
44908
+ {
44909
+ kind: "Scalar",
44910
+ fieldName: "scenarioRunsCompleted",
44911
+ alias: null,
44912
+ arguments: null,
44913
+ isUpdatable: false
44914
+ },
44915
+ {
44916
+ kind: "Scalar",
44917
+ fieldName: "scenarioRunsFailed",
44918
+ alias: null,
44919
+ arguments: null,
44920
+ isUpdatable: false
44921
+ },
44739
44922
  {
44740
44923
  kind: "Linked",
44741
44924
  fieldName: "requests",
@@ -44745,7 +44928,7 @@ var readerAst20 = [
44745
44928
  "first",
44746
44929
  {
44747
44930
  kind: "Literal",
44748
- value: 50
44931
+ value: 200
44749
44932
  }
44750
44933
  ]
44751
44934
  ],
@@ -44778,6 +44961,13 @@ var readerAst20 = [
44778
44961
  arguments: null,
44779
44962
  isUpdatable: false
44780
44963
  },
44964
+ {
44965
+ kind: "Scalar",
44966
+ fieldName: "scenarioRunId",
44967
+ alias: null,
44968
+ arguments: null,
44969
+ isUpdatable: false
44970
+ },
44781
44971
  {
44782
44972
  kind: "Scalar",
44783
44973
  fieldName: "status",
@@ -44816,63 +45006,98 @@ var readerAst20 = [
44816
45006
  selections: [
44817
45007
  {
44818
45008
  kind: "Scalar",
44819
- fieldName: "sampleSize",
45009
+ fieldName: "scenarioRunCountRequested",
44820
45010
  alias: null,
44821
45011
  arguments: null,
44822
45012
  isUpdatable: false
44823
45013
  },
44824
45014
  {
44825
45015
  kind: "Scalar",
44826
- fieldName: "agreementRate",
45016
+ fieldName: "scenarioRunCountCompleted",
44827
45017
  alias: null,
44828
45018
  arguments: null,
44829
45019
  isUpdatable: false
44830
45020
  },
44831
45021
  {
44832
45022
  kind: "Scalar",
44833
- fieldName: "scoreSpreadMin",
45023
+ fieldName: "scenarioRunCountFailed",
44834
45024
  alias: null,
44835
45025
  arguments: null,
44836
45026
  isUpdatable: false
44837
45027
  },
44838
45028
  {
44839
45029
  kind: "Scalar",
44840
- fieldName: "scoreSpreadMedian",
45030
+ fieldName: "gradeSampleCountRequested",
44841
45031
  alias: null,
44842
45032
  arguments: null,
44843
45033
  isUpdatable: false
44844
45034
  },
44845
45035
  {
44846
45036
  kind: "Scalar",
44847
- fieldName: "scoreSpreadMax",
45037
+ fieldName: "gradeSampleCountCompleted",
44848
45038
  alias: null,
44849
45039
  arguments: null,
44850
45040
  isUpdatable: false
44851
45041
  },
44852
45042
  {
44853
45043
  kind: "Scalar",
44854
- fieldName: "instabilityCount",
45044
+ fieldName: "gradeSampleCountFailed",
44855
45045
  alias: null,
44856
45046
  arguments: null,
44857
45047
  isUpdatable: false
44858
45048
  },
44859
45049
  {
44860
45050
  kind: "Scalar",
44861
- fieldName: "verdict",
45051
+ fieldName: "executionFailureCount",
44862
45052
  alias: null,
44863
45053
  arguments: null,
44864
45054
  isUpdatable: false
44865
45055
  },
44866
45056
  {
44867
45057
  kind: "Scalar",
44868
- fieldName: "verdictReason",
45058
+ fieldName: "gradingFailureCount",
45059
+ alias: null,
45060
+ arguments: null,
45061
+ isUpdatable: false
45062
+ },
45063
+ {
45064
+ kind: "Scalar",
45065
+ fieldName: "passRate",
45066
+ alias: null,
45067
+ arguments: null,
45068
+ isUpdatable: false
45069
+ },
45070
+ {
45071
+ kind: "Scalar",
45072
+ fieldName: "scoreMin",
45073
+ alias: null,
45074
+ arguments: null,
45075
+ isUpdatable: false
45076
+ },
45077
+ {
45078
+ kind: "Scalar",
45079
+ fieldName: "scoreMedian",
45080
+ alias: null,
45081
+ arguments: null,
45082
+ isUpdatable: false
45083
+ },
45084
+ {
45085
+ kind: "Scalar",
45086
+ fieldName: "scoreMax",
45087
+ alias: null,
45088
+ arguments: null,
45089
+ isUpdatable: false
45090
+ },
45091
+ {
45092
+ kind: "Scalar",
45093
+ fieldName: "scoreMean",
44869
45094
  alias: null,
44870
45095
  arguments: null,
44871
45096
  isUpdatable: false
44872
45097
  },
44873
45098
  {
44874
45099
  kind: "Linked",
44875
- fieldName: "outliers",
45100
+ fieldName: "outlierScenarioRuns",
44876
45101
  alias: null,
44877
45102
  arguments: [
44878
45103
  [
@@ -44914,42 +45139,63 @@ var readerAst20 = [
44914
45139
  },
44915
45140
  {
44916
45141
  kind: "Scalar",
44917
- fieldName: "label",
45142
+ fieldName: "scenarioRunId",
45143
+ alias: null,
45144
+ arguments: null,
45145
+ isUpdatable: false
45146
+ },
45147
+ {
45148
+ kind: "Scalar",
45149
+ fieldName: "gradeSampleCount",
44918
45150
  alias: null,
44919
45151
  arguments: null,
44920
45152
  isUpdatable: false
44921
45153
  },
44922
45154
  {
44923
45155
  kind: "Scalar",
44924
- fieldName: "sampleSize",
45156
+ fieldName: "completedSampleCount",
44925
45157
  alias: null,
44926
45158
  arguments: null,
44927
45159
  isUpdatable: false
44928
45160
  },
44929
45161
  {
44930
45162
  kind: "Scalar",
44931
- fieldName: "agreementRate",
45163
+ fieldName: "executionFailureCount",
44932
45164
  alias: null,
44933
45165
  arguments: null,
44934
45166
  isUpdatable: false
44935
45167
  },
44936
45168
  {
44937
45169
  kind: "Scalar",
44938
- fieldName: "scoreDelta",
45170
+ fieldName: "gradingFailureCount",
44939
45171
  alias: null,
44940
45172
  arguments: null,
44941
45173
  isUpdatable: false
44942
45174
  },
44943
45175
  {
44944
45176
  kind: "Scalar",
44945
- fieldName: "passFlip",
45177
+ fieldName: "averageScore",
44946
45178
  alias: null,
44947
45179
  arguments: null,
44948
45180
  isUpdatable: false
44949
45181
  },
44950
45182
  {
44951
45183
  kind: "Scalar",
44952
- fieldName: "instability",
45184
+ fieldName: "minScore",
45185
+ alias: null,
45186
+ arguments: null,
45187
+ isUpdatable: false
45188
+ },
45189
+ {
45190
+ kind: "Scalar",
45191
+ fieldName: "maxScore",
45192
+ alias: null,
45193
+ arguments: null,
45194
+ isUpdatable: false
45195
+ },
45196
+ {
45197
+ kind: "Scalar",
45198
+ fieldName: "failed",
44953
45199
  alias: null,
44954
45200
  arguments: null,
44955
45201
  isUpdatable: false
@@ -44970,14 +45216,76 @@ var readerAst20 = [
44970
45216
  },
44971
45217
  {
44972
45218
  kind: "Scalar",
44973
- fieldName: "turnIndex",
45219
+ fieldName: "messageRefId",
45220
+ alias: null,
45221
+ arguments: null,
45222
+ isUpdatable: false
45223
+ }
45224
+ ]
45225
+ }
45226
+ ]
45227
+ }
45228
+ ]
45229
+ },
45230
+ {
45231
+ kind: "Linked",
45232
+ fieldName: "failureReasons",
45233
+ alias: null,
45234
+ arguments: [
45235
+ [
45236
+ "first",
45237
+ {
45238
+ kind: "Literal",
45239
+ value: 25
45240
+ }
45241
+ ]
45242
+ ],
45243
+ condition: null,
45244
+ isUpdatable: false,
45245
+ refetchQueryIndex: null,
45246
+ selections: [
45247
+ {
45248
+ kind: "Linked",
45249
+ fieldName: "edges",
45250
+ alias: null,
45251
+ arguments: null,
45252
+ condition: null,
45253
+ isUpdatable: false,
45254
+ refetchQueryIndex: null,
45255
+ selections: [
45256
+ {
45257
+ kind: "Linked",
45258
+ fieldName: "node",
45259
+ alias: null,
45260
+ arguments: null,
45261
+ condition: null,
45262
+ isUpdatable: false,
45263
+ refetchQueryIndex: null,
45264
+ selections: [
45265
+ {
45266
+ kind: "Scalar",
45267
+ fieldName: "key",
45268
+ alias: null,
45269
+ arguments: null,
45270
+ isUpdatable: false
45271
+ },
45272
+ {
45273
+ kind: "Scalar",
45274
+ fieldName: "kind",
44974
45275
  alias: null,
44975
45276
  arguments: null,
44976
45277
  isUpdatable: false
44977
45278
  },
44978
45279
  {
44979
45280
  kind: "Scalar",
44980
- fieldName: "messageRefId",
45281
+ fieldName: "reason",
45282
+ alias: null,
45283
+ arguments: null,
45284
+ isUpdatable: false
45285
+ },
45286
+ {
45287
+ kind: "Scalar",
45288
+ fieldName: "count",
44981
45289
  alias: null,
44982
45290
  arguments: null,
44983
45291
  isUpdatable: false
@@ -45005,7 +45313,7 @@ var artifact32 = {
45005
45313
  var resolver_reader_default20 = artifact32;
45006
45314
 
45007
45315
  // simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/query_text.ts
45008
- var query_text_default13 = "mutation GambitWorkspaceVerifyBatchRunCreate($input: WorkspaceVerifyBatchRunCreateInput!) { workspaceVerifyBatchRunCreate____input___v_input: workspaceVerifyBatchRunCreate(input: $input) { batch { id, active, completed, failed, finishedAt, graderId, metrics { agreementRate, instabilityCount, outliers____first___l_25: outliers(first: 25) { edges { node { agreementRate, instability, key, label, maxRunId, messageRefId, minRunId, passFlip, sampleSize, scoreDelta, turnIndex, }, }, }, sampleSize, scoreSpreadMax, scoreSpreadMedian, scoreSpreadMin, verdict, verdictReason, }, requested, requests____first___l_50: requests(first: 50) { edges { node { id, error, runId, status, }, }, }, scenarioRunId, startedAt, status, workspaceId, }, workspace { id, scenarioRuns____first___l_50: scenarioRuns(first: 50) { edges { node { __typename, id, error, finishedAt, startedAt, status, }, }, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, metrics { agreementRate, instabilityCount, outliers____first___l_25: outliers(first: 25) { edges { node { agreementRate, instability, key, label, maxRunId, messageRefId, minRunId, passFlip, sampleSize, scoreDelta, turnIndex, }, }, }, sampleSize, scoreSpreadMax, scoreSpreadMedian, scoreSpreadMin, verdict, verdictReason, }, requested, requests____first___l_50: requests(first: 50) { edges { node { id, error, runId, status, }, }, }, scenarioRunId, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, }, },}";
45316
+ var query_text_default13 = "mutation GambitWorkspaceVerifyBatchRunCreate($input: WorkspaceVerifyBatchRunCreateInput!) { workspaceVerifyBatchRunCreate____input___v_input: workspaceVerifyBatchRunCreate(input: $input) { batch { id, active, completed, failed, finishedAt, graderId, graderRepeatsPerScenario, metrics { executionFailureCount, failureReasons____first___l_25: failureReasons(first: 25) { edges { node { count, key, kind, reason, }, }, }, gradeSampleCountCompleted, gradeSampleCountFailed, gradeSampleCountRequested, gradingFailureCount, outlierScenarioRuns____first___l_25: outlierScenarioRuns(first: 25) { edges { node { averageScore, completedSampleCount, executionFailureCount, failed, gradeSampleCount, gradingFailureCount, key, maxRunId, maxScore, messageRefId, minRunId, minScore, scenarioRunId, }, }, }, passRate, scenarioRunCountCompleted, scenarioRunCountFailed, scenarioRunCountRequested, scoreMax, scoreMean, scoreMedian, scoreMin, }, requested, requests____first___l_200: requests(first: 200) { edges { node { id, error, runId, scenarioRunId, status, }, }, }, scenarioDeckId, scenarioRuns, scenarioRunsCompleted, scenarioRunsFailed, startedAt, status, workspaceId, }, workspace { id, scenarioDecks { id, description, label, path, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, graderRepeatsPerScenario, metrics { executionFailureCount, failureReasons____first___l_25: failureReasons(first: 25) { edges { node { count, key, kind, reason, }, }, }, gradeSampleCountCompleted, gradeSampleCountFailed, gradeSampleCountRequested, gradingFailureCount, outlierScenarioRuns____first___l_25: outlierScenarioRuns(first: 25) { edges { node { averageScore, completedSampleCount, executionFailureCount, failed, gradeSampleCount, gradingFailureCount, key, maxRunId, maxScore, messageRefId, minRunId, minScore, scenarioRunId, }, }, }, passRate, scenarioRunCountCompleted, scenarioRunCountFailed, scenarioRunCountRequested, scoreMax, scoreMean, scoreMedian, scoreMin, }, requested, requests____first___l_200: requests(first: 200) { edges { node { id, error, runId, scenarioRunId, status, }, }, }, scenarioDeckId, scenarioRuns, scenarioRunsCompleted, scenarioRunsFailed, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, }, },}";
45009
45317
 
45010
45318
  // simulator-ui/__generated__/__isograph/Mutation/GambitWorkspaceVerifyBatchRunCreate/normalization_ast.ts
45011
45319
  var normalizationAst13 = {
@@ -45061,6 +45369,11 @@ var normalizationAst13 = {
45061
45369
  fieldName: "graderId",
45062
45370
  arguments: null
45063
45371
  },
45372
+ {
45373
+ kind: "Scalar",
45374
+ fieldName: "graderRepeatsPerScenario",
45375
+ arguments: null
45376
+ },
45064
45377
  {
45065
45378
  kind: "Linked",
45066
45379
  fieldName: "metrics",
@@ -45069,17 +45382,84 @@ var normalizationAst13 = {
45069
45382
  selections: [
45070
45383
  {
45071
45384
  kind: "Scalar",
45072
- fieldName: "agreementRate",
45385
+ fieldName: "executionFailureCount",
45073
45386
  arguments: null
45074
45387
  },
45388
+ {
45389
+ kind: "Linked",
45390
+ fieldName: "failureReasons",
45391
+ arguments: [
45392
+ [
45393
+ "first",
45394
+ {
45395
+ kind: "Literal",
45396
+ value: 25
45397
+ }
45398
+ ]
45399
+ ],
45400
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnection",
45401
+ selections: [
45402
+ {
45403
+ kind: "Linked",
45404
+ fieldName: "edges",
45405
+ arguments: null,
45406
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnectionEdge",
45407
+ selections: [
45408
+ {
45409
+ kind: "Linked",
45410
+ fieldName: "node",
45411
+ arguments: null,
45412
+ concreteType: "WorkspaceVerifyFailureReasonGroup",
45413
+ selections: [
45414
+ {
45415
+ kind: "Scalar",
45416
+ fieldName: "count",
45417
+ arguments: null
45418
+ },
45419
+ {
45420
+ kind: "Scalar",
45421
+ fieldName: "key",
45422
+ arguments: null
45423
+ },
45424
+ {
45425
+ kind: "Scalar",
45426
+ fieldName: "kind",
45427
+ arguments: null
45428
+ },
45429
+ {
45430
+ kind: "Scalar",
45431
+ fieldName: "reason",
45432
+ arguments: null
45433
+ }
45434
+ ]
45435
+ }
45436
+ ]
45437
+ }
45438
+ ]
45439
+ },
45075
45440
  {
45076
45441
  kind: "Scalar",
45077
- fieldName: "instabilityCount",
45442
+ fieldName: "gradeSampleCountCompleted",
45443
+ arguments: null
45444
+ },
45445
+ {
45446
+ kind: "Scalar",
45447
+ fieldName: "gradeSampleCountFailed",
45448
+ arguments: null
45449
+ },
45450
+ {
45451
+ kind: "Scalar",
45452
+ fieldName: "gradeSampleCountRequested",
45453
+ arguments: null
45454
+ },
45455
+ {
45456
+ kind: "Scalar",
45457
+ fieldName: "gradingFailureCount",
45078
45458
  arguments: null
45079
45459
  },
45080
45460
  {
45081
45461
  kind: "Linked",
45082
- fieldName: "outliers",
45462
+ fieldName: "outlierScenarioRuns",
45083
45463
  arguments: [
45084
45464
  [
45085
45465
  "first",
@@ -45089,73 +45469,83 @@ var normalizationAst13 = {
45089
45469
  }
45090
45470
  ]
45091
45471
  ],
45092
- concreteType: "WorkspaceVerifyMetricsOutliersConnection",
45472
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnection",
45093
45473
  selections: [
45094
45474
  {
45095
45475
  kind: "Linked",
45096
45476
  fieldName: "edges",
45097
45477
  arguments: null,
45098
- concreteType: "WorkspaceVerifyMetricsOutliersConnectionEdge",
45478
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnectionEdge",
45099
45479
  selections: [
45100
45480
  {
45101
45481
  kind: "Linked",
45102
45482
  fieldName: "node",
45103
45483
  arguments: null,
45104
- concreteType: "WorkspaceVerifyOutlier",
45484
+ concreteType: "WorkspaceVerifyScenarioOutlier",
45105
45485
  selections: [
45106
45486
  {
45107
45487
  kind: "Scalar",
45108
- fieldName: "agreementRate",
45488
+ fieldName: "averageScore",
45109
45489
  arguments: null
45110
45490
  },
45111
45491
  {
45112
45492
  kind: "Scalar",
45113
- fieldName: "instability",
45493
+ fieldName: "completedSampleCount",
45114
45494
  arguments: null
45115
45495
  },
45116
45496
  {
45117
45497
  kind: "Scalar",
45118
- fieldName: "key",
45498
+ fieldName: "executionFailureCount",
45119
45499
  arguments: null
45120
45500
  },
45121
45501
  {
45122
45502
  kind: "Scalar",
45123
- fieldName: "label",
45503
+ fieldName: "failed",
45124
45504
  arguments: null
45125
45505
  },
45126
45506
  {
45127
45507
  kind: "Scalar",
45128
- fieldName: "maxRunId",
45508
+ fieldName: "gradeSampleCount",
45129
45509
  arguments: null
45130
45510
  },
45131
45511
  {
45132
45512
  kind: "Scalar",
45133
- fieldName: "messageRefId",
45513
+ fieldName: "gradingFailureCount",
45134
45514
  arguments: null
45135
45515
  },
45136
45516
  {
45137
45517
  kind: "Scalar",
45138
- fieldName: "minRunId",
45518
+ fieldName: "key",
45519
+ arguments: null
45520
+ },
45521
+ {
45522
+ kind: "Scalar",
45523
+ fieldName: "maxRunId",
45139
45524
  arguments: null
45140
45525
  },
45141
45526
  {
45142
45527
  kind: "Scalar",
45143
- fieldName: "passFlip",
45528
+ fieldName: "maxScore",
45144
45529
  arguments: null
45145
45530
  },
45146
45531
  {
45147
45532
  kind: "Scalar",
45148
- fieldName: "sampleSize",
45533
+ fieldName: "messageRefId",
45149
45534
  arguments: null
45150
45535
  },
45151
45536
  {
45152
45537
  kind: "Scalar",
45153
- fieldName: "scoreDelta",
45538
+ fieldName: "minRunId",
45154
45539
  arguments: null
45155
45540
  },
45156
45541
  {
45157
45542
  kind: "Scalar",
45158
- fieldName: "turnIndex",
45543
+ fieldName: "minScore",
45544
+ arguments: null
45545
+ },
45546
+ {
45547
+ kind: "Scalar",
45548
+ fieldName: "scenarioRunId",
45159
45549
  arguments: null
45160
45550
  }
45161
45551
  ]
@@ -45166,32 +45556,42 @@ var normalizationAst13 = {
45166
45556
  },
45167
45557
  {
45168
45558
  kind: "Scalar",
45169
- fieldName: "sampleSize",
45559
+ fieldName: "passRate",
45560
+ arguments: null
45561
+ },
45562
+ {
45563
+ kind: "Scalar",
45564
+ fieldName: "scenarioRunCountCompleted",
45565
+ arguments: null
45566
+ },
45567
+ {
45568
+ kind: "Scalar",
45569
+ fieldName: "scenarioRunCountFailed",
45170
45570
  arguments: null
45171
45571
  },
45172
45572
  {
45173
45573
  kind: "Scalar",
45174
- fieldName: "scoreSpreadMax",
45574
+ fieldName: "scenarioRunCountRequested",
45175
45575
  arguments: null
45176
45576
  },
45177
45577
  {
45178
45578
  kind: "Scalar",
45179
- fieldName: "scoreSpreadMedian",
45579
+ fieldName: "scoreMax",
45180
45580
  arguments: null
45181
45581
  },
45182
45582
  {
45183
45583
  kind: "Scalar",
45184
- fieldName: "scoreSpreadMin",
45584
+ fieldName: "scoreMean",
45185
45585
  arguments: null
45186
45586
  },
45187
45587
  {
45188
45588
  kind: "Scalar",
45189
- fieldName: "verdict",
45589
+ fieldName: "scoreMedian",
45190
45590
  arguments: null
45191
45591
  },
45192
45592
  {
45193
45593
  kind: "Scalar",
45194
- fieldName: "verdictReason",
45594
+ fieldName: "scoreMin",
45195
45595
  arguments: null
45196
45596
  }
45197
45597
  ]
@@ -45209,7 +45609,7 @@ var normalizationAst13 = {
45209
45609
  "first",
45210
45610
  {
45211
45611
  kind: "Literal",
45212
- value: 50
45612
+ value: 200
45213
45613
  }
45214
45614
  ]
45215
45615
  ],
@@ -45242,6 +45642,11 @@ var normalizationAst13 = {
45242
45642
  fieldName: "runId",
45243
45643
  arguments: null
45244
45644
  },
45645
+ {
45646
+ kind: "Scalar",
45647
+ fieldName: "scenarioRunId",
45648
+ arguments: null
45649
+ },
45245
45650
  {
45246
45651
  kind: "Scalar",
45247
45652
  fieldName: "status",
@@ -45255,7 +45660,22 @@ var normalizationAst13 = {
45255
45660
  },
45256
45661
  {
45257
45662
  kind: "Scalar",
45258
- fieldName: "scenarioRunId",
45663
+ fieldName: "scenarioDeckId",
45664
+ arguments: null
45665
+ },
45666
+ {
45667
+ kind: "Scalar",
45668
+ fieldName: "scenarioRuns",
45669
+ arguments: null
45670
+ },
45671
+ {
45672
+ kind: "Scalar",
45673
+ fieldName: "scenarioRunsCompleted",
45674
+ arguments: null
45675
+ },
45676
+ {
45677
+ kind: "Scalar",
45678
+ fieldName: "scenarioRunsFailed",
45259
45679
  arguments: null
45260
45680
  },
45261
45681
  {
@@ -45288,63 +45708,29 @@ var normalizationAst13 = {
45288
45708
  },
45289
45709
  {
45290
45710
  kind: "Linked",
45291
- fieldName: "scenarioRuns",
45292
- arguments: [
45293
- [
45294
- "first",
45295
- {
45296
- kind: "Literal",
45297
- value: 50
45298
- }
45299
- ]
45300
- ],
45301
- concreteType: "WorkspaceScenarioRunsConnection",
45711
+ fieldName: "scenarioDecks",
45712
+ arguments: null,
45713
+ concreteType: "WorkspaceScenarioDeck",
45302
45714
  selections: [
45303
45715
  {
45304
- kind: "Linked",
45305
- fieldName: "edges",
45306
- arguments: null,
45307
- concreteType: "WorkspaceScenarioRunsConnectionEdge",
45308
- selections: [
45309
- {
45310
- kind: "Linked",
45311
- fieldName: "node",
45312
- arguments: null,
45313
- concreteType: null,
45314
- selections: [
45315
- {
45316
- kind: "Scalar",
45317
- fieldName: "__typename",
45318
- arguments: null
45319
- },
45320
- {
45321
- kind: "Scalar",
45322
- fieldName: "id",
45323
- arguments: null
45324
- },
45325
- {
45326
- kind: "Scalar",
45327
- fieldName: "error",
45328
- arguments: null
45329
- },
45330
- {
45331
- kind: "Scalar",
45332
- fieldName: "finishedAt",
45333
- arguments: null
45334
- },
45335
- {
45336
- kind: "Scalar",
45337
- fieldName: "startedAt",
45338
- arguments: null
45339
- },
45340
- {
45341
- kind: "Scalar",
45342
- fieldName: "status",
45343
- arguments: null
45344
- }
45345
- ]
45346
- }
45347
- ]
45716
+ kind: "Scalar",
45717
+ fieldName: "id",
45718
+ arguments: null
45719
+ },
45720
+ {
45721
+ kind: "Scalar",
45722
+ fieldName: "description",
45723
+ arguments: null
45724
+ },
45725
+ {
45726
+ kind: "Scalar",
45727
+ fieldName: "label",
45728
+ arguments: null
45729
+ },
45730
+ {
45731
+ kind: "Scalar",
45732
+ fieldName: "path",
45733
+ arguments: null
45348
45734
  }
45349
45735
  ]
45350
45736
  },
@@ -45410,6 +45796,11 @@ var normalizationAst13 = {
45410
45796
  fieldName: "graderId",
45411
45797
  arguments: null
45412
45798
  },
45799
+ {
45800
+ kind: "Scalar",
45801
+ fieldName: "graderRepeatsPerScenario",
45802
+ arguments: null
45803
+ },
45413
45804
  {
45414
45805
  kind: "Linked",
45415
45806
  fieldName: "metrics",
@@ -45418,17 +45809,84 @@ var normalizationAst13 = {
45418
45809
  selections: [
45419
45810
  {
45420
45811
  kind: "Scalar",
45421
- fieldName: "agreementRate",
45812
+ fieldName: "executionFailureCount",
45813
+ arguments: null
45814
+ },
45815
+ {
45816
+ kind: "Linked",
45817
+ fieldName: "failureReasons",
45818
+ arguments: [
45819
+ [
45820
+ "first",
45821
+ {
45822
+ kind: "Literal",
45823
+ value: 25
45824
+ }
45825
+ ]
45826
+ ],
45827
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnection",
45828
+ selections: [
45829
+ {
45830
+ kind: "Linked",
45831
+ fieldName: "edges",
45832
+ arguments: null,
45833
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnectionEdge",
45834
+ selections: [
45835
+ {
45836
+ kind: "Linked",
45837
+ fieldName: "node",
45838
+ arguments: null,
45839
+ concreteType: "WorkspaceVerifyFailureReasonGroup",
45840
+ selections: [
45841
+ {
45842
+ kind: "Scalar",
45843
+ fieldName: "count",
45844
+ arguments: null
45845
+ },
45846
+ {
45847
+ kind: "Scalar",
45848
+ fieldName: "key",
45849
+ arguments: null
45850
+ },
45851
+ {
45852
+ kind: "Scalar",
45853
+ fieldName: "kind",
45854
+ arguments: null
45855
+ },
45856
+ {
45857
+ kind: "Scalar",
45858
+ fieldName: "reason",
45859
+ arguments: null
45860
+ }
45861
+ ]
45862
+ }
45863
+ ]
45864
+ }
45865
+ ]
45866
+ },
45867
+ {
45868
+ kind: "Scalar",
45869
+ fieldName: "gradeSampleCountCompleted",
45422
45870
  arguments: null
45423
45871
  },
45424
45872
  {
45425
45873
  kind: "Scalar",
45426
- fieldName: "instabilityCount",
45874
+ fieldName: "gradeSampleCountFailed",
45875
+ arguments: null
45876
+ },
45877
+ {
45878
+ kind: "Scalar",
45879
+ fieldName: "gradeSampleCountRequested",
45880
+ arguments: null
45881
+ },
45882
+ {
45883
+ kind: "Scalar",
45884
+ fieldName: "gradingFailureCount",
45427
45885
  arguments: null
45428
45886
  },
45429
45887
  {
45430
45888
  kind: "Linked",
45431
- fieldName: "outliers",
45889
+ fieldName: "outlierScenarioRuns",
45432
45890
  arguments: [
45433
45891
  [
45434
45892
  "first",
@@ -45438,73 +45896,83 @@ var normalizationAst13 = {
45438
45896
  }
45439
45897
  ]
45440
45898
  ],
45441
- concreteType: "WorkspaceVerifyMetricsOutliersConnection",
45899
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnection",
45442
45900
  selections: [
45443
45901
  {
45444
45902
  kind: "Linked",
45445
45903
  fieldName: "edges",
45446
45904
  arguments: null,
45447
- concreteType: "WorkspaceVerifyMetricsOutliersConnectionEdge",
45905
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnectionEdge",
45448
45906
  selections: [
45449
45907
  {
45450
45908
  kind: "Linked",
45451
45909
  fieldName: "node",
45452
45910
  arguments: null,
45453
- concreteType: "WorkspaceVerifyOutlier",
45911
+ concreteType: "WorkspaceVerifyScenarioOutlier",
45454
45912
  selections: [
45455
45913
  {
45456
45914
  kind: "Scalar",
45457
- fieldName: "agreementRate",
45915
+ fieldName: "averageScore",
45458
45916
  arguments: null
45459
45917
  },
45460
45918
  {
45461
45919
  kind: "Scalar",
45462
- fieldName: "instability",
45920
+ fieldName: "completedSampleCount",
45463
45921
  arguments: null
45464
45922
  },
45465
45923
  {
45466
45924
  kind: "Scalar",
45467
- fieldName: "key",
45925
+ fieldName: "executionFailureCount",
45468
45926
  arguments: null
45469
45927
  },
45470
45928
  {
45471
45929
  kind: "Scalar",
45472
- fieldName: "label",
45930
+ fieldName: "failed",
45473
45931
  arguments: null
45474
45932
  },
45475
45933
  {
45476
45934
  kind: "Scalar",
45477
- fieldName: "maxRunId",
45935
+ fieldName: "gradeSampleCount",
45478
45936
  arguments: null
45479
45937
  },
45480
45938
  {
45481
45939
  kind: "Scalar",
45482
- fieldName: "messageRefId",
45940
+ fieldName: "gradingFailureCount",
45483
45941
  arguments: null
45484
45942
  },
45485
45943
  {
45486
45944
  kind: "Scalar",
45487
- fieldName: "minRunId",
45945
+ fieldName: "key",
45946
+ arguments: null
45947
+ },
45948
+ {
45949
+ kind: "Scalar",
45950
+ fieldName: "maxRunId",
45951
+ arguments: null
45952
+ },
45953
+ {
45954
+ kind: "Scalar",
45955
+ fieldName: "maxScore",
45488
45956
  arguments: null
45489
45957
  },
45490
45958
  {
45491
45959
  kind: "Scalar",
45492
- fieldName: "passFlip",
45960
+ fieldName: "messageRefId",
45493
45961
  arguments: null
45494
45962
  },
45495
45963
  {
45496
45964
  kind: "Scalar",
45497
- fieldName: "sampleSize",
45965
+ fieldName: "minRunId",
45498
45966
  arguments: null
45499
45967
  },
45500
45968
  {
45501
45969
  kind: "Scalar",
45502
- fieldName: "scoreDelta",
45970
+ fieldName: "minScore",
45503
45971
  arguments: null
45504
45972
  },
45505
45973
  {
45506
45974
  kind: "Scalar",
45507
- fieldName: "turnIndex",
45975
+ fieldName: "scenarioRunId",
45508
45976
  arguments: null
45509
45977
  }
45510
45978
  ]
@@ -45515,32 +45983,42 @@ var normalizationAst13 = {
45515
45983
  },
45516
45984
  {
45517
45985
  kind: "Scalar",
45518
- fieldName: "sampleSize",
45986
+ fieldName: "passRate",
45987
+ arguments: null
45988
+ },
45989
+ {
45990
+ kind: "Scalar",
45991
+ fieldName: "scenarioRunCountCompleted",
45992
+ arguments: null
45993
+ },
45994
+ {
45995
+ kind: "Scalar",
45996
+ fieldName: "scenarioRunCountFailed",
45519
45997
  arguments: null
45520
45998
  },
45521
45999
  {
45522
46000
  kind: "Scalar",
45523
- fieldName: "scoreSpreadMax",
46001
+ fieldName: "scenarioRunCountRequested",
45524
46002
  arguments: null
45525
46003
  },
45526
46004
  {
45527
46005
  kind: "Scalar",
45528
- fieldName: "scoreSpreadMedian",
46006
+ fieldName: "scoreMax",
45529
46007
  arguments: null
45530
46008
  },
45531
46009
  {
45532
46010
  kind: "Scalar",
45533
- fieldName: "scoreSpreadMin",
46011
+ fieldName: "scoreMean",
45534
46012
  arguments: null
45535
46013
  },
45536
46014
  {
45537
46015
  kind: "Scalar",
45538
- fieldName: "verdict",
46016
+ fieldName: "scoreMedian",
45539
46017
  arguments: null
45540
46018
  },
45541
46019
  {
45542
46020
  kind: "Scalar",
45543
- fieldName: "verdictReason",
46021
+ fieldName: "scoreMin",
45544
46022
  arguments: null
45545
46023
  }
45546
46024
  ]
@@ -45558,7 +46036,7 @@ var normalizationAst13 = {
45558
46036
  "first",
45559
46037
  {
45560
46038
  kind: "Literal",
45561
- value: 50
46039
+ value: 200
45562
46040
  }
45563
46041
  ]
45564
46042
  ],
@@ -45591,6 +46069,11 @@ var normalizationAst13 = {
45591
46069
  fieldName: "runId",
45592
46070
  arguments: null
45593
46071
  },
46072
+ {
46073
+ kind: "Scalar",
46074
+ fieldName: "scenarioRunId",
46075
+ arguments: null
46076
+ },
45594
46077
  {
45595
46078
  kind: "Scalar",
45596
46079
  fieldName: "status",
@@ -45604,7 +46087,22 @@ var normalizationAst13 = {
45604
46087
  },
45605
46088
  {
45606
46089
  kind: "Scalar",
45607
- fieldName: "scenarioRunId",
46090
+ fieldName: "scenarioDeckId",
46091
+ arguments: null
46092
+ },
46093
+ {
46094
+ kind: "Scalar",
46095
+ fieldName: "scenarioRuns",
46096
+ arguments: null
46097
+ },
46098
+ {
46099
+ kind: "Scalar",
46100
+ fieldName: "scenarioRunsCompleted",
46101
+ arguments: null
46102
+ },
46103
+ {
46104
+ kind: "Scalar",
46105
+ fieldName: "scenarioRunsFailed",
45608
46106
  arguments: null
45609
46107
  },
45610
46108
  {
@@ -52591,16 +53089,11 @@ var EntrypointWorkspaceVerifyLiveWrite = iso(`
52591
53089
  field Query.EntrypointWorkspaceVerifyLiveWrite($workspaceId: ID!) {
52592
53090
  workspace(id: $workspaceId) {
52593
53091
  id
52594
- scenarioRuns(first: 50) {
52595
- edges {
52596
- node {
52597
- id
52598
- status
52599
- startedAt
52600
- finishedAt
52601
- error
52602
- }
52603
- }
53092
+ scenarioDecks {
53093
+ id
53094
+ label
53095
+ description
53096
+ path
52604
53097
  }
52605
53098
  verification {
52606
53099
  graderDecks(first: 50) {
@@ -52618,8 +53111,10 @@ var EntrypointWorkspaceVerifyLiveWrite = iso(`
52618
53111
  node {
52619
53112
  id
52620
53113
  workspaceId
53114
+ scenarioDeckId
52621
53115
  graderId
52622
- scenarioRunId
53116
+ scenarioRuns
53117
+ graderRepeatsPerScenario
52623
53118
  status
52624
53119
  startedAt
52625
53120
  finishedAt
@@ -52627,10 +53122,13 @@ var EntrypointWorkspaceVerifyLiveWrite = iso(`
52627
53122
  active
52628
53123
  completed
52629
53124
  failed
52630
- requests(first: 50) {
53125
+ scenarioRunsCompleted
53126
+ scenarioRunsFailed
53127
+ requests(first: 200) {
52631
53128
  edges {
52632
53129
  node {
52633
53130
  id
53131
+ scenarioRunId
52634
53132
  status
52635
53133
  runId
52636
53134
  error
@@ -52638,31 +53136,48 @@ var EntrypointWorkspaceVerifyLiveWrite = iso(`
52638
53136
  }
52639
53137
  }
52640
53138
  metrics {
52641
- sampleSize
52642
- agreementRate
52643
- scoreSpreadMin
52644
- scoreSpreadMedian
52645
- scoreSpreadMax
52646
- instabilityCount
52647
- verdict
52648
- verdictReason
52649
- outliers(first: 25) {
53139
+ scenarioRunCountRequested
53140
+ scenarioRunCountCompleted
53141
+ scenarioRunCountFailed
53142
+ gradeSampleCountRequested
53143
+ gradeSampleCountCompleted
53144
+ gradeSampleCountFailed
53145
+ executionFailureCount
53146
+ gradingFailureCount
53147
+ passRate
53148
+ scoreMin
53149
+ scoreMedian
53150
+ scoreMax
53151
+ scoreMean
53152
+ outlierScenarioRuns(first: 25) {
52650
53153
  edges {
52651
53154
  node {
52652
53155
  key
52653
- label
52654
- sampleSize
52655
- agreementRate
52656
- scoreDelta
52657
- passFlip
52658
- instability
53156
+ scenarioRunId
53157
+ gradeSampleCount
53158
+ completedSampleCount
53159
+ executionFailureCount
53160
+ gradingFailureCount
53161
+ averageScore
53162
+ minScore
53163
+ maxScore
53164
+ failed
52659
53165
  minRunId
52660
53166
  maxRunId
52661
- turnIndex
52662
53167
  messageRefId
52663
53168
  }
52664
53169
  }
52665
53170
  }
53171
+ failureReasons(first: 25) {
53172
+ edges {
53173
+ node {
53174
+ key
53175
+ kind
53176
+ reason
53177
+ count
53178
+ }
53179
+ }
53180
+ }
52666
53181
  }
52667
53182
  }
52668
53183
  }
@@ -52705,77 +53220,40 @@ var readerAst35 = [
52705
53220
  },
52706
53221
  {
52707
53222
  kind: "Linked",
52708
- fieldName: "scenarioRuns",
53223
+ fieldName: "scenarioDecks",
52709
53224
  alias: null,
52710
- arguments: [
52711
- [
52712
- "first",
52713
- {
52714
- kind: "Literal",
52715
- value: 50
52716
- }
52717
- ]
52718
- ],
53225
+ arguments: null,
52719
53226
  condition: null,
52720
53227
  isUpdatable: false,
52721
53228
  refetchQueryIndex: null,
52722
53229
  selections: [
52723
53230
  {
52724
- kind: "Linked",
52725
- fieldName: "edges",
53231
+ kind: "Scalar",
53232
+ fieldName: "id",
52726
53233
  alias: null,
52727
53234
  arguments: null,
52728
- condition: null,
52729
- isUpdatable: false,
52730
- refetchQueryIndex: null,
52731
- selections: [
52732
- {
52733
- kind: "Linked",
52734
- fieldName: "node",
52735
- alias: null,
52736
- arguments: null,
52737
- condition: null,
52738
- isUpdatable: false,
52739
- refetchQueryIndex: null,
52740
- selections: [
52741
- {
52742
- kind: "Scalar",
52743
- fieldName: "id",
52744
- alias: null,
52745
- arguments: null,
52746
- isUpdatable: false
52747
- },
52748
- {
52749
- kind: "Scalar",
52750
- fieldName: "status",
52751
- alias: null,
52752
- arguments: null,
52753
- isUpdatable: false
52754
- },
52755
- {
52756
- kind: "Scalar",
52757
- fieldName: "startedAt",
52758
- alias: null,
52759
- arguments: null,
52760
- isUpdatable: false
52761
- },
52762
- {
52763
- kind: "Scalar",
52764
- fieldName: "finishedAt",
52765
- alias: null,
52766
- arguments: null,
52767
- isUpdatable: false
52768
- },
52769
- {
52770
- kind: "Scalar",
52771
- fieldName: "error",
52772
- alias: null,
52773
- arguments: null,
52774
- isUpdatable: false
52775
- }
52776
- ]
52777
- }
52778
- ]
53235
+ isUpdatable: false
53236
+ },
53237
+ {
53238
+ kind: "Scalar",
53239
+ fieldName: "label",
53240
+ alias: null,
53241
+ arguments: null,
53242
+ isUpdatable: false
53243
+ },
53244
+ {
53245
+ kind: "Scalar",
53246
+ fieldName: "description",
53247
+ alias: null,
53248
+ arguments: null,
53249
+ isUpdatable: false
53250
+ },
53251
+ {
53252
+ kind: "Scalar",
53253
+ fieldName: "path",
53254
+ alias: null,
53255
+ arguments: null,
53256
+ isUpdatable: false
52779
53257
  }
52780
53258
  ]
52781
53259
  },
@@ -52906,6 +53384,13 @@ var readerAst35 = [
52906
53384
  arguments: null,
52907
53385
  isUpdatable: false
52908
53386
  },
53387
+ {
53388
+ kind: "Scalar",
53389
+ fieldName: "scenarioDeckId",
53390
+ alias: null,
53391
+ arguments: null,
53392
+ isUpdatable: false
53393
+ },
52909
53394
  {
52910
53395
  kind: "Scalar",
52911
53396
  fieldName: "graderId",
@@ -52915,7 +53400,14 @@ var readerAst35 = [
52915
53400
  },
52916
53401
  {
52917
53402
  kind: "Scalar",
52918
- fieldName: "scenarioRunId",
53403
+ fieldName: "scenarioRuns",
53404
+ alias: null,
53405
+ arguments: null,
53406
+ isUpdatable: false
53407
+ },
53408
+ {
53409
+ kind: "Scalar",
53410
+ fieldName: "graderRepeatsPerScenario",
52919
53411
  alias: null,
52920
53412
  arguments: null,
52921
53413
  isUpdatable: false
@@ -52969,6 +53461,20 @@ var readerAst35 = [
52969
53461
  arguments: null,
52970
53462
  isUpdatable: false
52971
53463
  },
53464
+ {
53465
+ kind: "Scalar",
53466
+ fieldName: "scenarioRunsCompleted",
53467
+ alias: null,
53468
+ arguments: null,
53469
+ isUpdatable: false
53470
+ },
53471
+ {
53472
+ kind: "Scalar",
53473
+ fieldName: "scenarioRunsFailed",
53474
+ alias: null,
53475
+ arguments: null,
53476
+ isUpdatable: false
53477
+ },
52972
53478
  {
52973
53479
  kind: "Linked",
52974
53480
  fieldName: "requests",
@@ -52978,7 +53484,7 @@ var readerAst35 = [
52978
53484
  "first",
52979
53485
  {
52980
53486
  kind: "Literal",
52981
- value: 50
53487
+ value: 200
52982
53488
  }
52983
53489
  ]
52984
53490
  ],
@@ -53011,6 +53517,13 @@ var readerAst35 = [
53011
53517
  arguments: null,
53012
53518
  isUpdatable: false
53013
53519
  },
53520
+ {
53521
+ kind: "Scalar",
53522
+ fieldName: "scenarioRunId",
53523
+ alias: null,
53524
+ arguments: null,
53525
+ isUpdatable: false
53526
+ },
53014
53527
  {
53015
53528
  kind: "Scalar",
53016
53529
  fieldName: "status",
@@ -53049,63 +53562,98 @@ var readerAst35 = [
53049
53562
  selections: [
53050
53563
  {
53051
53564
  kind: "Scalar",
53052
- fieldName: "sampleSize",
53565
+ fieldName: "scenarioRunCountRequested",
53566
+ alias: null,
53567
+ arguments: null,
53568
+ isUpdatable: false
53569
+ },
53570
+ {
53571
+ kind: "Scalar",
53572
+ fieldName: "scenarioRunCountCompleted",
53573
+ alias: null,
53574
+ arguments: null,
53575
+ isUpdatable: false
53576
+ },
53577
+ {
53578
+ kind: "Scalar",
53579
+ fieldName: "scenarioRunCountFailed",
53580
+ alias: null,
53581
+ arguments: null,
53582
+ isUpdatable: false
53583
+ },
53584
+ {
53585
+ kind: "Scalar",
53586
+ fieldName: "gradeSampleCountRequested",
53587
+ alias: null,
53588
+ arguments: null,
53589
+ isUpdatable: false
53590
+ },
53591
+ {
53592
+ kind: "Scalar",
53593
+ fieldName: "gradeSampleCountCompleted",
53053
53594
  alias: null,
53054
53595
  arguments: null,
53055
53596
  isUpdatable: false
53056
53597
  },
53057
53598
  {
53058
53599
  kind: "Scalar",
53059
- fieldName: "agreementRate",
53600
+ fieldName: "gradeSampleCountFailed",
53060
53601
  alias: null,
53061
53602
  arguments: null,
53062
53603
  isUpdatable: false
53063
53604
  },
53064
53605
  {
53065
53606
  kind: "Scalar",
53066
- fieldName: "scoreSpreadMin",
53607
+ fieldName: "executionFailureCount",
53067
53608
  alias: null,
53068
53609
  arguments: null,
53069
53610
  isUpdatable: false
53070
53611
  },
53071
53612
  {
53072
53613
  kind: "Scalar",
53073
- fieldName: "scoreSpreadMedian",
53614
+ fieldName: "gradingFailureCount",
53074
53615
  alias: null,
53075
53616
  arguments: null,
53076
53617
  isUpdatable: false
53077
53618
  },
53078
53619
  {
53079
53620
  kind: "Scalar",
53080
- fieldName: "scoreSpreadMax",
53621
+ fieldName: "passRate",
53081
53622
  alias: null,
53082
53623
  arguments: null,
53083
53624
  isUpdatable: false
53084
53625
  },
53085
53626
  {
53086
53627
  kind: "Scalar",
53087
- fieldName: "instabilityCount",
53628
+ fieldName: "scoreMin",
53088
53629
  alias: null,
53089
53630
  arguments: null,
53090
53631
  isUpdatable: false
53091
53632
  },
53092
53633
  {
53093
53634
  kind: "Scalar",
53094
- fieldName: "verdict",
53635
+ fieldName: "scoreMedian",
53095
53636
  alias: null,
53096
53637
  arguments: null,
53097
53638
  isUpdatable: false
53098
53639
  },
53099
53640
  {
53100
53641
  kind: "Scalar",
53101
- fieldName: "verdictReason",
53642
+ fieldName: "scoreMax",
53643
+ alias: null,
53644
+ arguments: null,
53645
+ isUpdatable: false
53646
+ },
53647
+ {
53648
+ kind: "Scalar",
53649
+ fieldName: "scoreMean",
53102
53650
  alias: null,
53103
53651
  arguments: null,
53104
53652
  isUpdatable: false
53105
53653
  },
53106
53654
  {
53107
53655
  kind: "Linked",
53108
- fieldName: "outliers",
53656
+ fieldName: "outlierScenarioRuns",
53109
53657
  alias: null,
53110
53658
  arguments: [
53111
53659
  [
@@ -53147,42 +53695,63 @@ var readerAst35 = [
53147
53695
  },
53148
53696
  {
53149
53697
  kind: "Scalar",
53150
- fieldName: "label",
53698
+ fieldName: "scenarioRunId",
53699
+ alias: null,
53700
+ arguments: null,
53701
+ isUpdatable: false
53702
+ },
53703
+ {
53704
+ kind: "Scalar",
53705
+ fieldName: "gradeSampleCount",
53706
+ alias: null,
53707
+ arguments: null,
53708
+ isUpdatable: false
53709
+ },
53710
+ {
53711
+ kind: "Scalar",
53712
+ fieldName: "completedSampleCount",
53713
+ alias: null,
53714
+ arguments: null,
53715
+ isUpdatable: false
53716
+ },
53717
+ {
53718
+ kind: "Scalar",
53719
+ fieldName: "executionFailureCount",
53151
53720
  alias: null,
53152
53721
  arguments: null,
53153
53722
  isUpdatable: false
53154
53723
  },
53155
53724
  {
53156
53725
  kind: "Scalar",
53157
- fieldName: "sampleSize",
53726
+ fieldName: "gradingFailureCount",
53158
53727
  alias: null,
53159
53728
  arguments: null,
53160
53729
  isUpdatable: false
53161
53730
  },
53162
53731
  {
53163
53732
  kind: "Scalar",
53164
- fieldName: "agreementRate",
53733
+ fieldName: "averageScore",
53165
53734
  alias: null,
53166
53735
  arguments: null,
53167
53736
  isUpdatable: false
53168
53737
  },
53169
53738
  {
53170
53739
  kind: "Scalar",
53171
- fieldName: "scoreDelta",
53740
+ fieldName: "minScore",
53172
53741
  alias: null,
53173
53742
  arguments: null,
53174
53743
  isUpdatable: false
53175
53744
  },
53176
53745
  {
53177
53746
  kind: "Scalar",
53178
- fieldName: "passFlip",
53747
+ fieldName: "maxScore",
53179
53748
  alias: null,
53180
53749
  arguments: null,
53181
53750
  isUpdatable: false
53182
53751
  },
53183
53752
  {
53184
53753
  kind: "Scalar",
53185
- fieldName: "instability",
53754
+ fieldName: "failed",
53186
53755
  alias: null,
53187
53756
  arguments: null,
53188
53757
  isUpdatable: false
@@ -53203,14 +53772,76 @@ var readerAst35 = [
53203
53772
  },
53204
53773
  {
53205
53774
  kind: "Scalar",
53206
- fieldName: "turnIndex",
53775
+ fieldName: "messageRefId",
53776
+ alias: null,
53777
+ arguments: null,
53778
+ isUpdatable: false
53779
+ }
53780
+ ]
53781
+ }
53782
+ ]
53783
+ }
53784
+ ]
53785
+ },
53786
+ {
53787
+ kind: "Linked",
53788
+ fieldName: "failureReasons",
53789
+ alias: null,
53790
+ arguments: [
53791
+ [
53792
+ "first",
53793
+ {
53794
+ kind: "Literal",
53795
+ value: 25
53796
+ }
53797
+ ]
53798
+ ],
53799
+ condition: null,
53800
+ isUpdatable: false,
53801
+ refetchQueryIndex: null,
53802
+ selections: [
53803
+ {
53804
+ kind: "Linked",
53805
+ fieldName: "edges",
53806
+ alias: null,
53807
+ arguments: null,
53808
+ condition: null,
53809
+ isUpdatable: false,
53810
+ refetchQueryIndex: null,
53811
+ selections: [
53812
+ {
53813
+ kind: "Linked",
53814
+ fieldName: "node",
53815
+ alias: null,
53816
+ arguments: null,
53817
+ condition: null,
53818
+ isUpdatable: false,
53819
+ refetchQueryIndex: null,
53820
+ selections: [
53821
+ {
53822
+ kind: "Scalar",
53823
+ fieldName: "key",
53824
+ alias: null,
53825
+ arguments: null,
53826
+ isUpdatable: false
53827
+ },
53828
+ {
53829
+ kind: "Scalar",
53830
+ fieldName: "kind",
53207
53831
  alias: null,
53208
53832
  arguments: null,
53209
53833
  isUpdatable: false
53210
53834
  },
53211
53835
  {
53212
53836
  kind: "Scalar",
53213
- fieldName: "messageRefId",
53837
+ fieldName: "reason",
53838
+ alias: null,
53839
+ arguments: null,
53840
+ isUpdatable: false
53841
+ },
53842
+ {
53843
+ kind: "Scalar",
53844
+ fieldName: "count",
53214
53845
  alias: null,
53215
53846
  arguments: null,
53216
53847
  isUpdatable: false
@@ -53244,7 +53875,7 @@ var artifact57 = {
53244
53875
  var resolver_reader_default35 = artifact57;
53245
53876
 
53246
53877
  // simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/query_text.ts
53247
- var query_text_default23 = "query EntrypointWorkspaceVerifyLiveWrite($workspaceId: ID!) { workspace____id___v_workspaceId: workspace(id: $workspaceId) { id, scenarioRuns____first___l_50: scenarioRuns(first: 50) { edges { node { __typename, id, error, finishedAt, startedAt, status, }, }, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, metrics { agreementRate, instabilityCount, outliers____first___l_25: outliers(first: 25) { edges { node { agreementRate, instability, key, label, maxRunId, messageRefId, minRunId, passFlip, sampleSize, scoreDelta, turnIndex, }, }, }, sampleSize, scoreSpreadMax, scoreSpreadMedian, scoreSpreadMin, verdict, verdictReason, }, requested, requests____first___l_50: requests(first: 50) { edges { node { id, error, runId, status, }, }, }, scenarioRunId, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, },}";
53878
+ var query_text_default23 = "query EntrypointWorkspaceVerifyLiveWrite($workspaceId: ID!) { workspace____id___v_workspaceId: workspace(id: $workspaceId) { id, scenarioDecks { id, description, label, path, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, graderRepeatsPerScenario, metrics { executionFailureCount, failureReasons____first___l_25: failureReasons(first: 25) { edges { node { count, key, kind, reason, }, }, }, gradeSampleCountCompleted, gradeSampleCountFailed, gradeSampleCountRequested, gradingFailureCount, outlierScenarioRuns____first___l_25: outlierScenarioRuns(first: 25) { edges { node { averageScore, completedSampleCount, executionFailureCount, failed, gradeSampleCount, gradingFailureCount, key, maxRunId, maxScore, messageRefId, minRunId, minScore, scenarioRunId, }, }, }, passRate, scenarioRunCountCompleted, scenarioRunCountFailed, scenarioRunCountRequested, scoreMax, scoreMean, scoreMedian, scoreMin, }, requested, requests____first___l_200: requests(first: 200) { edges { node { id, error, runId, scenarioRunId, status, }, }, }, scenarioDeckId, scenarioRuns, scenarioRunsCompleted, scenarioRunsFailed, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, },}";
53248
53879
 
53249
53880
  // simulator-ui/__generated__/__isograph/Query/EntrypointWorkspaceVerifyLiveWrite/normalization_ast.ts
53250
53881
  var normalizationAst23 = {
@@ -53271,63 +53902,29 @@ var normalizationAst23 = {
53271
53902
  },
53272
53903
  {
53273
53904
  kind: "Linked",
53274
- fieldName: "scenarioRuns",
53275
- arguments: [
53276
- [
53277
- "first",
53278
- {
53279
- kind: "Literal",
53280
- value: 50
53281
- }
53282
- ]
53283
- ],
53284
- concreteType: "WorkspaceScenarioRunsConnection",
53905
+ fieldName: "scenarioDecks",
53906
+ arguments: null,
53907
+ concreteType: "WorkspaceScenarioDeck",
53285
53908
  selections: [
53286
53909
  {
53287
- kind: "Linked",
53288
- fieldName: "edges",
53289
- arguments: null,
53290
- concreteType: "WorkspaceScenarioRunsConnectionEdge",
53291
- selections: [
53292
- {
53293
- kind: "Linked",
53294
- fieldName: "node",
53295
- arguments: null,
53296
- concreteType: null,
53297
- selections: [
53298
- {
53299
- kind: "Scalar",
53300
- fieldName: "__typename",
53301
- arguments: null
53302
- },
53303
- {
53304
- kind: "Scalar",
53305
- fieldName: "id",
53306
- arguments: null
53307
- },
53308
- {
53309
- kind: "Scalar",
53310
- fieldName: "error",
53311
- arguments: null
53312
- },
53313
- {
53314
- kind: "Scalar",
53315
- fieldName: "finishedAt",
53316
- arguments: null
53317
- },
53318
- {
53319
- kind: "Scalar",
53320
- fieldName: "startedAt",
53321
- arguments: null
53322
- },
53323
- {
53324
- kind: "Scalar",
53325
- fieldName: "status",
53326
- arguments: null
53327
- }
53328
- ]
53329
- }
53330
- ]
53910
+ kind: "Scalar",
53911
+ fieldName: "id",
53912
+ arguments: null
53913
+ },
53914
+ {
53915
+ kind: "Scalar",
53916
+ fieldName: "description",
53917
+ arguments: null
53918
+ },
53919
+ {
53920
+ kind: "Scalar",
53921
+ fieldName: "label",
53922
+ arguments: null
53923
+ },
53924
+ {
53925
+ kind: "Scalar",
53926
+ fieldName: "path",
53927
+ arguments: null
53331
53928
  }
53332
53929
  ]
53333
53930
  },
@@ -53393,6 +53990,11 @@ var normalizationAst23 = {
53393
53990
  fieldName: "graderId",
53394
53991
  arguments: null
53395
53992
  },
53993
+ {
53994
+ kind: "Scalar",
53995
+ fieldName: "graderRepeatsPerScenario",
53996
+ arguments: null
53997
+ },
53396
53998
  {
53397
53999
  kind: "Linked",
53398
54000
  fieldName: "metrics",
@@ -53401,17 +54003,84 @@ var normalizationAst23 = {
53401
54003
  selections: [
53402
54004
  {
53403
54005
  kind: "Scalar",
53404
- fieldName: "agreementRate",
54006
+ fieldName: "executionFailureCount",
54007
+ arguments: null
54008
+ },
54009
+ {
54010
+ kind: "Linked",
54011
+ fieldName: "failureReasons",
54012
+ arguments: [
54013
+ [
54014
+ "first",
54015
+ {
54016
+ kind: "Literal",
54017
+ value: 25
54018
+ }
54019
+ ]
54020
+ ],
54021
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnection",
54022
+ selections: [
54023
+ {
54024
+ kind: "Linked",
54025
+ fieldName: "edges",
54026
+ arguments: null,
54027
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnectionEdge",
54028
+ selections: [
54029
+ {
54030
+ kind: "Linked",
54031
+ fieldName: "node",
54032
+ arguments: null,
54033
+ concreteType: "WorkspaceVerifyFailureReasonGroup",
54034
+ selections: [
54035
+ {
54036
+ kind: "Scalar",
54037
+ fieldName: "count",
54038
+ arguments: null
54039
+ },
54040
+ {
54041
+ kind: "Scalar",
54042
+ fieldName: "key",
54043
+ arguments: null
54044
+ },
54045
+ {
54046
+ kind: "Scalar",
54047
+ fieldName: "kind",
54048
+ arguments: null
54049
+ },
54050
+ {
54051
+ kind: "Scalar",
54052
+ fieldName: "reason",
54053
+ arguments: null
54054
+ }
54055
+ ]
54056
+ }
54057
+ ]
54058
+ }
54059
+ ]
54060
+ },
54061
+ {
54062
+ kind: "Scalar",
54063
+ fieldName: "gradeSampleCountCompleted",
54064
+ arguments: null
54065
+ },
54066
+ {
54067
+ kind: "Scalar",
54068
+ fieldName: "gradeSampleCountFailed",
54069
+ arguments: null
54070
+ },
54071
+ {
54072
+ kind: "Scalar",
54073
+ fieldName: "gradeSampleCountRequested",
53405
54074
  arguments: null
53406
54075
  },
53407
54076
  {
53408
54077
  kind: "Scalar",
53409
- fieldName: "instabilityCount",
54078
+ fieldName: "gradingFailureCount",
53410
54079
  arguments: null
53411
54080
  },
53412
54081
  {
53413
54082
  kind: "Linked",
53414
- fieldName: "outliers",
54083
+ fieldName: "outlierScenarioRuns",
53415
54084
  arguments: [
53416
54085
  [
53417
54086
  "first",
@@ -53421,73 +54090,83 @@ var normalizationAst23 = {
53421
54090
  }
53422
54091
  ]
53423
54092
  ],
53424
- concreteType: "WorkspaceVerifyMetricsOutliersConnection",
54093
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnection",
53425
54094
  selections: [
53426
54095
  {
53427
54096
  kind: "Linked",
53428
54097
  fieldName: "edges",
53429
54098
  arguments: null,
53430
- concreteType: "WorkspaceVerifyMetricsOutliersConnectionEdge",
54099
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnectionEdge",
53431
54100
  selections: [
53432
54101
  {
53433
54102
  kind: "Linked",
53434
54103
  fieldName: "node",
53435
54104
  arguments: null,
53436
- concreteType: "WorkspaceVerifyOutlier",
54105
+ concreteType: "WorkspaceVerifyScenarioOutlier",
53437
54106
  selections: [
53438
54107
  {
53439
54108
  kind: "Scalar",
53440
- fieldName: "agreementRate",
54109
+ fieldName: "averageScore",
53441
54110
  arguments: null
53442
54111
  },
53443
54112
  {
53444
54113
  kind: "Scalar",
53445
- fieldName: "instability",
54114
+ fieldName: "completedSampleCount",
53446
54115
  arguments: null
53447
54116
  },
53448
54117
  {
53449
54118
  kind: "Scalar",
53450
- fieldName: "key",
54119
+ fieldName: "executionFailureCount",
53451
54120
  arguments: null
53452
54121
  },
53453
54122
  {
53454
54123
  kind: "Scalar",
53455
- fieldName: "label",
54124
+ fieldName: "failed",
53456
54125
  arguments: null
53457
54126
  },
53458
54127
  {
53459
54128
  kind: "Scalar",
53460
- fieldName: "maxRunId",
54129
+ fieldName: "gradeSampleCount",
53461
54130
  arguments: null
53462
54131
  },
53463
54132
  {
53464
54133
  kind: "Scalar",
53465
- fieldName: "messageRefId",
54134
+ fieldName: "gradingFailureCount",
53466
54135
  arguments: null
53467
54136
  },
53468
54137
  {
53469
54138
  kind: "Scalar",
53470
- fieldName: "minRunId",
54139
+ fieldName: "key",
54140
+ arguments: null
54141
+ },
54142
+ {
54143
+ kind: "Scalar",
54144
+ fieldName: "maxRunId",
53471
54145
  arguments: null
53472
54146
  },
53473
54147
  {
53474
54148
  kind: "Scalar",
53475
- fieldName: "passFlip",
54149
+ fieldName: "maxScore",
53476
54150
  arguments: null
53477
54151
  },
53478
54152
  {
53479
54153
  kind: "Scalar",
53480
- fieldName: "sampleSize",
54154
+ fieldName: "messageRefId",
54155
+ arguments: null
54156
+ },
54157
+ {
54158
+ kind: "Scalar",
54159
+ fieldName: "minRunId",
53481
54160
  arguments: null
53482
54161
  },
53483
54162
  {
53484
54163
  kind: "Scalar",
53485
- fieldName: "scoreDelta",
54164
+ fieldName: "minScore",
53486
54165
  arguments: null
53487
54166
  },
53488
54167
  {
53489
54168
  kind: "Scalar",
53490
- fieldName: "turnIndex",
54169
+ fieldName: "scenarioRunId",
53491
54170
  arguments: null
53492
54171
  }
53493
54172
  ]
@@ -53498,32 +54177,42 @@ var normalizationAst23 = {
53498
54177
  },
53499
54178
  {
53500
54179
  kind: "Scalar",
53501
- fieldName: "sampleSize",
54180
+ fieldName: "passRate",
54181
+ arguments: null
54182
+ },
54183
+ {
54184
+ kind: "Scalar",
54185
+ fieldName: "scenarioRunCountCompleted",
54186
+ arguments: null
54187
+ },
54188
+ {
54189
+ kind: "Scalar",
54190
+ fieldName: "scenarioRunCountFailed",
53502
54191
  arguments: null
53503
54192
  },
53504
54193
  {
53505
54194
  kind: "Scalar",
53506
- fieldName: "scoreSpreadMax",
54195
+ fieldName: "scenarioRunCountRequested",
53507
54196
  arguments: null
53508
54197
  },
53509
54198
  {
53510
54199
  kind: "Scalar",
53511
- fieldName: "scoreSpreadMedian",
54200
+ fieldName: "scoreMax",
53512
54201
  arguments: null
53513
54202
  },
53514
54203
  {
53515
54204
  kind: "Scalar",
53516
- fieldName: "scoreSpreadMin",
54205
+ fieldName: "scoreMean",
53517
54206
  arguments: null
53518
54207
  },
53519
54208
  {
53520
54209
  kind: "Scalar",
53521
- fieldName: "verdict",
54210
+ fieldName: "scoreMedian",
53522
54211
  arguments: null
53523
54212
  },
53524
54213
  {
53525
54214
  kind: "Scalar",
53526
- fieldName: "verdictReason",
54215
+ fieldName: "scoreMin",
53527
54216
  arguments: null
53528
54217
  }
53529
54218
  ]
@@ -53541,7 +54230,7 @@ var normalizationAst23 = {
53541
54230
  "first",
53542
54231
  {
53543
54232
  kind: "Literal",
53544
- value: 50
54233
+ value: 200
53545
54234
  }
53546
54235
  ]
53547
54236
  ],
@@ -53574,6 +54263,11 @@ var normalizationAst23 = {
53574
54263
  fieldName: "runId",
53575
54264
  arguments: null
53576
54265
  },
54266
+ {
54267
+ kind: "Scalar",
54268
+ fieldName: "scenarioRunId",
54269
+ arguments: null
54270
+ },
53577
54271
  {
53578
54272
  kind: "Scalar",
53579
54273
  fieldName: "status",
@@ -53587,7 +54281,22 @@ var normalizationAst23 = {
53587
54281
  },
53588
54282
  {
53589
54283
  kind: "Scalar",
53590
- fieldName: "scenarioRunId",
54284
+ fieldName: "scenarioDeckId",
54285
+ arguments: null
54286
+ },
54287
+ {
54288
+ kind: "Scalar",
54289
+ fieldName: "scenarioRuns",
54290
+ arguments: null
54291
+ },
54292
+ {
54293
+ kind: "Scalar",
54294
+ fieldName: "scenarioRunsCompleted",
54295
+ arguments: null
54296
+ },
54297
+ {
54298
+ kind: "Scalar",
54299
+ fieldName: "scenarioRunsFailed",
53591
54300
  arguments: null
53592
54301
  },
53593
54302
  {
@@ -53694,7 +54403,7 @@ var entrypoint_default23 = artifact58;
53694
54403
 
53695
54404
  // simulator-ui/subscriptions/__generated__/workspaceVerifyLiveSubscriptionQuery.ts
53696
54405
  var WORKSPACE_VERIFY_LIVE_WRITE_ROOT_KEY = "workspace____id___v_workspaceId";
53697
- var WORKSPACE_VERIFY_LIVE_SUBSCRIPTION_QUERY = "subscription WorkspaceVerifyLiveSubscription($workspaceId: ID!, $fromOffset: Int) {\n workspaceVerifyLive(workspaceId: $workspaceId, fromOffset: $fromOffset) {\n cursor\n sourceOffset\n occurredAt\n node {\n id\n scenarioRuns____first___l_50: scenarioRuns(first: 50) {\n edges {\n node {\n __typename\n id\n error\n finishedAt\n startedAt\n status\n }\n }\n }\n verification {\n batches____first___l_50: batches(first: 50) {\n edges {\n node {\n id\n active\n completed\n failed\n finishedAt\n graderId\n metrics {\n agreementRate\n instabilityCount\n outliers____first___l_25: outliers(first: 25) {\n edges {\n node {\n agreementRate\n instability\n key\n label\n maxRunId\n messageRefId\n minRunId\n passFlip\n sampleSize\n scoreDelta\n turnIndex\n }\n }\n }\n sampleSize\n scoreSpreadMax\n scoreSpreadMedian\n scoreSpreadMin\n verdict\n verdictReason\n }\n requested\n requests____first___l_50: requests(first: 50) {\n edges {\n node {\n id\n error\n runId\n status\n }\n }\n }\n scenarioRunId\n startedAt\n status\n workspaceId\n }\n }\n }\n graderDecks____first___l_50: graderDecks(first: 50) {\n edges {\n node {\n id\n description\n label\n path\n }\n }\n }\n }\n }\n }\n}\n";
54406
+ var WORKSPACE_VERIFY_LIVE_SUBSCRIPTION_QUERY = "subscription WorkspaceVerifyLiveSubscription($workspaceId: ID!, $fromOffset: Int) {\n workspaceVerifyLive(workspaceId: $workspaceId, fromOffset: $fromOffset) {\n cursor\n sourceOffset\n occurredAt\n node {\n id\n scenarioDecks {\n id\n description\n label\n path\n }\n verification {\n batches____first___l_50: batches(first: 50) {\n edges {\n node {\n id\n active\n completed\n failed\n finishedAt\n graderId\n graderRepeatsPerScenario\n metrics {\n executionFailureCount\n failureReasons____first___l_25: failureReasons(first: 25) {\n edges {\n node {\n count\n key\n kind\n reason\n }\n }\n }\n gradeSampleCountCompleted\n gradeSampleCountFailed\n gradeSampleCountRequested\n gradingFailureCount\n outlierScenarioRuns____first___l_25: outlierScenarioRuns(first: 25) {\n edges {\n node {\n averageScore\n completedSampleCount\n executionFailureCount\n failed\n gradeSampleCount\n gradingFailureCount\n key\n maxRunId\n maxScore\n messageRefId\n minRunId\n minScore\n scenarioRunId\n }\n }\n }\n passRate\n scenarioRunCountCompleted\n scenarioRunCountFailed\n scenarioRunCountRequested\n scoreMax\n scoreMean\n scoreMedian\n scoreMin\n }\n requested\n requests____first___l_200: requests(first: 200) {\n edges {\n node {\n id\n error\n runId\n scenarioRunId\n status\n }\n }\n }\n scenarioDeckId\n scenarioRuns\n scenarioRunsCompleted\n scenarioRunsFailed\n startedAt\n status\n workspaceId\n }\n }\n }\n graderDecks____first___l_50: graderDecks(first: 50) {\n edges {\n node {\n id\n description\n label\n path\n }\n }\n }\n }\n }\n }\n}\n";
53698
54407
 
53699
54408
  // simulator-ui/subscriptions/GambitWorkspaceVerifyLiveSubscription.ts
53700
54409
  var WORKSPACE_ROOT_KEY4 = WORKSPACE_VERIFY_LIVE_WRITE_ROOT_KEY;
@@ -53727,12 +54436,36 @@ var gambitWorkspaceVerifyLiveSubscription = defineGambitSubscription({
53727
54436
  });
53728
54437
  var GambitWorkspaceVerifyLiveSubscription_default = gambitWorkspaceVerifyLiveSubscription;
53729
54438
 
54439
+ // simulator-ui/src/verify_unified.ts
54440
+ var VERIFY_LIMITS = {
54441
+ scenarioRunsMax: 24,
54442
+ graderRepeatsMax: 24,
54443
+ concurrencyMax: 6
54444
+ };
54445
+ var VERIFY_DEFAULTS = {
54446
+ scenarioRuns: 10,
54447
+ graderRepeatsPerScenario: 10,
54448
+ concurrency: 4
54449
+ };
54450
+ function sortVerifyOutlierScenarioRuns(rows) {
54451
+ return [
54452
+ ...rows
54453
+ ].sort((left, right) => {
54454
+ if (left.failed !== right.failed) return left.failed ? -1 : 1;
54455
+ const leftScore = typeof left.averageScore === "number" ? left.averageScore : Number.POSITIVE_INFINITY;
54456
+ const rightScore = typeof right.averageScore === "number" ? right.averageScore : Number.POSITIVE_INFINITY;
54457
+ if (leftScore !== rightScore) return leftScore - rightScore;
54458
+ return left.scenarioRunId.localeCompare(right.scenarioRunId);
54459
+ });
54460
+ }
54461
+
53730
54462
  // simulator-ui/isograph/components/Query/SimulatorVerifyPage.tsx
53731
- var MAX_BATCH_SIZE2 = 24;
53732
- var MAX_BATCH_CONCURRENCY2 = 6;
53733
- var DEFAULT_BATCH_SIZE2 = 8;
53734
- var DEFAULT_BATCH_CONCURRENCY2 = 3;
53735
- var NO_SCENARIO_RUN_VALUE2 = "__workspace_context__";
54463
+ var MAX_SCENARIO_RUNS = VERIFY_LIMITS.scenarioRunsMax;
54464
+ var MAX_GRADER_REPEATS = VERIFY_LIMITS.graderRepeatsMax;
54465
+ var MAX_BATCH_CONCURRENCY2 = VERIFY_LIMITS.concurrencyMax;
54466
+ var DEFAULT_SCENARIO_RUNS = VERIFY_DEFAULTS.scenarioRuns;
54467
+ var DEFAULT_GRADER_REPEATS = VERIFY_DEFAULTS.graderRepeatsPerScenario;
54468
+ var DEFAULT_BATCH_CONCURRENCY2 = VERIFY_DEFAULTS.concurrency;
53736
54469
  function getRoutePrefix2(path) {
53737
54470
  return path === "/isograph" || path.startsWith("/isograph/") ? "/isograph" : "";
53738
54471
  }
@@ -53754,19 +54487,18 @@ function clampInt2(value, min, max) {
53754
54487
  const rounded = Number.isFinite(value) ? Math.round(value) : min;
53755
54488
  return Math.max(min, Math.min(max, rounded));
53756
54489
  }
54490
+ function formatPercent(value) {
54491
+ if (typeof value !== "number" || !Number.isFinite(value)) return "-";
54492
+ return `${Math.round(value * 100)}%`;
54493
+ }
53757
54494
  var SimulatorVerifyPage = iso(`
53758
54495
  field Workspace.VerifyTab @component {
53759
54496
  id
53760
- scenarioRuns(first: 50) {
53761
- edges {
53762
- node {
53763
- id
53764
- status
53765
- startedAt
53766
- finishedAt
53767
- error
53768
- }
53769
- }
54497
+ scenarioDecks {
54498
+ id
54499
+ label
54500
+ description
54501
+ path
53770
54502
  }
53771
54503
  verification {
53772
54504
  graderDecks(first: 50) {
@@ -53784,8 +54516,10 @@ var SimulatorVerifyPage = iso(`
53784
54516
  node {
53785
54517
  id
53786
54518
  workspaceId
54519
+ scenarioDeckId
53787
54520
  graderId
53788
- scenarioRunId
54521
+ scenarioRuns
54522
+ graderRepeatsPerScenario
53789
54523
  status
53790
54524
  startedAt
53791
54525
  finishedAt
@@ -53793,10 +54527,13 @@ var SimulatorVerifyPage = iso(`
53793
54527
  active
53794
54528
  completed
53795
54529
  failed
53796
- requests(first: 50) {
54530
+ scenarioRunsCompleted
54531
+ scenarioRunsFailed
54532
+ requests(first: 200) {
53797
54533
  edges {
53798
54534
  node {
53799
54535
  id
54536
+ scenarioRunId
53800
54537
  status
53801
54538
  runId
53802
54539
  error
@@ -53804,31 +54541,48 @@ var SimulatorVerifyPage = iso(`
53804
54541
  }
53805
54542
  }
53806
54543
  metrics {
53807
- sampleSize
53808
- agreementRate
53809
- scoreSpreadMin
53810
- scoreSpreadMedian
53811
- scoreSpreadMax
53812
- instabilityCount
53813
- verdict
53814
- verdictReason
53815
- outliers(first: 25) {
54544
+ scenarioRunCountRequested
54545
+ scenarioRunCountCompleted
54546
+ scenarioRunCountFailed
54547
+ gradeSampleCountRequested
54548
+ gradeSampleCountCompleted
54549
+ gradeSampleCountFailed
54550
+ executionFailureCount
54551
+ gradingFailureCount
54552
+ passRate
54553
+ scoreMin
54554
+ scoreMedian
54555
+ scoreMax
54556
+ scoreMean
54557
+ outlierScenarioRuns(first: 25) {
53816
54558
  edges {
53817
54559
  node {
53818
54560
  key
53819
- label
53820
- sampleSize
53821
- agreementRate
53822
- scoreDelta
53823
- passFlip
53824
- instability
54561
+ scenarioRunId
54562
+ gradeSampleCount
54563
+ completedSampleCount
54564
+ executionFailureCount
54565
+ gradingFailureCount
54566
+ averageScore
54567
+ minScore
54568
+ maxScore
54569
+ failed
53825
54570
  minRunId
53826
54571
  maxRunId
53827
- turnIndex
53828
54572
  messageRefId
53829
54573
  }
53830
54574
  }
53831
54575
  }
54576
+ failureReasons(first: 25) {
54577
+ edges {
54578
+ node {
54579
+ key
54580
+ kind
54581
+ reason
54582
+ count
54583
+ }
54584
+ }
54585
+ }
53832
54586
  }
53833
54587
  }
53834
54588
  }
@@ -53848,6 +54602,19 @@ var SimulatorVerifyPage = iso(`
53848
54602
  useGambitTypedSubscription(GambitWorkspaceVerifyLiveSubscription_default, workspaceId ? {
53849
54603
  workspaceId
53850
54604
  } : null);
54605
+ const scenarioDecks = (0, import_react65.useMemo)(() => data.scenarioDecks?.flatMap((deck) => {
54606
+ if (!deck?.id || !deck.label) return [];
54607
+ return [
54608
+ {
54609
+ id: deck.id,
54610
+ label: deck.label,
54611
+ description: deck.description ?? null,
54612
+ path: deck.path ?? ""
54613
+ }
54614
+ ];
54615
+ }) ?? [], [
54616
+ data.scenarioDecks
54617
+ ]);
53851
54618
  const graders = (0, import_react65.useMemo)(() => (data.verification?.graderDecks?.edges ?? []).flatMap((edge) => {
53852
54619
  const grader = edge?.node;
53853
54620
  if (!grader?.id || !grader.label) return [];
@@ -53862,33 +54629,16 @@ var SimulatorVerifyPage = iso(`
53862
54629
  }), [
53863
54630
  data.verification?.graderDecks?.edges
53864
54631
  ]);
53865
- const scenarioRuns = (0, import_react65.useMemo)(() => (data.scenarioRuns?.edges ?? []).flatMap((edge) => {
53866
- const run = edge?.node;
53867
- if (!run?.id) return [];
53868
- return [
53869
- {
53870
- id: run.id,
53871
- status: toBatchStatus(run.status),
53872
- startedAt: run.startedAt ?? null,
53873
- finishedAt: run.finishedAt ?? null,
53874
- error: run.error ?? null
53875
- }
53876
- ];
53877
- }).sort((left, right) => {
53878
- const leftKey = left.finishedAt ?? left.startedAt ?? left.id;
53879
- const rightKey = right.finishedAt ?? right.startedAt ?? right.id;
53880
- return rightKey.localeCompare(leftKey);
53881
- }), [
53882
- data.scenarioRuns?.edges
53883
- ]);
53884
54632
  const batches = (0, import_react65.useMemo)(() => (data.verification?.batches?.edges ?? []).flatMap((edge) => {
53885
54633
  const batch = edge?.node;
53886
54634
  if (!batch?.id || !batch.graderId) return [];
53887
54635
  return [
53888
54636
  {
53889
54637
  id: batch.id,
54638
+ scenarioDeckId: batch.scenarioDeckId ?? null,
53890
54639
  graderId: batch.graderId,
53891
- scenarioRunId: batch.scenarioRunId ?? null,
54640
+ scenarioRuns: batch.scenarioRuns ?? 0,
54641
+ graderRepeatsPerScenario: batch.graderRepeatsPerScenario ?? 0,
53892
54642
  status: toBatchStatus(batch.status),
53893
54643
  startedAt: batch.startedAt ?? null,
53894
54644
  finishedAt: batch.finishedAt ?? null,
@@ -53896,12 +54646,15 @@ var SimulatorVerifyPage = iso(`
53896
54646
  active: batch.active ?? 0,
53897
54647
  completed: batch.completed ?? 0,
53898
54648
  failed: batch.failed ?? 0,
54649
+ scenarioRunsCompleted: batch.scenarioRunsCompleted ?? 0,
54650
+ scenarioRunsFailed: batch.scenarioRunsFailed ?? 0,
53899
54651
  requests: (batch.requests?.edges ?? []).flatMap((requestEdge) => {
53900
54652
  const request = requestEdge?.node;
53901
54653
  if (!request?.id) return [];
53902
54654
  return [
53903
54655
  {
53904
54656
  id: request.id,
54657
+ scenarioRunId: request.scenarioRunId ?? void 0,
53905
54658
  status: toBatchRequestStatus(request.status),
53906
54659
  runId: request.runId ?? void 0,
53907
54660
  error: request.error ?? void 0
@@ -53909,32 +54662,51 @@ var SimulatorVerifyPage = iso(`
53909
54662
  ];
53910
54663
  }),
53911
54664
  metrics: batch.metrics ? {
53912
- sampleSize: batch.metrics.sampleSize ?? 0,
53913
- agreementRate: typeof batch.metrics.agreementRate === "number" ? batch.metrics.agreementRate : null,
53914
- scoreSpreadMin: typeof batch.metrics.scoreSpreadMin === "number" ? batch.metrics.scoreSpreadMin : null,
53915
- scoreSpreadMedian: typeof batch.metrics.scoreSpreadMedian === "number" ? batch.metrics.scoreSpreadMedian : null,
53916
- scoreSpreadMax: typeof batch.metrics.scoreSpreadMax === "number" ? batch.metrics.scoreSpreadMax : null,
53917
- instabilityCount: batch.metrics.instabilityCount ?? 0,
53918
- verdict: batch.metrics.verdict === "PASS" || batch.metrics.verdict === "WARN" || batch.metrics.verdict === "FAIL" ? batch.metrics.verdict : "WARN",
53919
- verdictReason: batch.metrics.verdictReason ?? "Verify batch completed.",
53920
- outliers: (batch.metrics.outliers?.edges ?? []).flatMap((outlierEdge) => {
54665
+ scenarioRunCountRequested: batch.metrics.scenarioRunCountRequested ?? 0,
54666
+ scenarioRunCountCompleted: batch.metrics.scenarioRunCountCompleted ?? 0,
54667
+ scenarioRunCountFailed: batch.metrics.scenarioRunCountFailed ?? 0,
54668
+ gradeSampleCountRequested: batch.metrics.gradeSampleCountRequested ?? 0,
54669
+ gradeSampleCountCompleted: batch.metrics.gradeSampleCountCompleted ?? 0,
54670
+ gradeSampleCountFailed: batch.metrics.gradeSampleCountFailed ?? 0,
54671
+ executionFailureCount: batch.metrics.executionFailureCount ?? 0,
54672
+ gradingFailureCount: batch.metrics.gradingFailureCount ?? 0,
54673
+ passRate: typeof batch.metrics.passRate === "number" ? batch.metrics.passRate : null,
54674
+ scoreMin: typeof batch.metrics.scoreMin === "number" ? batch.metrics.scoreMin : null,
54675
+ scoreMedian: typeof batch.metrics.scoreMedian === "number" ? batch.metrics.scoreMedian : null,
54676
+ scoreMax: typeof batch.metrics.scoreMax === "number" ? batch.metrics.scoreMax : null,
54677
+ scoreMean: typeof batch.metrics.scoreMean === "number" ? batch.metrics.scoreMean : null,
54678
+ outlierScenarioRuns: (batch.metrics.outlierScenarioRuns?.edges ?? []).flatMap((outlierEdge) => {
53921
54679
  const outlier = outlierEdge?.node;
53922
- if (!outlier?.key || !outlier.label) return [];
54680
+ if (!outlier?.key || !outlier.scenarioRunId) return [];
53923
54681
  return [
53924
54682
  {
53925
54683
  key: outlier.key,
53926
- label: outlier.label,
53927
- sampleSize: outlier.sampleSize ?? 0,
53928
- agreementRate: typeof outlier.agreementRate === "number" ? outlier.agreementRate : null,
53929
- scoreDelta: typeof outlier.scoreDelta === "number" ? outlier.scoreDelta : null,
53930
- passFlip: Boolean(outlier.passFlip),
53931
- instability: Boolean(outlier.instability),
54684
+ scenarioRunId: outlier.scenarioRunId,
54685
+ gradeSampleCount: outlier.gradeSampleCount ?? 0,
54686
+ completedSampleCount: outlier.completedSampleCount ?? 0,
54687
+ executionFailureCount: outlier.executionFailureCount ?? 0,
54688
+ gradingFailureCount: outlier.gradingFailureCount ?? 0,
54689
+ averageScore: typeof outlier.averageScore === "number" ? outlier.averageScore : null,
54690
+ minScore: typeof outlier.minScore === "number" ? outlier.minScore : null,
54691
+ maxScore: typeof outlier.maxScore === "number" ? outlier.maxScore : null,
54692
+ failed: Boolean(outlier.failed),
53932
54693
  minRunId: outlier.minRunId ?? void 0,
53933
54694
  maxRunId: outlier.maxRunId ?? void 0,
53934
- turnIndex: typeof outlier.turnIndex === "number" ? outlier.turnIndex : void 0,
53935
54695
  messageRefId: outlier.messageRefId ?? void 0
53936
54696
  }
53937
54697
  ];
54698
+ }),
54699
+ failureReasons: (batch.metrics.failureReasons?.edges ?? []).flatMap((reasonEdge) => {
54700
+ const reason = reasonEdge?.node;
54701
+ if (!reason?.key || !reason.reason) return [];
54702
+ return [
54703
+ {
54704
+ key: reason.key,
54705
+ kind: reason.kind === "GRADING" ? "grading" : "execution",
54706
+ reason: reason.reason,
54707
+ count: reason.count ?? 0
54708
+ }
54709
+ ];
53938
54710
  })
53939
54711
  } : null
53940
54712
  }
@@ -53942,10 +54714,11 @@ var SimulatorVerifyPage = iso(`
53942
54714
  }), [
53943
54715
  data.verification?.batches?.edges
53944
54716
  ]);
53945
- const [selectedScenarioRunId, setSelectedScenarioRunId] = (0, import_react65.useState)(null);
54717
+ const [selectedScenarioDeckId, setSelectedScenarioDeckId] = (0, import_react65.useState)(null);
53946
54718
  const [selectedGraderId, setSelectedGraderId] = (0, import_react65.useState)(null);
53947
54719
  const [selectedBatchId, setSelectedBatchId] = (0, import_react65.useState)(null);
53948
- const [batchSize, setBatchSize] = (0, import_react65.useState)(DEFAULT_BATCH_SIZE2);
54720
+ const [scenarioRuns, setScenarioRuns] = (0, import_react65.useState)(DEFAULT_SCENARIO_RUNS);
54721
+ const [graderRepeatsPerScenario, setGraderRepeatsPerScenario] = (0, import_react65.useState)(DEFAULT_GRADER_REPEATS);
53949
54722
  const [batchConcurrency, setBatchConcurrency] = (0, import_react65.useState)(DEFAULT_BATCH_CONCURRENCY2);
53950
54723
  const [mutationError, setMutationError] = (0, import_react65.useState)(null);
53951
54724
  (0, import_react65.useEffect)(() => {
@@ -53958,18 +54731,18 @@ var SimulatorVerifyPage = iso(`
53958
54731
  selectedGraderId
53959
54732
  ]);
53960
54733
  (0, import_react65.useEffect)(() => {
53961
- if (selectedScenarioRunId && scenarioRuns.some((scenarioRun) => scenarioRun.id === selectedScenarioRunId)) {
54734
+ if (selectedScenarioDeckId && scenarioDecks.some((deck) => deck.id === selectedScenarioDeckId)) {
53962
54735
  return;
53963
54736
  }
53964
- setSelectedScenarioRunId(scenarioRuns[0]?.id ?? null);
54737
+ setSelectedScenarioDeckId(scenarioDecks[0]?.id ?? null);
53965
54738
  }, [
53966
- scenarioRuns,
53967
- selectedScenarioRunId
54739
+ scenarioDecks,
54740
+ selectedScenarioDeckId
53968
54741
  ]);
53969
54742
  const filteredBatches = (0, import_react65.useMemo)(() => {
53970
54743
  return batches.filter((batch) => {
53971
54744
  if (selectedGraderId && batch.graderId !== selectedGraderId) return false;
53972
- if (selectedScenarioRunId && batch.scenarioRunId !== selectedScenarioRunId) {
54745
+ if (selectedScenarioDeckId && batch.scenarioDeckId !== selectedScenarioDeckId) {
53973
54746
  return false;
53974
54747
  }
53975
54748
  return true;
@@ -53977,7 +54750,7 @@ var SimulatorVerifyPage = iso(`
53977
54750
  }, [
53978
54751
  batches,
53979
54752
  selectedGraderId,
53980
- selectedScenarioRunId
54753
+ selectedScenarioDeckId
53981
54754
  ]);
53982
54755
  const visibleBatches = filteredBatches.length > 0 ? filteredBatches : batches;
53983
54756
  (0, import_react65.useEffect)(() => {
@@ -54003,26 +54776,34 @@ var SimulatorVerifyPage = iso(`
54003
54776
  graders,
54004
54777
  selectedGraderId
54005
54778
  ]);
54779
+ const selectedScenarioDeck = (0, import_react65.useMemo)(() => scenarioDecks.find((deck) => deck.id === selectedScenarioDeckId) ?? null, [
54780
+ scenarioDecks,
54781
+ selectedScenarioDeckId
54782
+ ]);
54006
54783
  const queuedCount = (0, import_react65.useMemo)(() => (selectedBatch?.requests ?? []).filter((request) => request.status === "queued").length, [
54007
54784
  selectedBatch?.requests
54008
54785
  ]);
54009
54786
  const metrics = selectedBatch?.metrics ?? null;
54010
- const topOutliers = (metrics?.outliers ?? []).slice(0, 8);
54787
+ const topOutlierScenarioRuns = (0, import_react65.useMemo)(() => sortVerifyOutlierScenarioRuns(metrics?.outlierScenarioRuns ?? []).slice(0, 8), [
54788
+ metrics?.outlierScenarioRuns
54789
+ ]);
54011
54790
  const hasRunningBatch = visibleBatches.some((batch) => batch.status === "running");
54012
54791
  const canRun = Boolean(workspaceId && selectedGraderId && !runBatchMutation.inFlight && !hasRunningBatch);
54013
54792
  const runBatch = (0, import_react65.useCallback)(() => {
54014
54793
  if (!workspaceId || !selectedGraderId) return;
54015
54794
  setMutationError(null);
54016
- const nextBatchSize = clampInt2(batchSize, 1, MAX_BATCH_SIZE2);
54017
- const nextConcurrency = clampInt2(batchConcurrency, 1, Math.min(MAX_BATCH_CONCURRENCY2, nextBatchSize));
54795
+ const nextScenarioRuns = clampInt2(scenarioRuns, 1, MAX_SCENARIO_RUNS);
54796
+ const nextRepeats = clampInt2(graderRepeatsPerScenario, 1, MAX_GRADER_REPEATS);
54797
+ const nextConcurrency = clampInt2(batchConcurrency, 1, MAX_BATCH_CONCURRENCY2);
54018
54798
  runBatchMutation.commit({
54019
54799
  input: {
54020
54800
  workspaceId,
54021
- graderId: selectedGraderId,
54022
- ...selectedScenarioRunId ? {
54023
- scenarioRunId: selectedScenarioRunId
54801
+ ...selectedScenarioDeckId ? {
54802
+ scenarioDeckId: selectedScenarioDeckId
54024
54803
  } : {},
54025
- batchSize: nextBatchSize,
54804
+ graderId: selectedGraderId,
54805
+ scenarioRuns: nextScenarioRuns,
54806
+ graderRepeatsPerScenario: nextRepeats,
54026
54807
  concurrency: nextConcurrency
54027
54808
  }
54028
54809
  }, {
@@ -54038,10 +54819,11 @@ var SimulatorVerifyPage = iso(`
54038
54819
  });
54039
54820
  }, [
54040
54821
  batchConcurrency,
54041
- batchSize,
54822
+ graderRepeatsPerScenario,
54042
54823
  runBatchMutation,
54824
+ scenarioRuns,
54043
54825
  selectedGraderId,
54044
- selectedScenarioRunId,
54826
+ selectedScenarioDeckId,
54045
54827
  workspaceId
54046
54828
  ]);
54047
54829
  const navigateToGradeRun = (0, import_react65.useCallback)((runId) => {
@@ -54054,23 +54836,6 @@ var SimulatorVerifyPage = iso(`
54054
54836
  toPrefixedPath,
54055
54837
  workspaceId
54056
54838
  ]);
54057
- const scenarioOptions = (0, import_react65.useMemo)(() => [
54058
- {
54059
- value: NO_SCENARIO_RUN_VALUE2,
54060
- label: "Current workspace context",
54061
- meta: "Run without a prior scenario run binding"
54062
- },
54063
- ...scenarioRuns.map((run) => ({
54064
- value: run.id,
54065
- label: scenarioNameFromValue(run.id) ?? run.id,
54066
- meta: [
54067
- run.status,
54068
- run.finishedAt ?? run.startedAt
54069
- ].filter(Boolean).join(" \xB7 ")
54070
- }))
54071
- ], [
54072
- scenarioRuns
54073
- ]);
54074
54839
  return /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(PageShell, {
54075
54840
  className: "verify-shell",
54076
54841
  children: /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)(PageGrid, {
@@ -54085,22 +54850,25 @@ var SimulatorVerifyPage = iso(`
54085
54850
  className: "verify-controls-header",
54086
54851
  children: [
54087
54852
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
54088
- children: "Verify consistency"
54853
+ children: "Verify repeated evidence"
54089
54854
  }),
54090
54855
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("span", {
54091
54856
  className: "secondary-note",
54092
- children: "Run repeated grading checks against one grader and scenario."
54857
+ children: "Generate scenario runs, then grade each run repeatedly."
54093
54858
  })
54094
54859
  ]
54095
54860
  }),
54096
- scenarioRuns.length > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Listbox, {
54097
- label: "Scenario run",
54098
- value: selectedScenarioRunId ?? NO_SCENARIO_RUN_VALUE2,
54099
- onChange: (runId) => {
54100
- setSelectedScenarioRunId(runId === NO_SCENARIO_RUN_VALUE2 ? null : runId);
54101
- },
54102
- options: scenarioOptions,
54103
- placeholder: "Select scenario run"
54861
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Listbox, {
54862
+ label: "Scenario deck",
54863
+ value: selectedScenarioDeckId ?? "",
54864
+ onChange: (value) => setSelectedScenarioDeckId(value.length ? value : null),
54865
+ options: scenarioDecks.map((deck) => ({
54866
+ value: deck.id,
54867
+ label: deck.label,
54868
+ meta: deck.path
54869
+ })),
54870
+ placeholder: "Select scenario deck",
54871
+ disabled: scenarioDecks.length === 0
54104
54872
  }),
54105
54873
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Listbox, {
54106
54874
  label: "Grader",
@@ -54120,13 +54888,26 @@ var SimulatorVerifyPage = iso(`
54120
54888
  /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("label", {
54121
54889
  className: "verify-number-field",
54122
54890
  children: [
54123
- "Batch size",
54891
+ "Scenario runs",
54124
54892
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("input", {
54125
54893
  type: "number",
54126
54894
  min: 1,
54127
- max: MAX_BATCH_SIZE2,
54128
- value: batchSize,
54129
- onChange: (event) => setBatchSize(clampInt2(Number(event.target.value), 1, MAX_BATCH_SIZE2))
54895
+ max: MAX_SCENARIO_RUNS,
54896
+ value: scenarioRuns,
54897
+ onChange: (event) => setScenarioRuns(clampInt2(Number(event.target.value), 1, MAX_SCENARIO_RUNS))
54898
+ })
54899
+ ]
54900
+ }),
54901
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("label", {
54902
+ className: "verify-number-field",
54903
+ children: [
54904
+ "Grader repeats per scenario",
54905
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("input", {
54906
+ type: "number",
54907
+ min: 1,
54908
+ max: MAX_GRADER_REPEATS,
54909
+ value: graderRepeatsPerScenario,
54910
+ onChange: (event) => setGraderRepeatsPerScenario(clampInt2(Number(event.target.value), 1, MAX_GRADER_REPEATS))
54130
54911
  })
54131
54912
  ]
54132
54913
  }),
@@ -54150,7 +54931,17 @@ var SimulatorVerifyPage = iso(`
54150
54931
  variant: "primary",
54151
54932
  onClick: runBatch,
54152
54933
  disabled: !canRun,
54153
- children: hasRunningBatch || runBatchMutation.inFlight ? "Running consistency batch\u2026" : "Run consistency batch"
54934
+ children: hasRunningBatch || runBatchMutation.inFlight ? "Running verify batch..." : "Run verify batch"
54935
+ }),
54936
+ scenarioDecks.length === 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)(Callout, {
54937
+ children: [
54938
+ "No scenario decks are available. Add ",
54939
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("code", {
54940
+ children: "[[testDecks]]"
54941
+ }),
54942
+ " ",
54943
+ "entries to the active root deck."
54944
+ ]
54154
54945
  }),
54155
54946
  graders.length === 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)(Callout, {
54156
54947
  children: [
@@ -54162,6 +54953,9 @@ var SimulatorVerifyPage = iso(`
54162
54953
  "entries to the active root deck."
54163
54954
  ]
54164
54955
  }),
54956
+ selectedScenarioDeck?.description && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
54957
+ children: selectedScenarioDeck.description
54958
+ }),
54165
54959
  selectedGrader?.description && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
54166
54960
  children: selectedGrader.description
54167
54961
  }),
@@ -54180,172 +54974,196 @@ var SimulatorVerifyPage = iso(`
54180
54974
  className: "error",
54181
54975
  children: mutationError
54182
54976
  }),
54183
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54977
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54184
54978
  className: "verify-status-row",
54185
- children: [
54186
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54187
- className: "verify-status-main",
54188
- children: [
54189
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
54190
- children: "Batch status"
54191
- }),
54192
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54193
- className: "verify-status-meta",
54194
- children: [
54195
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Badge, {
54196
- status: selectedBatch?.status ?? "idle",
54197
- children: selectedBatch?.status ?? "idle"
54198
- }),
54199
- selectedBatch?.startedAt ? ` \xB7 started ${formatTimestampShort(selectedBatch.startedAt)}` : "",
54200
- selectedBatch?.finishedAt ? ` \xB7 finished ${formatTimestampShort(selectedBatch.finishedAt)}` : ""
54201
- ]
54202
- })
54203
- ]
54204
- }),
54205
- metrics && metrics.sampleSize > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("span", {
54206
- className: classNames("verify-verdict-badge", `verify-verdict-badge--${metrics.verdict.toLowerCase()}`),
54207
- children: metrics.verdict
54208
- })
54209
- ]
54210
- }),
54211
- selectedBatch && selectedBatch.requested > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54212
- className: "verify-progress-row",
54213
- children: [
54214
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
54215
- children: [
54216
- "Queued: ",
54217
- queuedCount
54218
- ]
54219
- }),
54220
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
54221
- children: [
54222
- "Running: ",
54223
- selectedBatch.active
54224
- ]
54225
- }),
54226
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
54227
- children: [
54228
- "Completed: ",
54229
- selectedBatch.completed
54230
- ]
54231
- }),
54232
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
54233
- children: [
54234
- "Failed: ",
54235
- selectedBatch.failed
54236
- ]
54237
- })
54238
- ]
54979
+ children: /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54980
+ className: "verify-status-main",
54981
+ children: [
54982
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
54983
+ children: "Batch status"
54984
+ }),
54985
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54986
+ className: "verify-status-meta",
54987
+ children: [
54988
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Badge, {
54989
+ status: selectedBatch?.status ?? "idle",
54990
+ children: selectedBatch?.status ?? "idle"
54991
+ }),
54992
+ selectedBatch?.startedAt ? ` \xB7 started ${formatTimestampShort(selectedBatch.startedAt)}` : "",
54993
+ selectedBatch?.finishedAt ? ` \xB7 finished ${formatTimestampShort(selectedBatch.finishedAt)}` : ""
54994
+ ]
54995
+ })
54996
+ ]
54997
+ })
54239
54998
  }),
54240
54999
  !selectedBatch && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
54241
- children: "Run a consistency batch to compute agreement, spread, and instability for the selected grader."
55000
+ children: "Run a verify batch to generate repeated grading evidence."
54242
55001
  }),
54243
- metrics && metrics.sampleSize > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)(import_jsx_runtime63.Fragment, {
54244
- children: [
54245
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54246
- className: "verify-metric-grid",
54247
- children: [
54248
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54249
- className: "verify-metric-card",
54250
- children: [
54251
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54252
- className: "verify-metric-label",
54253
- children: "Sample size"
54254
- }),
54255
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54256
- className: "verify-metric-value",
54257
- children: metrics.sampleSize
54258
- })
54259
- ]
54260
- }),
54261
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54262
- className: "verify-metric-card",
54263
- children: [
54264
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54265
- className: "verify-metric-label",
54266
- children: "Agreement rate"
54267
- }),
54268
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54269
- className: "verify-metric-value",
54270
- children: metrics.agreementRate === null ? "\u2014" : `${Math.round(metrics.agreementRate * 100)}%`
54271
- })
54272
- ]
54273
- }),
54274
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54275
- className: "verify-metric-card",
54276
- children: [
54277
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54278
- className: "verify-metric-label",
54279
- children: "Score spread (min/median/max)"
54280
- }),
54281
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54282
- className: "verify-metric-value verify-metric-value--compact",
54283
- children: metrics.scoreSpreadMin === null ? "\u2014" : `${metrics.scoreSpreadMin} / ${metrics.scoreSpreadMedian ?? "\u2014"} / ${metrics.scoreSpreadMax ?? "\u2014"}`
54284
- })
54285
- ]
54286
- }),
54287
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54288
- className: "verify-metric-card",
54289
- children: [
54290
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54291
- className: "verify-metric-label",
54292
- children: "Instability count"
54293
- }),
54294
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54295
- className: "verify-metric-value",
54296
- children: metrics.instabilityCount
54297
- })
54298
- ]
54299
- })
54300
- ]
54301
- }),
54302
- /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
54303
- variant: metrics.verdict === "FAIL" ? "danger" : metrics.verdict === "WARN" ? "emphasis" : "muted",
54304
- title: `Verdict: ${metrics.verdict}`,
54305
- children: metrics.verdictReason
54306
- })
54307
- ]
55002
+ selectedBatch && selectedBatch.requested > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(import_jsx_runtime63.Fragment, {
55003
+ children: /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55004
+ className: "verify-progress-row",
55005
+ children: [
55006
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55007
+ children: [
55008
+ "Scenario runs: ",
55009
+ selectedBatch.scenarioRunsCompleted,
55010
+ "/",
55011
+ selectedBatch.scenarioRuns
55012
+ ]
55013
+ }),
55014
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55015
+ children: [
55016
+ "Scenario failures: ",
55017
+ selectedBatch.scenarioRunsFailed
55018
+ ]
55019
+ }),
55020
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55021
+ children: [
55022
+ "Queued: ",
55023
+ queuedCount
55024
+ ]
55025
+ }),
55026
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55027
+ children: [
55028
+ "Running: ",
55029
+ selectedBatch.active
55030
+ ]
55031
+ }),
55032
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55033
+ children: [
55034
+ "Completed: ",
55035
+ selectedBatch.completed
55036
+ ]
55037
+ }),
55038
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55039
+ children: [
55040
+ "Failed: ",
55041
+ selectedBatch.failed
55042
+ ]
55043
+ })
55044
+ ]
55045
+ })
54308
55046
  }),
54309
- /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)(Callout, {
54310
- title: "Thresholds in code",
54311
- children: [
54312
- "Min sample size: ",
54313
- VERIFY_CONSISTENCY_THRESHOLDS.minSampleSize,
54314
- " ",
54315
- "\xB7 PASS requires agreement \u2265 ",
54316
- Math.round(VERIFY_CONSISTENCY_THRESHOLDS.pass.agreementMin * 100),
54317
- "%, spread \u2264",
54318
- " ",
54319
- VERIFY_CONSISTENCY_THRESHOLDS.pass.maxSpread,
54320
- ", instability \u2264",
54321
- " ",
54322
- VERIFY_CONSISTENCY_THRESHOLDS.pass.maxInstabilityCount,
54323
- " ",
54324
- "\xB7 WARN allows agreement \u2265 ",
54325
- Math.round(VERIFY_CONSISTENCY_THRESHOLDS.warn.agreementMin * 100),
54326
- "%, spread \u2264",
54327
- " ",
54328
- VERIFY_CONSISTENCY_THRESHOLDS.warn.maxSpread,
54329
- ", instability \u2264",
54330
- " ",
54331
- VERIFY_CONSISTENCY_THRESHOLDS.warn.maxInstabilityCount,
54332
- "."
54333
- ]
55047
+ metrics && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(import_jsx_runtime63.Fragment, {
55048
+ children: /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55049
+ className: "verify-metric-grid",
55050
+ children: [
55051
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55052
+ className: "verify-metric-card",
55053
+ children: [
55054
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55055
+ className: "verify-metric-label",
55056
+ children: "Scenario runs"
55057
+ }),
55058
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55059
+ className: "verify-metric-value",
55060
+ children: [
55061
+ metrics.scenarioRunCountCompleted,
55062
+ "/",
55063
+ metrics.scenarioRunCountRequested
55064
+ ]
55065
+ })
55066
+ ]
55067
+ }),
55068
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55069
+ className: "verify-metric-card",
55070
+ children: [
55071
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55072
+ className: "verify-metric-label",
55073
+ children: "Grade samples"
55074
+ }),
55075
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55076
+ className: "verify-metric-value",
55077
+ children: [
55078
+ metrics.gradeSampleCountCompleted,
55079
+ "/",
55080
+ metrics.gradeSampleCountRequested
55081
+ ]
55082
+ })
55083
+ ]
55084
+ }),
55085
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55086
+ className: "verify-metric-card",
55087
+ children: [
55088
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55089
+ className: "verify-metric-label",
55090
+ children: "Pass rate"
55091
+ }),
55092
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55093
+ className: "verify-metric-value",
55094
+ children: formatPercent(metrics.passRate)
55095
+ })
55096
+ ]
55097
+ }),
55098
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55099
+ className: "verify-metric-card",
55100
+ children: [
55101
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55102
+ className: "verify-metric-label",
55103
+ children: "Score mean"
55104
+ }),
55105
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55106
+ className: "verify-metric-value",
55107
+ children: metrics.scoreMean === null ? "-" : metrics.scoreMean
55108
+ })
55109
+ ]
55110
+ }),
55111
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55112
+ className: "verify-metric-card",
55113
+ children: [
55114
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55115
+ className: "verify-metric-label",
55116
+ children: "Score min/median/max"
55117
+ }),
55118
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55119
+ className: "verify-metric-value verify-metric-value--compact",
55120
+ children: metrics.scoreMin === null ? "-" : `${metrics.scoreMin} / ${metrics.scoreMedian ?? "-"} / ${metrics.scoreMax ?? "-"}`
55121
+ })
55122
+ ]
55123
+ }),
55124
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55125
+ className: "verify-metric-card",
55126
+ children: [
55127
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55128
+ className: "verify-metric-label",
55129
+ children: "Execution failures"
55130
+ }),
55131
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55132
+ className: "verify-metric-value",
55133
+ children: metrics.executionFailureCount
55134
+ })
55135
+ ]
55136
+ }),
55137
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55138
+ className: "verify-metric-card",
55139
+ children: [
55140
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55141
+ className: "verify-metric-label",
55142
+ children: "Grading failures"
55143
+ }),
55144
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
55145
+ className: "verify-metric-value",
55146
+ children: metrics.gradingFailureCount
55147
+ })
55148
+ ]
55149
+ })
55150
+ ]
55151
+ })
54334
55152
  }),
54335
55153
  /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54336
55154
  className: "verify-section",
54337
55155
  children: [
54338
55156
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
54339
- children: "Most inconsistent examples"
55157
+ children: "Outlier scenario runs"
54340
55158
  }),
54341
- topOutliers.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
54342
- children: "Inconsistent examples will appear here as soon as at least one completed run is available in this batch."
55159
+ topOutlierScenarioRuns.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
55160
+ children: "Outlier scenario runs appear as soon as completed grade samples are available."
54343
55161
  }) : /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("div", {
54344
55162
  className: "verify-outlier-list",
54345
- children: topOutliers.map((outlier) => {
55163
+ children: topOutlierScenarioRuns.map((outlier) => {
54346
55164
  const runLinks = [
54347
- outlier.maxRunId,
54348
- outlier.minRunId
55165
+ outlier.minRunId,
55166
+ outlier.maxRunId
54349
55167
  ].filter((value) => Boolean(value));
54350
55168
  const uniqueRunLinks = [
54351
55169
  ...new Set(runLinks)
@@ -54357,26 +55175,35 @@ var SimulatorVerifyPage = iso(`
54357
55175
  className: "verify-outlier-header",
54358
55176
  children: [
54359
55177
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
54360
- children: outlier.label
55178
+ children: scenarioNameFromValue(outlier.scenarioRunId) ?? outlier.scenarioRunId
54361
55179
  }),
54362
55180
  /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Badge, {
54363
- variant: outlier.instability ? "error" : "completed",
54364
- children: outlier.instability ? "Unstable" : "Stable"
55181
+ variant: outlier.failed ? "error" : "completed",
55182
+ children: outlier.failed ? "Failed" : "Scored"
54365
55183
  })
54366
55184
  ]
54367
55185
  }),
54368
55186
  /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54369
55187
  className: "verify-outlier-meta",
54370
55188
  children: [
54371
- "agreement ",
54372
- outlier.agreementRate === null ? "\u2014" : `${Math.round(outlier.agreementRate * 100)}%`,
55189
+ "avg ",
55190
+ outlier.averageScore ?? "-",
55191
+ " \xB7 min/max",
55192
+ " ",
55193
+ outlier.minScore ?? "-",
55194
+ "/",
55195
+ outlier.maxScore ?? "-",
55196
+ " ",
55197
+ "\xB7 samples ",
55198
+ outlier.completedSampleCount,
55199
+ "/",
55200
+ outlier.gradeSampleCount,
55201
+ " \xB7 execution failures",
54373
55202
  " ",
54374
- "\xB7 delta ",
54375
- outlier.scoreDelta ?? "\u2014",
54376
- " \xB7 samples",
55203
+ outlier.executionFailureCount,
55204
+ " \xB7 grading failures",
54377
55205
  " ",
54378
- outlier.sampleSize,
54379
- outlier.passFlip ? " \xB7 pass/fail flip" : "",
55206
+ outlier.gradingFailureCount,
54380
55207
  outlier.messageRefId ? ` \xB7 ref ${outlier.messageRefId}` : ""
54381
55208
  ]
54382
55209
  }),
@@ -54402,6 +55229,38 @@ var SimulatorVerifyPage = iso(`
54402
55229
  })
54403
55230
  ]
54404
55231
  }),
55232
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
55233
+ className: "verify-section",
55234
+ children: [
55235
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("strong", {
55236
+ children: "Failure reasons"
55237
+ }),
55238
+ !metrics || metrics.failureReasons.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Callout, {
55239
+ children: "No failure reasons captured yet."
55240
+ }) : /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("ul", {
55241
+ className: "verify-request-list",
55242
+ children: metrics.failureReasons.map((reason) => /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("li", {
55243
+ className: "verify-request-row",
55244
+ children: [
55245
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)(Badge, {
55246
+ variant: reason.kind === "execution" ? "error" : "running",
55247
+ children: reason.kind
55248
+ }),
55249
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("span", {
55250
+ children: reason.reason
55251
+ }),
55252
+ /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
55253
+ className: "secondary-note",
55254
+ children: [
55255
+ "x",
55256
+ reason.count
55257
+ ]
55258
+ })
55259
+ ]
55260
+ }, reason.key))
55261
+ })
55262
+ ]
55263
+ }),
54405
55264
  visibleBatches.length > 0 && /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("div", {
54406
55265
  className: "verify-section",
54407
55266
  children: [
@@ -54433,10 +55292,10 @@ var SimulatorVerifyPage = iso(`
54433
55292
  /* @__PURE__ */ (0, import_jsx_runtime63.jsxs)("span", {
54434
55293
  className: "secondary-note",
54435
55294
  children: [
54436
- batch.completed,
54437
- "/",
54438
- batch.requested,
54439
- " complete"
55295
+ batch.scenarioRuns,
55296
+ " runs \xD7",
55297
+ " ",
55298
+ batch.graderRepeatsPerScenario
54440
55299
  ]
54441
55300
  })
54442
55301
  ]
@@ -54466,6 +55325,10 @@ var SimulatorVerifyPage = iso(`
54466
55325
  status: request.status === "queued" ? "idle" : request.status,
54467
55326
  children: request.status
54468
55327
  }),
55328
+ request.scenarioRunId && /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("span", {
55329
+ className: "secondary-note",
55330
+ children: scenarioNameFromValue(request.scenarioRunId) ?? request.scenarioRunId
55331
+ }),
54469
55332
  request.runId ? /* @__PURE__ */ (0, import_jsx_runtime63.jsx)("a", {
54470
55333
  href: toPrefixedPath(buildWorkspacePath("grade", workspaceId, {
54471
55334
  runId: request.runId
@@ -54503,77 +55366,40 @@ var readerAst36 = [
54503
55366
  },
54504
55367
  {
54505
55368
  kind: "Linked",
54506
- fieldName: "scenarioRuns",
55369
+ fieldName: "scenarioDecks",
54507
55370
  alias: null,
54508
- arguments: [
54509
- [
54510
- "first",
54511
- {
54512
- kind: "Literal",
54513
- value: 50
54514
- }
54515
- ]
54516
- ],
55371
+ arguments: null,
54517
55372
  condition: null,
54518
55373
  isUpdatable: false,
54519
55374
  refetchQueryIndex: null,
54520
55375
  selections: [
54521
55376
  {
54522
- kind: "Linked",
54523
- fieldName: "edges",
55377
+ kind: "Scalar",
55378
+ fieldName: "id",
54524
55379
  alias: null,
54525
55380
  arguments: null,
54526
- condition: null,
54527
- isUpdatable: false,
54528
- refetchQueryIndex: null,
54529
- selections: [
54530
- {
54531
- kind: "Linked",
54532
- fieldName: "node",
54533
- alias: null,
54534
- arguments: null,
54535
- condition: null,
54536
- isUpdatable: false,
54537
- refetchQueryIndex: null,
54538
- selections: [
54539
- {
54540
- kind: "Scalar",
54541
- fieldName: "id",
54542
- alias: null,
54543
- arguments: null,
54544
- isUpdatable: false
54545
- },
54546
- {
54547
- kind: "Scalar",
54548
- fieldName: "status",
54549
- alias: null,
54550
- arguments: null,
54551
- isUpdatable: false
54552
- },
54553
- {
54554
- kind: "Scalar",
54555
- fieldName: "startedAt",
54556
- alias: null,
54557
- arguments: null,
54558
- isUpdatable: false
54559
- },
54560
- {
54561
- kind: "Scalar",
54562
- fieldName: "finishedAt",
54563
- alias: null,
54564
- arguments: null,
54565
- isUpdatable: false
54566
- },
54567
- {
54568
- kind: "Scalar",
54569
- fieldName: "error",
54570
- alias: null,
54571
- arguments: null,
54572
- isUpdatable: false
54573
- }
54574
- ]
54575
- }
54576
- ]
55381
+ isUpdatable: false
55382
+ },
55383
+ {
55384
+ kind: "Scalar",
55385
+ fieldName: "label",
55386
+ alias: null,
55387
+ arguments: null,
55388
+ isUpdatable: false
55389
+ },
55390
+ {
55391
+ kind: "Scalar",
55392
+ fieldName: "description",
55393
+ alias: null,
55394
+ arguments: null,
55395
+ isUpdatable: false
55396
+ },
55397
+ {
55398
+ kind: "Scalar",
55399
+ fieldName: "path",
55400
+ alias: null,
55401
+ arguments: null,
55402
+ isUpdatable: false
54577
55403
  }
54578
55404
  ]
54579
55405
  },
@@ -54704,6 +55530,13 @@ var readerAst36 = [
54704
55530
  arguments: null,
54705
55531
  isUpdatable: false
54706
55532
  },
55533
+ {
55534
+ kind: "Scalar",
55535
+ fieldName: "scenarioDeckId",
55536
+ alias: null,
55537
+ arguments: null,
55538
+ isUpdatable: false
55539
+ },
54707
55540
  {
54708
55541
  kind: "Scalar",
54709
55542
  fieldName: "graderId",
@@ -54713,7 +55546,14 @@ var readerAst36 = [
54713
55546
  },
54714
55547
  {
54715
55548
  kind: "Scalar",
54716
- fieldName: "scenarioRunId",
55549
+ fieldName: "scenarioRuns",
55550
+ alias: null,
55551
+ arguments: null,
55552
+ isUpdatable: false
55553
+ },
55554
+ {
55555
+ kind: "Scalar",
55556
+ fieldName: "graderRepeatsPerScenario",
54717
55557
  alias: null,
54718
55558
  arguments: null,
54719
55559
  isUpdatable: false
@@ -54767,6 +55607,20 @@ var readerAst36 = [
54767
55607
  arguments: null,
54768
55608
  isUpdatable: false
54769
55609
  },
55610
+ {
55611
+ kind: "Scalar",
55612
+ fieldName: "scenarioRunsCompleted",
55613
+ alias: null,
55614
+ arguments: null,
55615
+ isUpdatable: false
55616
+ },
55617
+ {
55618
+ kind: "Scalar",
55619
+ fieldName: "scenarioRunsFailed",
55620
+ alias: null,
55621
+ arguments: null,
55622
+ isUpdatable: false
55623
+ },
54770
55624
  {
54771
55625
  kind: "Linked",
54772
55626
  fieldName: "requests",
@@ -54776,7 +55630,7 @@ var readerAst36 = [
54776
55630
  "first",
54777
55631
  {
54778
55632
  kind: "Literal",
54779
- value: 50
55633
+ value: 200
54780
55634
  }
54781
55635
  ]
54782
55636
  ],
@@ -54809,6 +55663,13 @@ var readerAst36 = [
54809
55663
  arguments: null,
54810
55664
  isUpdatable: false
54811
55665
  },
55666
+ {
55667
+ kind: "Scalar",
55668
+ fieldName: "scenarioRunId",
55669
+ alias: null,
55670
+ arguments: null,
55671
+ isUpdatable: false
55672
+ },
54812
55673
  {
54813
55674
  kind: "Scalar",
54814
55675
  fieldName: "status",
@@ -54847,63 +55708,98 @@ var readerAst36 = [
54847
55708
  selections: [
54848
55709
  {
54849
55710
  kind: "Scalar",
54850
- fieldName: "sampleSize",
55711
+ fieldName: "scenarioRunCountRequested",
55712
+ alias: null,
55713
+ arguments: null,
55714
+ isUpdatable: false
55715
+ },
55716
+ {
55717
+ kind: "Scalar",
55718
+ fieldName: "scenarioRunCountCompleted",
55719
+ alias: null,
55720
+ arguments: null,
55721
+ isUpdatable: false
55722
+ },
55723
+ {
55724
+ kind: "Scalar",
55725
+ fieldName: "scenarioRunCountFailed",
55726
+ alias: null,
55727
+ arguments: null,
55728
+ isUpdatable: false
55729
+ },
55730
+ {
55731
+ kind: "Scalar",
55732
+ fieldName: "gradeSampleCountRequested",
55733
+ alias: null,
55734
+ arguments: null,
55735
+ isUpdatable: false
55736
+ },
55737
+ {
55738
+ kind: "Scalar",
55739
+ fieldName: "gradeSampleCountCompleted",
54851
55740
  alias: null,
54852
55741
  arguments: null,
54853
55742
  isUpdatable: false
54854
55743
  },
54855
55744
  {
54856
55745
  kind: "Scalar",
54857
- fieldName: "agreementRate",
55746
+ fieldName: "gradeSampleCountFailed",
54858
55747
  alias: null,
54859
55748
  arguments: null,
54860
55749
  isUpdatable: false
54861
55750
  },
54862
55751
  {
54863
55752
  kind: "Scalar",
54864
- fieldName: "scoreSpreadMin",
55753
+ fieldName: "executionFailureCount",
54865
55754
  alias: null,
54866
55755
  arguments: null,
54867
55756
  isUpdatable: false
54868
55757
  },
54869
55758
  {
54870
55759
  kind: "Scalar",
54871
- fieldName: "scoreSpreadMedian",
55760
+ fieldName: "gradingFailureCount",
54872
55761
  alias: null,
54873
55762
  arguments: null,
54874
55763
  isUpdatable: false
54875
55764
  },
54876
55765
  {
54877
55766
  kind: "Scalar",
54878
- fieldName: "scoreSpreadMax",
55767
+ fieldName: "passRate",
54879
55768
  alias: null,
54880
55769
  arguments: null,
54881
55770
  isUpdatable: false
54882
55771
  },
54883
55772
  {
54884
55773
  kind: "Scalar",
54885
- fieldName: "instabilityCount",
55774
+ fieldName: "scoreMin",
54886
55775
  alias: null,
54887
55776
  arguments: null,
54888
55777
  isUpdatable: false
54889
55778
  },
54890
55779
  {
54891
55780
  kind: "Scalar",
54892
- fieldName: "verdict",
55781
+ fieldName: "scoreMedian",
54893
55782
  alias: null,
54894
55783
  arguments: null,
54895
55784
  isUpdatable: false
54896
55785
  },
54897
55786
  {
54898
55787
  kind: "Scalar",
54899
- fieldName: "verdictReason",
55788
+ fieldName: "scoreMax",
55789
+ alias: null,
55790
+ arguments: null,
55791
+ isUpdatable: false
55792
+ },
55793
+ {
55794
+ kind: "Scalar",
55795
+ fieldName: "scoreMean",
54900
55796
  alias: null,
54901
55797
  arguments: null,
54902
55798
  isUpdatable: false
54903
55799
  },
54904
55800
  {
54905
55801
  kind: "Linked",
54906
- fieldName: "outliers",
55802
+ fieldName: "outlierScenarioRuns",
54907
55803
  alias: null,
54908
55804
  arguments: [
54909
55805
  [
@@ -54938,49 +55834,70 @@ var readerAst36 = [
54938
55834
  selections: [
54939
55835
  {
54940
55836
  kind: "Scalar",
54941
- fieldName: "key",
55837
+ fieldName: "key",
55838
+ alias: null,
55839
+ arguments: null,
55840
+ isUpdatable: false
55841
+ },
55842
+ {
55843
+ kind: "Scalar",
55844
+ fieldName: "scenarioRunId",
55845
+ alias: null,
55846
+ arguments: null,
55847
+ isUpdatable: false
55848
+ },
55849
+ {
55850
+ kind: "Scalar",
55851
+ fieldName: "gradeSampleCount",
55852
+ alias: null,
55853
+ arguments: null,
55854
+ isUpdatable: false
55855
+ },
55856
+ {
55857
+ kind: "Scalar",
55858
+ fieldName: "completedSampleCount",
54942
55859
  alias: null,
54943
55860
  arguments: null,
54944
55861
  isUpdatable: false
54945
55862
  },
54946
55863
  {
54947
55864
  kind: "Scalar",
54948
- fieldName: "label",
55865
+ fieldName: "executionFailureCount",
54949
55866
  alias: null,
54950
55867
  arguments: null,
54951
55868
  isUpdatable: false
54952
55869
  },
54953
55870
  {
54954
55871
  kind: "Scalar",
54955
- fieldName: "sampleSize",
55872
+ fieldName: "gradingFailureCount",
54956
55873
  alias: null,
54957
55874
  arguments: null,
54958
55875
  isUpdatable: false
54959
55876
  },
54960
55877
  {
54961
55878
  kind: "Scalar",
54962
- fieldName: "agreementRate",
55879
+ fieldName: "averageScore",
54963
55880
  alias: null,
54964
55881
  arguments: null,
54965
55882
  isUpdatable: false
54966
55883
  },
54967
55884
  {
54968
55885
  kind: "Scalar",
54969
- fieldName: "scoreDelta",
55886
+ fieldName: "minScore",
54970
55887
  alias: null,
54971
55888
  arguments: null,
54972
55889
  isUpdatable: false
54973
55890
  },
54974
55891
  {
54975
55892
  kind: "Scalar",
54976
- fieldName: "passFlip",
55893
+ fieldName: "maxScore",
54977
55894
  alias: null,
54978
55895
  arguments: null,
54979
55896
  isUpdatable: false
54980
55897
  },
54981
55898
  {
54982
55899
  kind: "Scalar",
54983
- fieldName: "instability",
55900
+ fieldName: "failed",
54984
55901
  alias: null,
54985
55902
  arguments: null,
54986
55903
  isUpdatable: false
@@ -55001,14 +55918,76 @@ var readerAst36 = [
55001
55918
  },
55002
55919
  {
55003
55920
  kind: "Scalar",
55004
- fieldName: "turnIndex",
55921
+ fieldName: "messageRefId",
55922
+ alias: null,
55923
+ arguments: null,
55924
+ isUpdatable: false
55925
+ }
55926
+ ]
55927
+ }
55928
+ ]
55929
+ }
55930
+ ]
55931
+ },
55932
+ {
55933
+ kind: "Linked",
55934
+ fieldName: "failureReasons",
55935
+ alias: null,
55936
+ arguments: [
55937
+ [
55938
+ "first",
55939
+ {
55940
+ kind: "Literal",
55941
+ value: 25
55942
+ }
55943
+ ]
55944
+ ],
55945
+ condition: null,
55946
+ isUpdatable: false,
55947
+ refetchQueryIndex: null,
55948
+ selections: [
55949
+ {
55950
+ kind: "Linked",
55951
+ fieldName: "edges",
55952
+ alias: null,
55953
+ arguments: null,
55954
+ condition: null,
55955
+ isUpdatable: false,
55956
+ refetchQueryIndex: null,
55957
+ selections: [
55958
+ {
55959
+ kind: "Linked",
55960
+ fieldName: "node",
55961
+ alias: null,
55962
+ arguments: null,
55963
+ condition: null,
55964
+ isUpdatable: false,
55965
+ refetchQueryIndex: null,
55966
+ selections: [
55967
+ {
55968
+ kind: "Scalar",
55969
+ fieldName: "key",
55970
+ alias: null,
55971
+ arguments: null,
55972
+ isUpdatable: false
55973
+ },
55974
+ {
55975
+ kind: "Scalar",
55976
+ fieldName: "kind",
55005
55977
  alias: null,
55006
55978
  arguments: null,
55007
55979
  isUpdatable: false
55008
55980
  },
55009
55981
  {
55010
55982
  kind: "Scalar",
55011
- fieldName: "messageRefId",
55983
+ fieldName: "reason",
55984
+ alias: null,
55985
+ arguments: null,
55986
+ isUpdatable: false
55987
+ },
55988
+ {
55989
+ kind: "Scalar",
55990
+ fieldName: "count",
55012
55991
  alias: null,
55013
55992
  arguments: null,
55014
55993
  isUpdatable: false
@@ -55078,7 +56057,7 @@ var artifact60 = {
55078
56057
  var resolver_reader_default37 = artifact60;
55079
56058
 
55080
56059
  // simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/query_text.ts
55081
- var query_text_default24 = "query EntrypointSimulatorVerifyPage($workspaceId: ID!) { workspace____id___v_workspaceId: workspace(id: $workspaceId) { id, scenarioRuns____first___l_50: scenarioRuns(first: 50) { edges { node { __typename, id, error, finishedAt, startedAt, status, }, }, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, metrics { agreementRate, instabilityCount, outliers____first___l_25: outliers(first: 25) { edges { node { agreementRate, instability, key, label, maxRunId, messageRefId, minRunId, passFlip, sampleSize, scoreDelta, turnIndex, }, }, }, sampleSize, scoreSpreadMax, scoreSpreadMedian, scoreSpreadMin, verdict, verdictReason, }, requested, requests____first___l_50: requests(first: 50) { edges { node { id, error, runId, status, }, }, }, scenarioRunId, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, },}";
56060
+ var query_text_default24 = "query EntrypointSimulatorVerifyPage($workspaceId: ID!) { workspace____id___v_workspaceId: workspace(id: $workspaceId) { id, scenarioDecks { id, description, label, path, }, verification { batches____first___l_50: batches(first: 50) { edges { node { id, active, completed, failed, finishedAt, graderId, graderRepeatsPerScenario, metrics { executionFailureCount, failureReasons____first___l_25: failureReasons(first: 25) { edges { node { count, key, kind, reason, }, }, }, gradeSampleCountCompleted, gradeSampleCountFailed, gradeSampleCountRequested, gradingFailureCount, outlierScenarioRuns____first___l_25: outlierScenarioRuns(first: 25) { edges { node { averageScore, completedSampleCount, executionFailureCount, failed, gradeSampleCount, gradingFailureCount, key, maxRunId, maxScore, messageRefId, minRunId, minScore, scenarioRunId, }, }, }, passRate, scenarioRunCountCompleted, scenarioRunCountFailed, scenarioRunCountRequested, scoreMax, scoreMean, scoreMedian, scoreMin, }, requested, requests____first___l_200: requests(first: 200) { edges { node { id, error, runId, scenarioRunId, status, }, }, }, scenarioDeckId, scenarioRuns, scenarioRunsCompleted, scenarioRunsFailed, startedAt, status, workspaceId, }, }, }, graderDecks____first___l_50: graderDecks(first: 50) { edges { node { id, description, label, path, }, }, }, }, },}";
55082
56061
 
55083
56062
  // simulator-ui/__generated__/__isograph/Query/EntrypointSimulatorVerifyPage/normalization_ast.ts
55084
56063
  var normalizationAst24 = {
@@ -55105,63 +56084,29 @@ var normalizationAst24 = {
55105
56084
  },
55106
56085
  {
55107
56086
  kind: "Linked",
55108
- fieldName: "scenarioRuns",
55109
- arguments: [
55110
- [
55111
- "first",
55112
- {
55113
- kind: "Literal",
55114
- value: 50
55115
- }
55116
- ]
55117
- ],
55118
- concreteType: "WorkspaceScenarioRunsConnection",
56087
+ fieldName: "scenarioDecks",
56088
+ arguments: null,
56089
+ concreteType: "WorkspaceScenarioDeck",
55119
56090
  selections: [
55120
56091
  {
55121
- kind: "Linked",
55122
- fieldName: "edges",
55123
- arguments: null,
55124
- concreteType: "WorkspaceScenarioRunsConnectionEdge",
55125
- selections: [
55126
- {
55127
- kind: "Linked",
55128
- fieldName: "node",
55129
- arguments: null,
55130
- concreteType: null,
55131
- selections: [
55132
- {
55133
- kind: "Scalar",
55134
- fieldName: "__typename",
55135
- arguments: null
55136
- },
55137
- {
55138
- kind: "Scalar",
55139
- fieldName: "id",
55140
- arguments: null
55141
- },
55142
- {
55143
- kind: "Scalar",
55144
- fieldName: "error",
55145
- arguments: null
55146
- },
55147
- {
55148
- kind: "Scalar",
55149
- fieldName: "finishedAt",
55150
- arguments: null
55151
- },
55152
- {
55153
- kind: "Scalar",
55154
- fieldName: "startedAt",
55155
- arguments: null
55156
- },
55157
- {
55158
- kind: "Scalar",
55159
- fieldName: "status",
55160
- arguments: null
55161
- }
55162
- ]
55163
- }
55164
- ]
56092
+ kind: "Scalar",
56093
+ fieldName: "id",
56094
+ arguments: null
56095
+ },
56096
+ {
56097
+ kind: "Scalar",
56098
+ fieldName: "description",
56099
+ arguments: null
56100
+ },
56101
+ {
56102
+ kind: "Scalar",
56103
+ fieldName: "label",
56104
+ arguments: null
56105
+ },
56106
+ {
56107
+ kind: "Scalar",
56108
+ fieldName: "path",
56109
+ arguments: null
55165
56110
  }
55166
56111
  ]
55167
56112
  },
@@ -55227,6 +56172,11 @@ var normalizationAst24 = {
55227
56172
  fieldName: "graderId",
55228
56173
  arguments: null
55229
56174
  },
56175
+ {
56176
+ kind: "Scalar",
56177
+ fieldName: "graderRepeatsPerScenario",
56178
+ arguments: null
56179
+ },
55230
56180
  {
55231
56181
  kind: "Linked",
55232
56182
  fieldName: "metrics",
@@ -55235,17 +56185,84 @@ var normalizationAst24 = {
55235
56185
  selections: [
55236
56186
  {
55237
56187
  kind: "Scalar",
55238
- fieldName: "agreementRate",
56188
+ fieldName: "executionFailureCount",
56189
+ arguments: null
56190
+ },
56191
+ {
56192
+ kind: "Linked",
56193
+ fieldName: "failureReasons",
56194
+ arguments: [
56195
+ [
56196
+ "first",
56197
+ {
56198
+ kind: "Literal",
56199
+ value: 25
56200
+ }
56201
+ ]
56202
+ ],
56203
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnection",
56204
+ selections: [
56205
+ {
56206
+ kind: "Linked",
56207
+ fieldName: "edges",
56208
+ arguments: null,
56209
+ concreteType: "WorkspaceVerifyMetricsFailureReasonsConnectionEdge",
56210
+ selections: [
56211
+ {
56212
+ kind: "Linked",
56213
+ fieldName: "node",
56214
+ arguments: null,
56215
+ concreteType: "WorkspaceVerifyFailureReasonGroup",
56216
+ selections: [
56217
+ {
56218
+ kind: "Scalar",
56219
+ fieldName: "count",
56220
+ arguments: null
56221
+ },
56222
+ {
56223
+ kind: "Scalar",
56224
+ fieldName: "key",
56225
+ arguments: null
56226
+ },
56227
+ {
56228
+ kind: "Scalar",
56229
+ fieldName: "kind",
56230
+ arguments: null
56231
+ },
56232
+ {
56233
+ kind: "Scalar",
56234
+ fieldName: "reason",
56235
+ arguments: null
56236
+ }
56237
+ ]
56238
+ }
56239
+ ]
56240
+ }
56241
+ ]
56242
+ },
56243
+ {
56244
+ kind: "Scalar",
56245
+ fieldName: "gradeSampleCountCompleted",
55239
56246
  arguments: null
55240
56247
  },
55241
56248
  {
55242
56249
  kind: "Scalar",
55243
- fieldName: "instabilityCount",
56250
+ fieldName: "gradeSampleCountFailed",
56251
+ arguments: null
56252
+ },
56253
+ {
56254
+ kind: "Scalar",
56255
+ fieldName: "gradeSampleCountRequested",
56256
+ arguments: null
56257
+ },
56258
+ {
56259
+ kind: "Scalar",
56260
+ fieldName: "gradingFailureCount",
55244
56261
  arguments: null
55245
56262
  },
55246
56263
  {
55247
56264
  kind: "Linked",
55248
- fieldName: "outliers",
56265
+ fieldName: "outlierScenarioRuns",
55249
56266
  arguments: [
55250
56267
  [
55251
56268
  "first",
@@ -55255,73 +56272,83 @@ var normalizationAst24 = {
55255
56272
  }
55256
56273
  ]
55257
56274
  ],
55258
- concreteType: "WorkspaceVerifyMetricsOutliersConnection",
56275
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnection",
55259
56276
  selections: [
55260
56277
  {
55261
56278
  kind: "Linked",
55262
56279
  fieldName: "edges",
55263
56280
  arguments: null,
55264
- concreteType: "WorkspaceVerifyMetricsOutliersConnectionEdge",
56281
+ concreteType: "WorkspaceVerifyMetricsOutlierScenarioRunsConnectionEdge",
55265
56282
  selections: [
55266
56283
  {
55267
56284
  kind: "Linked",
55268
56285
  fieldName: "node",
55269
56286
  arguments: null,
55270
- concreteType: "WorkspaceVerifyOutlier",
56287
+ concreteType: "WorkspaceVerifyScenarioOutlier",
55271
56288
  selections: [
55272
56289
  {
55273
56290
  kind: "Scalar",
55274
- fieldName: "agreementRate",
56291
+ fieldName: "averageScore",
55275
56292
  arguments: null
55276
56293
  },
55277
56294
  {
55278
56295
  kind: "Scalar",
55279
- fieldName: "instability",
56296
+ fieldName: "completedSampleCount",
55280
56297
  arguments: null
55281
56298
  },
55282
56299
  {
55283
56300
  kind: "Scalar",
55284
- fieldName: "key",
56301
+ fieldName: "executionFailureCount",
55285
56302
  arguments: null
55286
56303
  },
55287
56304
  {
55288
56305
  kind: "Scalar",
55289
- fieldName: "label",
56306
+ fieldName: "failed",
55290
56307
  arguments: null
55291
56308
  },
55292
56309
  {
55293
56310
  kind: "Scalar",
55294
- fieldName: "maxRunId",
56311
+ fieldName: "gradeSampleCount",
55295
56312
  arguments: null
55296
56313
  },
55297
56314
  {
55298
56315
  kind: "Scalar",
55299
- fieldName: "messageRefId",
56316
+ fieldName: "gradingFailureCount",
55300
56317
  arguments: null
55301
56318
  },
55302
56319
  {
55303
56320
  kind: "Scalar",
55304
- fieldName: "minRunId",
56321
+ fieldName: "key",
56322
+ arguments: null
56323
+ },
56324
+ {
56325
+ kind: "Scalar",
56326
+ fieldName: "maxRunId",
56327
+ arguments: null
56328
+ },
56329
+ {
56330
+ kind: "Scalar",
56331
+ fieldName: "maxScore",
55305
56332
  arguments: null
55306
56333
  },
55307
56334
  {
55308
56335
  kind: "Scalar",
55309
- fieldName: "passFlip",
56336
+ fieldName: "messageRefId",
55310
56337
  arguments: null
55311
56338
  },
55312
56339
  {
55313
56340
  kind: "Scalar",
55314
- fieldName: "sampleSize",
56341
+ fieldName: "minRunId",
55315
56342
  arguments: null
55316
56343
  },
55317
56344
  {
55318
56345
  kind: "Scalar",
55319
- fieldName: "scoreDelta",
56346
+ fieldName: "minScore",
55320
56347
  arguments: null
55321
56348
  },
55322
56349
  {
55323
56350
  kind: "Scalar",
55324
- fieldName: "turnIndex",
56351
+ fieldName: "scenarioRunId",
55325
56352
  arguments: null
55326
56353
  }
55327
56354
  ]
@@ -55332,32 +56359,42 @@ var normalizationAst24 = {
55332
56359
  },
55333
56360
  {
55334
56361
  kind: "Scalar",
55335
- fieldName: "sampleSize",
56362
+ fieldName: "passRate",
56363
+ arguments: null
56364
+ },
56365
+ {
56366
+ kind: "Scalar",
56367
+ fieldName: "scenarioRunCountCompleted",
56368
+ arguments: null
56369
+ },
56370
+ {
56371
+ kind: "Scalar",
56372
+ fieldName: "scenarioRunCountFailed",
55336
56373
  arguments: null
55337
56374
  },
55338
56375
  {
55339
56376
  kind: "Scalar",
55340
- fieldName: "scoreSpreadMax",
56377
+ fieldName: "scenarioRunCountRequested",
55341
56378
  arguments: null
55342
56379
  },
55343
56380
  {
55344
56381
  kind: "Scalar",
55345
- fieldName: "scoreSpreadMedian",
56382
+ fieldName: "scoreMax",
55346
56383
  arguments: null
55347
56384
  },
55348
56385
  {
55349
56386
  kind: "Scalar",
55350
- fieldName: "scoreSpreadMin",
56387
+ fieldName: "scoreMean",
55351
56388
  arguments: null
55352
56389
  },
55353
56390
  {
55354
56391
  kind: "Scalar",
55355
- fieldName: "verdict",
56392
+ fieldName: "scoreMedian",
55356
56393
  arguments: null
55357
56394
  },
55358
56395
  {
55359
56396
  kind: "Scalar",
55360
- fieldName: "verdictReason",
56397
+ fieldName: "scoreMin",
55361
56398
  arguments: null
55362
56399
  }
55363
56400
  ]
@@ -55375,7 +56412,7 @@ var normalizationAst24 = {
55375
56412
  "first",
55376
56413
  {
55377
56414
  kind: "Literal",
55378
- value: 50
56415
+ value: 200
55379
56416
  }
55380
56417
  ]
55381
56418
  ],
@@ -55408,6 +56445,11 @@ var normalizationAst24 = {
55408
56445
  fieldName: "runId",
55409
56446
  arguments: null
55410
56447
  },
56448
+ {
56449
+ kind: "Scalar",
56450
+ fieldName: "scenarioRunId",
56451
+ arguments: null
56452
+ },
55411
56453
  {
55412
56454
  kind: "Scalar",
55413
56455
  fieldName: "status",
@@ -55421,7 +56463,22 @@ var normalizationAst24 = {
55421
56463
  },
55422
56464
  {
55423
56465
  kind: "Scalar",
55424
- fieldName: "scenarioRunId",
56466
+ fieldName: "scenarioDeckId",
56467
+ arguments: null
56468
+ },
56469
+ {
56470
+ kind: "Scalar",
56471
+ fieldName: "scenarioRuns",
56472
+ arguments: null
56473
+ },
56474
+ {
56475
+ kind: "Scalar",
56476
+ fieldName: "scenarioRunsCompleted",
56477
+ arguments: null
56478
+ },
56479
+ {
56480
+ kind: "Scalar",
56481
+ fieldName: "scenarioRunsFailed",
55425
56482
  arguments: null
55426
56483
  },
55427
56484
  {
@@ -55826,18 +56883,19 @@ function AppShell(props) {
55826
56883
  /* @__PURE__ */ (0, import_jsx_runtime66.jsxs)("div", {
55827
56884
  className: "top-nav-left",
55828
56885
  children: [
55829
- /* @__PURE__ */ (0, import_jsx_runtime66.jsxs)(Button, {
56886
+ /* @__PURE__ */ (0, import_jsx_runtime66.jsx)(Button, {
55830
56887
  "data-testid": "nav-sessions",
55831
56888
  className: classNames("sessions-toggle", drawerOpen && "active"),
55832
56889
  variant: "secondary",
55833
56890
  onClick: () => setDrawerOpen(true),
55834
- children: [
55835
- /* @__PURE__ */ (0, import_jsx_runtime66.jsx)(Icon, {
55836
- name: "hamburgerMenu",
55837
- size: 14
55838
- }),
55839
- "Sessions"
55840
- ]
56891
+ "aria-label": "Open sessions drawer",
56892
+ children: /* @__PURE__ */ (0, import_jsx_runtime66.jsx)(Icon, {
56893
+ name: "hamburgerMenu",
56894
+ size: 17,
56895
+ style: {
56896
+ color: "var(--color-text)"
56897
+ }
56898
+ })
55841
56899
  }),
55842
56900
  /* @__PURE__ */ (0, import_jsx_runtime66.jsxs)("div", {
55843
56901
  className: "top-nav-buttons tab-anchor-group",
@@ -55925,7 +56983,14 @@ function AppShell(props) {
55925
56983
  variant: "secondary",
55926
56984
  onClick: () => setWorkbenchOpen((prev) => !prev),
55927
56985
  disabled: !canOpenWorkbench,
55928
- children: "Workbench"
56986
+ "aria-label": workbenchVisible ? "Close workbench drawer" : "Open workbench drawer",
56987
+ children: /* @__PURE__ */ (0, import_jsx_runtime66.jsx)(Icon, {
56988
+ name: "chat",
56989
+ size: 16,
56990
+ style: {
56991
+ color: "currentColor"
56992
+ }
56993
+ })
55929
56994
  })
55930
56995
  ]
55931
56996
  })
@@ -56417,8 +57482,12 @@ function isBuildChatDebugEnabled() {
56417
57482
  const value = new URLSearchParams(search).get("gambitBuildChatDebug");
56418
57483
  if (value === "1" || value === "true") return true;
56419
57484
  }
56420
- if (typeof globalThis.localStorage === "undefined") return false;
56421
- const stored = (globalThis.localStorage.getItem("gambit:build-chat-debug") ?? "").toLowerCase().trim();
57485
+ let stored = "";
57486
+ try {
57487
+ stored = (globalThis.localStorage?.getItem("gambit:build-chat-debug") ?? "").toLowerCase().trim();
57488
+ } catch {
57489
+ return false;
57490
+ }
56422
57491
  return stored === "1" || stored === "true" || stored === "yes";
56423
57492
  }
56424
57493
  var BUILD_CHAT_DEBUG = isBuildChatDebugEnabled();