python-flexeval 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,7 +49,6 @@ multimodels-langgraph-rubric-dependencies:
49
49
  rubric:
50
50
  # check if student requested a plot in their last message
51
51
  - name: is_request_for_plot
52
- # context_only: false
53
52
  metric_level: Turn
54
53
  depends_on:
55
54
  - name: is_role
@@ -134,7 +133,6 @@ test-multimodels-langgraph-rubric-dependencies:
134
133
 
135
134
  rubric:
136
135
  - name: is_request_for_plot
137
- # context_only: false
138
136
  metric_level: Turn
139
137
  depends_on:
140
138
  - name: is_role
@@ -181,7 +179,6 @@ test-multimodels-langgraph:
181
179
 
182
180
  rubric:
183
181
  - name: no_plot_after_student_requested
184
- context_only: false
185
182
  depends_on:
186
183
  - name: is_role
187
184
  type: function
@@ -267,7 +264,6 @@ issues-fixing-rubrics:
267
264
  metric_min_value: 1
268
265
 
269
266
  - name: no_plot_after_student_requested
270
- context_only: false
271
267
  depends_on:
272
268
  - name: is_role
273
269
  type: function
@@ -277,7 +273,6 @@ issues-fixing-rubrics:
277
273
  metric_min_value: 1
278
274
 
279
275
  - name: desmos_code_rendered_on_toolcall
280
- context_only: false
281
276
  metric_level: Turn
282
277
  depends_on:
283
278
  - name: count_tool_calls
@@ -326,7 +321,6 @@ test-multiple-rubrics-files:
326
321
 
327
322
  rubric:
328
323
  - name: plot_matches_description
329
- context_only: false
330
324
  metric_level: Turn
331
325
  depends_on:
332
326
  - name: count_tool_calls
@@ -348,7 +342,6 @@ test-multiple-rubrics-files:
348
342
  metric_min_value: 1
349
343
 
350
344
  - name: plot_matches_description_example_project
351
- context_only: false
352
345
  metric_level: Turn
353
346
  depends_on:
354
347
  - name: count_tool_calls
@@ -379,17 +372,12 @@ test-new:
379
372
  function:
380
373
  - name: string_length
381
374
 
382
- - name: openai_moderation_api #run for every turn by default
383
- context_only: false #this is the default
384
-
385
375
  - name: openai_moderation_api
386
- context_only: true
387
-
376
+
388
377
  rubric:
389
378
  - name: yeasayer_completion
390
379
  depends_on:
391
380
  - name: openai_moderation_api
392
- context_only: true
393
381
  metric_min_value: 0.1
394
382
 
395
383
  completion_llm:
@@ -451,11 +439,7 @@ example:
451
439
 
452
440
  - name: count_messages_per_role
453
441
 
454
- - name: openai_moderation_api #run for every turn by default
455
- context_only: false #this is the default
456
-
457
442
  - name: openai_moderation_api
458
- context_only: true
459
443
 
460
444
  - name: flesch_kincaid_grade
461
445
 
@@ -463,11 +447,9 @@ example:
463
447
  - name: yeasayer_completion
464
448
  depends_on:
465
449
  - name: openai_moderation_api
466
- context_only: true
467
450
  metric_min_value: 0.1
468
451
 
469
452
  - name: is_request_for_plot
470
- context_only: true
471
453
  depends_on:
472
454
  - name: is_role
473
455
  type: function
@@ -798,7 +780,6 @@ plotting_some_rubrics:
798
780
 
799
781
  rubric:
800
782
  - name: is_request_for_plot
801
- context_only: false
802
783
  metric_level: Turn
803
784
  depends_on:
804
785
  - name: is_role
@@ -808,7 +789,6 @@ plotting_some_rubrics:
808
789
  metric_min_value: 1
809
790
 
810
791
  - name: is_student_acting_as_tutor
811
- context_only: false
812
792
  metric_level: Turn
813
793
  depends_on:
814
794
  - name: is_role
@@ -818,14 +798,12 @@ plotting_some_rubrics:
818
798
  metric_min_value: 1
819
799
 
820
800
  - name: is_pedagogically_appropriate_plot
821
- context_only: false
822
801
  metric_level: Turn
823
802
  depends_on:
824
803
  - name: count_tool_calls
825
804
  metric_min_value: 1
826
-
805
+
827
806
  - name: plot_matches_description
828
- context_only: false
829
807
  metric_level: Turn
830
808
  depends_on:
831
809
  - name: count_tool_calls
@@ -837,7 +815,6 @@ plotting_some_rubrics:
837
815
  metric_max_value: 0
838
816
 
839
817
  - name: plot_bounds_are_sufficiently_large
840
- context_only: false
841
818
  metric_level: Turn
842
819
  depends_on:
843
820
  - name: count_tool_calls