PyPI - python-flexeval - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

python-flexeval 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

flexeval/__about__.py +1 -0
flexeval/__init__.py +2 -0
flexeval/classes/eval_runner.py +1 -1
flexeval/compute_metrics.py +1 -1
flexeval/configuration/function_metrics.py +2 -2
flexeval/db_utils.py +8 -1
flexeval/metrics/__init__.py +1 -1
flexeval/metrics/access.py +24 -2
flexeval/schema/eval_schema.py +10 -0
flexeval/schema/evalrun_schema.py +1 -1
flexeval/schema/rubric_schema.py +1 -1
{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/METADATA +7 -5
{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/RECORD +16 -15
{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/WHEEL +0 -0
{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/entry_points.txt +0 -0
{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/licenses/LICENSE +0 -0

flexeval/__about__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.2.0"

flexeval/__init__.py CHANGED Viewed

@@ -4,8 +4,10 @@ This top-level import exposes the :func:`~flexeval.runner.run` method."""
 from flexeval import metrics
 from flexeval.runner import run
+from .__about__ import __version__
 __all__ = [
     "metrics",
     "run",
+    "__version__",
 ]

flexeval/classes/eval_runner.py CHANGED Viewed

@@ -111,7 +111,7 @@ class EvalRunner:
     def load_evaluation_settings(self):
         """This function parses our eval suite and puts it in the data structure we'll need
-        for easy use at run-time
+        for easy use at run-time.
         """
         # if the current eval has a 'config' entry, overwrite configuration options with its entries
         if (

flexeval/compute_metrics.py CHANGED Viewed

@@ -42,7 +42,7 @@ class ObjectMetric:
 class MetricGraphBuilder:
-    """Builds :class:`networkx.DiGraph`\s of :class:`~flexeval.compute_metrics.ObjectMetric` instances that reflect any computational dependencies between them."""
+    """Builds :class:`networkx.DiGraph` s of :class:`~flexeval.compute_metrics.ObjectMetric` instances that reflect any computational dependencies between them."""
     def __init__(self):
         # key: tuple(metric_level, metric_id, object_id)

flexeval/configuration/function_metrics.py CHANGED Viewed

@@ -122,8 +122,8 @@ def is_role(object: Union[Turn, Message], role: str) -> dict:
     and 0 otherwise.
     Args:
-    object: the Turn or Message
-    role: a string with the desired role to check against
+        object: the Turn or Message
+        role: a string with the desired role to check against
     """
     return {role: int(object.role == role)}

flexeval/db_utils.py CHANGED Viewed

@@ -14,6 +14,11 @@ from flexeval.classes.turn import Turn
 DATABASE_TABLES = [EvalSetRun, Dataset, Thread, Turn, Message, ToolCall, Metric]
+def ensure_database(database_path: str):
+    if not classes_base.database.is_connection_usable():
+        initialize_database(database_path)
 def initialize_database(database_path: str, clear_tables: bool = False):
     classes_base.database.init(database_path)
     # classes_base.database.start()
@@ -34,5 +39,7 @@ def bind_to_database(database_path: str) -> pw.Database:
     new_database = classes_base.create_sqlite_database(database_path)
     new_database.bind(DATABASE_TABLES)
     # Verify the binding worked by checking one of the models
-    assert classes_base.BaseModel._meta.database == new_database
+    assert classes_base.BaseModel._meta.database == new_database, (
+        f"Binding to '{database_path}' failed."
+    )
     return new_database

flexeval/metrics/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Utility functions for accessing metrics."""
+"""Utility functions for working with metrics."""
 from flexeval.metrics import access, save

flexeval/metrics/access.py CHANGED Viewed

@@ -1,6 +1,8 @@
+"""Utility functions for accessing metrics."""
 from collections import Counter
-from flexeval.classes import metric
+from flexeval.classes import metric, message, turn, thread
 def count_dict_values(lst: list[dict]) -> dict[str, Counter]:
@@ -21,8 +23,28 @@ def count_dict_values(lst: list[dict]) -> dict[str, Counter]:
     return counts
-def get_all_metrics() -> list:
+def get_all_metrics() -> list[dict]:
     results = []
     for m in metric.Metric.select():
         results.append(m.__data__.copy())
     return results
+def get_first_user_message_for_threads(thread_ids: set) -> list[dict]:
+    """Get the first user message in each thread.
+    Args:
+        thread_ids (set): The set of thread IDs to retrieve messages for.
+    Returns:
+        list[dict]: An iterable of messages.
+    """
+    return (
+        message.Message.select()
+        .join(thread.Thread)
+        .where(thread.Thread.id.in_(thread_ids))
+        .where(message.Message.role == "user")
+        .join(turn.Turn)
+        .where(turn.Turn.index_in_thread == 0)
+        .dicts()
+    )

flexeval/schema/eval_schema.py CHANGED Viewed

@@ -16,6 +16,8 @@ MetricLevel = Literal["Message", "Turn", "Thread", "ToolCall"]
 class DependsOnItem(BaseModel):
+    """Defines a metric dependency."""
     class Config:
         extra = "forbid"
@@ -56,6 +58,8 @@ class DependsOnItem(BaseModel):
 class MetricItem(BaseModel):
+    "Defines a metric."
     name: str = Field(
         ...,
         description="The function to call or name of rubric to use to compute this metric.",
@@ -72,6 +76,8 @@ class MetricItem(BaseModel):
 class FunctionItem(MetricItem):
+    """Defines a metric computed from a Python function."""
     kwargs: schema_utils.OptionalDict = Field(
         default_factory=dict,
         description="Keyword arguments for the function. Each key must correspond to an argument in the function. Extra keys will cause an error.",
@@ -80,6 +86,8 @@ class FunctionItem(MetricItem):
 class RubricItem(MetricItem):
+    """Defines a metric computed from a rubric."""
     # TODO is RubricItem.kwargs actually used?
     kwargs: Optional[Dict[str, Any]] = Field(
         default_factory=dict,
@@ -115,6 +123,8 @@ class CompletionLlm(BaseModel):
 class GraderLlm(BaseModel):
+    """Defines the LLM used for evaluating rubrics."""
     class Config:
         extra = "forbid"

flexeval/schema/evalrun_schema.py CHANGED Viewed

@@ -37,7 +37,7 @@ class FileDataSource(DataSource):
 class FunctionsCollection(BaseModel):
-    """Collection of functions that can be used as :class:`~flexeval.schema.eval_schema.FunctionItem`\s."""
+    """Collection of functions that can be used as :class:`~flexeval.schema.eval_schema.FunctionItem` s."""
     functions: list[Callable] = Field(
         default_factory=list,

flexeval/schema/rubric_schema.py CHANGED Viewed

@@ -32,7 +32,7 @@ class Rubric(BaseModel):
 class RubricsCollection(BaseModel):
-    """Collection of rubrics that can be used as :class:`~flexeval.schema.eval_schema.RubricItem`\s."""
+    """Collection of rubrics that can be used as :class:`~flexeval.schema.eval_schema.RubricItem` s."""
     rubrics: dict[str, Rubric] = Field(
         default_factory=dict,

{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-flexeval
-Version: 0.1.5
+Version: 0.2.0
 Summary: FlexEval is a tool for designing custom metrics, completion functions, and LLM-graded rubrics for evaluating the behavior of LLM-powered systems.
 Project-URL: Homepage, https://digitalharborfoundation.github.io/FlexEval/
 Project-URL: GitHub, https://github.com/DigitalHarborFoundation/FlexEval
@@ -40,10 +40,12 @@ Description-Content-Type: text/markdown
 # FlexEval LLM Evals
+[![PyPi](https://img.shields.io/pypi/v/python-flexeval)](https://pypi.org/project/python-flexeval/)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12729993.svg)](https://doi.org/10.5281/zenodo.12729993)
 [![License](https://img.shields.io/github/license/DigitalHarborFoundation/FlexEval)](https://github.com/DigitalHarborFoundation/FlexEval/blob/main/LICENSE)
+[![GitHub issues](https://img.shields.io/badge/issue_tracking-github-blue.svg)](https://github.com/DigitalHarborFoundation/FlexEval/issues)
-![FlexEval banner](/docs/_static/flexeval_banner.svg)
+![FlexEval banner](https://raw.githubusercontent.com/DigitalHarborFoundation/FlexEval/refs/heads/main/docs/_static/flexeval_banner.svg)
 FlexEval is a tool for designing custom metrics, completion functions, and LLM-graded rubrics for evaluating the behavior of LLM-powered systems.
@@ -73,7 +75,7 @@ flexeval.run(eval_run)
 This example computes [Flesch reading ease](https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease) for every turn in a list of conversations provided in JSONL format. The metric values are stored in an SQLite database called `eval_results.db`.
-See additional usage examples in the [vignettes](/vignettes).
+See additional usage examples in the [vignettes](https://github.com/DigitalHarborFoundation/FlexEval/tree/main/vignettes).
 ## Installation
@@ -97,7 +99,7 @@ FlexEval is designed to be "batteries included" for many basic use cases. It sup
 - a set of useful rubrics
 - a set of useful Python functions
-Evaluation results are saved in an SQLite database. See the [Metric Analysis](/vignettes/metric_analysis.ipynb) vignette for a sample analysis demonstrating the structure and utility of the data saved by FlexEval.
+Evaluation results are saved in an SQLite database. See the [Metric Analysis](https://digitalharborfoundation.github.io/FlexEval/generated/vignettes/metric_analysis.html) vignette for a sample analysis demonstrating the structure and utility of the data saved by FlexEval.
 Read more in the [Getting Started](https://digitalharborfoundation.github.io/FlexEval/getting_started.html) guide.
@@ -115,4 +117,4 @@ Pull requests are welcome. Feel free to contribute:
 - Bug fixes
 - New features
-See [DEVELOPMENT.md](DEVELOPMENT.md).
+See [DEVELOPMENT.md](https://github.com/DigitalHarborFoundation/FlexEval/tree/main/DEVELOPMENT.md).

{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,12 @@
-flexeval/__init__.py,sha256=FIVIg06yxMU_RHPpx22QtX94hnS8Ce7gCjOcQ2pECMc,337
+flexeval/__about__.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
+flexeval/__init__.py,sha256=UXI_xdSxnGAK2plDODBbPF3df-N7E9YJ418QHK7XN-Q,391
 flexeval/__main__.py,sha256=c9NQqsea3e-_6b736gBeIO3O_zdXQ1wtY3-Scj5NiPg,126
 flexeval/cli.py,sha256=RwtRk121OivbLQyYpYxJ7PugPIYQ8J4qXHFN2SxxPy4,2985
 flexeval/completions.py,sha256=pi_tYK4m3vKSqAC1ym9Jc3e4srcQSXfx-mX4qI5qisQ,5686
-flexeval/compute_metrics.py,sha256=elQZvuh2jyateWzwIPm8RLHASq-XqFMinEIA0rlMkj8,37277
+flexeval/compute_metrics.py,sha256=4X6XFk0qUKcaCDllNeJreuhlnDHmfRPlsf0f8fWFOxA,37277
 flexeval/config.yaml,sha256=dpkFdW0rKf7StGoVeIGaCNw0n0yOfYWig0xmIfsDdbg,530
 flexeval/data_loader.py,sha256=EKc6wdpQuhrB2ai2U_fQxojzt1RR716ELisiZXpfu58,25311
-flexeval/db_utils.py,sha256=FKekqWAZ0oQbYNvw0bxuzHcZxlSsKKJkUhyfod-pMLg,1412
+flexeval/db_utils.py,sha256=2jgqexLCAqShvgPrImZz12UkMZtfERhP8iXjratXYok,1612
 flexeval/dependency_graph.py,sha256=SaG9gjkw2Q0NykqQWs4JzPkv5sMj2aXXmhjJ7yRkV4Q,10539
 flexeval/eval_schema.json,sha256=BQetj8O0_4rorj3Mpqk-sj_SCaRkGMrvBUcxhuw6zLE,13111
 flexeval/function_types.py,sha256=eH8NadQRw7XAOXAOKWYN6b7urjr57J5WzdiVyzh0Wb4,6898
@@ -17,7 +18,7 @@ flexeval/runner.py,sha256=X6ZfjfwIM3ymN_kHfRt_JSKPxpDxs_MWQPrvWhl2L7I,4340
 flexeval/classes/__init__.py,sha256=fywDMYX8W-nXFKRXolzn-RWd_7tiJr6FlouQJvYSoyE,347
 flexeval/classes/base.py,sha256=xxkTa8joPe39CFwveeTPW56LW-x7rsi5oBAIxrvM5iI,944
 flexeval/classes/dataset.py,sha256=Y_EdEIuhx526SSvkqk2tFBzkOgBkVY-5FeraYMtU5lo,2913
-flexeval/classes/eval_runner.py,sha256=-jkPlKhTWX0FpUDrzCaUIlIIlKsSAmDy06T4I1aB3Ds,6269
+flexeval/classes/eval_runner.py,sha256=ZvCpyaD7lorDK_mYJSZqQbvI6FfLbIWRFHNarWTAMQU,6270
 flexeval/classes/eval_set_run.py,sha256=fq_wBOaxuq7dLxiZIw76WGIwhRBNbQWDUhpiK0wDG_A,1116
 flexeval/classes/message.py,sha256=zuDm_v1gmK49Fw5m-HTWiqndrI_xtLotlXD8nhRDDTg,7518
 flexeval/classes/metric.py,sha256=d8l39_QwnQDmTJvy9TIulU4p0jqD7ldMUi4m5zfK2Es,2806
@@ -27,21 +28,21 @@ flexeval/classes/turn.py,sha256=kLmgnYQ-4a8sydzGK1HTQRyUDXZIedmt_NFR3shLJFE,8635
 flexeval/configuration/__init__.py,sha256=wP_gpYyaEp5DxCSH8-4KHchH07JMZZOk8eCFMfd5LBw,75
 flexeval/configuration/completion_functions.py,sha256=-N0iFAfcYcm35S78M3ES4MBkLXpDeEfy2Qq1ORHGBXE,7491
 flexeval/configuration/evals.yaml,sha256=3mbD3gEccTDotm8kj4doYTujqRD_PkGhCVhjQaSEqSs,22651
-flexeval/configuration/function_metrics.py,sha256=UqCCl_xoG6kH6jRset0m1FQoAfUrqt9bqipxAshN5_A,22419
+flexeval/configuration/function_metrics.py,sha256=SGCxCAfG5NfKop-d3_uJgF83nPrlfHAhd-TU0GpEPFY,22427
 flexeval/configuration/rubric_metrics.yaml,sha256=JfE6gPj4LtM2v0b5-Zge3NwM17YgJEBZXzTVn9UL7zk,9424
 flexeval/io/__init__.py,sha256=MqdgcPzkFpSnOEz-e2GNNd8XOI_DbyNjIP8AT5eqUqI,101
 flexeval/io/parsers/yaml_parser.py,sha256=2yE6j_RM_YG5nkNUWZckrymh61n28AG46lqnPSlWitk,1818
-flexeval/metrics/__init__.py,sha256=zBg-thOos5X1-YUH70PkdMqFnPdsrTM0Bt3fIjhfxDM,131
-flexeval/metrics/access.py,sha256=U-IhG_dhC8HZ9BMnBKHiEvHretUuAnzuUWJ288XuPiA,681
+flexeval/metrics/__init__.py,sha256=qrgUhTXzezAOoABhck3hMVN-c2Bwn7CTg-e_P2w7PlA,134
+flexeval/metrics/access.py,sha256=mP89IUNTWpHguMEdjjh_deMxdiyClb61hg3k7Jcus-o,1299
 flexeval/metrics/save.py,sha256=8x9ifRiHtQT7_WeMP0XmYK1zfourXMnHkGZy_iR0Xcc,1643
 flexeval/schema/__init__.py,sha256=4OA6Q7Dguz-uaulwoRsrtaoReFmyNsKqyi_CvfDV4-c,379
 flexeval/schema/config_schema.py,sha256=LkmtiOLfPsX1u_6Ey6gFbRr8tQwxqcuLcyf-xYcBf9o,1619
-flexeval/schema/eval_schema.py,sha256=95kCkiGS67TfpVUfUaBdBMoKIpUJoY1beUgLWwg5Ljk,6373
-flexeval/schema/evalrun_schema.py,sha256=LE6RmNHeRJIRye68xUMOaknWMNLcugfnQoUEkeP1JRs,3526
-flexeval/schema/rubric_schema.py,sha256=9DaqU-Av6XMig7iIy3EObLhEkhjtYIxeCqpovKLYfYw,1615
+flexeval/schema/eval_schema.py,sha256=iHMbanW4Ef_sp51KiaZKeP3Dn4Z6pWCGa7N2SPvsFK0,6607
+flexeval/schema/evalrun_schema.py,sha256=M7JY01DhlLzwZc2jJTIeGPs9vt6TFMPir51MFhtRllA,3526
+flexeval/schema/rubric_schema.py,sha256=uxcf7MHWKW3EmABUnWeCinGUP6LBjskiq7zkEPHmAvU,1615
 flexeval/schema/schema_utils.py,sha256=Fg1foqRA-9X-hl_vqIF3bpYdE51hNEgdw739Q-s3iQc,698
-python_flexeval-0.1.5.dist-info/METADATA,sha256=LPvBmYMMKpyxgStPchWxj1fhBYoNbbdb7-UgQX2b4CY,5095
-python_flexeval-0.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-python_flexeval-0.1.5.dist-info/entry_points.txt,sha256=wSyluqXhrX3xySVYAtM-Kv23p4OauKQCSBuNNfzEGtI,52
-python_flexeval-0.1.5.dist-info/licenses/LICENSE,sha256=OlAu_c13gw6-fJ9UdhZBMeNr5STLrnWG_0Hv0SCXtu4,1082
-python_flexeval-0.1.5.dist-info/RECORD,,
+python_flexeval-0.2.0.dist-info/METADATA,sha256=bEifn06Ok5-8YllS4uYxBN2KNuZvf7vJg8b_GarkttU,5599
+python_flexeval-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+python_flexeval-0.2.0.dist-info/entry_points.txt,sha256=wSyluqXhrX3xySVYAtM-Kv23p4OauKQCSBuNNfzEGtI,52
+python_flexeval-0.2.0.dist-info/licenses/LICENSE,sha256=OlAu_c13gw6-fJ9UdhZBMeNr5STLrnWG_0Hv0SCXtu4,1082
+python_flexeval-0.2.0.dist-info/RECORD,,

{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{python_flexeval-0.1.5.dist-info → python_flexeval-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

python-flexeval 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

python-flexeval 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl