PyPI - cbrkit - Versions diffs - 0.12.2__tar.gz → 0.13.0__tar.gz - Mend

cbrkit 0.12.2 (tar.gz) → 0.13.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{cbrkit-0.12.2 → cbrkit-0.13.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cbrkit
-Version: 0.12.2
+Version: 0.13.0
 Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
 Home-page: https://wi2trier.github.io/cbrkit/
 License: MIT
@@ -299,9 +299,9 @@ result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
 The result has the following two attributes:
 - `final`: Result of the last retriever in the list.
-- `intermediates`: A list of results for each retriever in the list.
+- `steps`: A list of results for each retriever in the list.
-Both `final` and each entry in `intermediates` have the same attributes as discussed previously.
+Both `final` and each entry in `steps` have the same attributes as discussed previously.
 The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
 ## REST API and CLI

{cbrkit-0.12.2 → cbrkit-0.13.0}/README.md RENAMED Viewed

@@ -244,9 +244,9 @@ result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
 The result has the following two attributes:
 - `final`: Result of the last retriever in the list.
-- `intermediates`: A list of results for each retriever in the list.
+- `steps`: A list of results for each retriever in the list.
-Both `final` and each entry in `intermediates` have the same attributes as discussed previously.
+Both `final` and each entry in `steps` have the same attributes as discussed previously.
 The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
 ## REST API and CLI

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/api.py RENAMED Viewed

@@ -1,5 +1,8 @@
 from typing import Any
+from pydantic import ConfigDict
+from pydantic.dataclasses import dataclass
 try:
     from fastapi import FastAPI
     from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -9,6 +12,10 @@ except ModuleNotFoundError:
 import cbrkit
+ApiResult = dataclass(
+    cbrkit.retrieval.Result, config=ConfigDict(arbitrary_types_allowed=True)
+)
 class Settings(BaseSettings):
     model_config = SettingsConfigDict(env_prefix="cbrkit_")
@@ -33,7 +40,7 @@ elif settings.retriever_map is not None:
     retriever = list(retriever_map.values())
-@app.post("/retrieve", response_model=None)
+@app.post("/retrieve", response_model=dict[str, ApiResult])
 def all_retrievers(
     casebase: dict[str, Any], queries: dict[str, Any]
 ) -> dict[str, cbrkit.retrieval.Result]:
@@ -43,7 +50,7 @@ def all_retrievers(
     }
-@app.post("/retrieve/{retriever_name}", response_model=None)
+@app.post("/retrieve/{retriever_name}", response_model=dict[str, ApiResult])
 def named_retriever(
     retriever_name: str, casebase: dict[str, Any], queries: dict[str, Any]
 ) -> dict[str, cbrkit.retrieval.Result]:

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/cli.py RENAMED Viewed

@@ -2,6 +2,7 @@
 .. include:: ../cli.md
 """
+import json
 import os
 import sys
 from pathlib import Path
@@ -26,21 +27,34 @@ def app_callback():
     pass
+# py -m cbrkit retrieve data/cars-1k.csv data/cars-queries.csv examples.cars_retriever:retriever --output-path data/output.json
 @app.command()
 def retrieve(
     casebase_path: Path,
     queries_path: Path,
     retriever: str,
+    search_path: Annotated[list[Path], typer.Option(default_factory=list)],
     print_ranking: bool = True,
+    output_path: Path | None = None,
+    processes: int = 1,
 ) -> None:
+    sys.path.extend(str(x) for x in search_path)
     casebase = cbrkit.loaders.path(casebase_path)
     queries = cbrkit.loaders.path(queries_path)
     retrievers = cbrkit.retrieval.load(retriever)
-    for query_name, query in queries.items():
-        result = cbrkit.retrieval.apply(casebase, query, retrievers)
+    results = cbrkit.retrieval.mapply(casebase, queries, retrievers, processes)
+    if output_path:
+        results_dict = {
+            query_name: result.as_dict() for query_name, result in results.items()
+        }
+        with output_path.with_suffix(".json").open("w") as fp:
+            json.dump(results_dict, fp, indent=2)
-        if print_ranking:
+    if print_ranking:
+        for query_name, result in results.items():
             print(f"Query: {query_name}")
             print(result.ranking)
             print()

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/helpers.py RENAMED Viewed

@@ -1,4 +1,3 @@
-from abc import ABC
 from collections.abc import Collection, Iterable, Mapping, Sequence
 from inspect import signature as inspect_signature
 from typing import Any, cast
@@ -21,7 +20,6 @@ __all__ = [
     "sim2map",
     "unpack_sim",
     "unpack_sims",
-    "AbstractFloat",
     "singleton",
 ]
@@ -156,8 +154,3 @@ def unpack_sim(sim: AnyFloat) -> float:
 def unpack_sims(sims: Iterable[AnyFloat]) -> list[float]:
     return [unpack_sim(sim) for sim in sims]
-class AbstractFloat(ABC, float):
-    def __new__(cls, *args, **kwargs):
-        return float.__new__(cls, args[0])

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/loaders.py RENAMED Viewed

@@ -4,7 +4,6 @@ This module provides several loaders to read data from different file formats an
 import csv as csvlib
 import tomllib
-from collections import abc
 from collections.abc import Callable, Iterator, Mapping
 from importlib import import_module
 from pathlib import Path
@@ -58,7 +57,7 @@ def python(import_name: str) -> Any:
     return getattr(module, obj_name)
-class DataFrameCasebase(abc.Mapping):
+class DataFrameCasebase(Mapping):
     __slots__ = ("df",)
     df: DataFrame

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/retrieval.py RENAMED Viewed

@@ -1,5 +1,6 @@
+import multiprocessing as mp
 from collections.abc import Callable, Collection, Mapping, Sequence
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from typing import Any, Generic
 from cbrkit.helpers import sim2map, unpack_sim
@@ -16,11 +17,12 @@ from cbrkit.typing import (
 __all__ = [
     "build",
+    "mapply",
     "apply",
     "load",
     "load_map",
     "Result",
-    "SingleResult",
+    "ResultStep",
 ]
@@ -31,7 +33,7 @@ def _similarities2ranking(
 @dataclass(slots=True)
-class SingleResult(Generic[KeyType, ValueType, SimType]):
+class ResultStep(Generic[KeyType, ValueType, SimType]):
     similarities: SimMap[KeyType, SimType]
     ranking: list[KeyType]
     casebase: Casebase[KeyType, ValueType]
@@ -41,24 +43,32 @@ class SingleResult(Generic[KeyType, ValueType, SimType]):
         cls,
         similarities: SimMap[KeyType, SimType],
         full_casebase: Casebase[KeyType, ValueType],
-    ) -> "SingleResult[KeyType, ValueType, SimType]":
+    ) -> "ResultStep[KeyType, ValueType, SimType]":
         ranking = _similarities2ranking(similarities)
         casebase = {key: full_casebase[key] for key in ranking}
         return cls(similarities=similarities, ranking=ranking, casebase=casebase)
+    def as_dict(self) -> dict[str, Any]:
+        x = asdict(self)
+        del x["casebase"]
+        return x
 @dataclass(slots=True)
 class Result(Generic[KeyType, ValueType, SimType]):
-    final: SingleResult[KeyType, ValueType, SimType]
-    intermediates: list[SingleResult[KeyType, ValueType, SimType]]
+    steps: list[ResultStep[KeyType, ValueType, SimType]]
     def __init__(
         self,
-        results: list[SingleResult[KeyType, ValueType, SimType]],
+        steps: list[ResultStep[KeyType, ValueType, SimType]],
     ) -> None:
-        self.final = results[-1]
-        self.intermediates = results
+        self.steps = steps
+    @property
+    def final(self) -> ResultStep[KeyType, ValueType, SimType]:
+        return self.steps[-1]
     @property
     def similarities(self) -> SimMap[KeyType, SimType]:
@@ -72,6 +82,51 @@ class Result(Generic[KeyType, ValueType, SimType]):
     def casebase(self) -> Casebase[KeyType, ValueType]:
         return self.final.casebase
+    def as_dict(self) -> dict[str, Any]:
+        x = asdict(self)
+        for entry in x["steps"]:
+            del entry["casebase"]
+        return x
+def mapply(
+    casebase: Casebase[KeyType, ValueType],
+    queries: Mapping[KeyType, ValueType],
+    retrievers: SimMapFunc[KeyType, ValueType, SimType]
+    | Sequence[SimMapFunc[KeyType, ValueType, SimType]],
+    processes: int = 1,
+) -> Mapping[KeyType, Result[KeyType, ValueType, SimType]]:
+    """Applies multiple queries to a Casebase using retriever functions.
+    Args:
+        casebase: The casebase for the query.
+        queries: The queries that will be applied to the casebase
+        retrievers: Retriever functions that will retrieve similar cases (compared to the query) from the casebase
+        processes: Number of CPUs that will be used for multiprocessing.
+            If 1, a regular loop will be used.
+            If 0, the number of processes will be equal to the number of CPUs.
+            Negative values will be treated as 0.
+    Returns:
+        Returns an object of type Result.
+    """
+    if processes != 1:
+        pool_processes = None if processes <= 0 else processes
+        with mp.Pool(pool_processes) as pool:
+            return {
+                key: pool.apply(
+                    apply,
+                    args=(casebase, value, retrievers),
+                )
+                for key, value in queries.items()
+            }
+    return {key: apply(casebase, value, retrievers) for key, value in queries.items()}
 def apply(
     casebase: Casebase[KeyType, ValueType],
@@ -79,7 +134,7 @@ def apply(
     retrievers: SimMapFunc[KeyType, ValueType, SimType]
     | Sequence[SimMapFunc[KeyType, ValueType, SimType]],
 ) -> Result[KeyType, ValueType, SimType]:
-    """Applies a query to a Casebase using retriever functions.
+    """Applies a single query to a Casebase using retriever functions.
     Args:
         casebase: The casebase for the query.
@@ -117,12 +172,12 @@ def apply(
         retrievers = [retrievers]
     assert len(retrievers) > 0
-    results: list[SingleResult[KeyType, ValueType, SimType]] = []
+    results: list[ResultStep[KeyType, ValueType, SimType]] = []
     current_casebase = casebase
     for retriever_func in retrievers:
         sim_map = retriever_func(current_casebase, query)
-        result = SingleResult.build(sim_map, current_casebase)
+        result = ResultStep.build(sim_map, current_casebase)
         results.append(result)
         current_casebase = result.casebase

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/sim/_aggregator.py RENAMED Viewed

@@ -63,6 +63,12 @@ def aggregator(
         >>> agg = aggregator("mean")
         >>> agg([0.5, 0.75, 1.0])
         0.75
+        >>> agg = aggregator("mean", {1: 1, 2: 1, 3: 0})
+        >>> agg({1: 1, 2: 1, 3: 1})
+        1.0
+        >>> agg = aggregator("mean", {1: 1, 2: 1, 3: 2})
+        >>> agg({1: 1, 2: 1, 3: 1})
+        1.0
     """
     pooling_func = _pooling_funcs[pooling] if isinstance(pooling, str) else pooling
@@ -70,6 +76,7 @@ def aggregator(
     def wrapped_func(similarities: SimSeqOrMap[KeyType, AnyFloat]) -> float:
         assert pooling_weights is None or type(similarities) is type(pooling_weights)
+        pooling_factor = 1.0
         sims: Sequence[float]  # noqa: F821
         if isinstance(similarities, Mapping) and isinstance(pooling_weights, Mapping):
@@ -77,6 +84,10 @@ def aggregator(
                 unpack_sim(sim) * pooling_weights.get(key, default_pooling_weight)
                 for key, sim in similarities.items()
             ]
+            pooling_factor = len(similarities) / sum(
+                pooling_weights.get(key, default_pooling_weight)
+                for key in similarities.keys()
+            )
         elif isinstance(similarities, Sequence) and isinstance(
             pooling_weights, Sequence
         ):
@@ -84,6 +95,7 @@ def aggregator(
                 unpack_sim(s) * w
                 for s, w in zip(similarities, pooling_weights, strict=True)
             ]
+            pooling_factor = len(similarities) / sum(pooling_weights)
         elif isinstance(similarities, Sequence) and pooling_weights is None:
             sims = [unpack_sim(s) for s in similarities]
         elif isinstance(similarities, Mapping) and pooling_weights is None:
@@ -91,6 +103,6 @@ def aggregator(
         else:
             raise NotImplementedError()
-        return pooling_func(sims)
+        return pooling_func(sims) * pooling_factor
     return wrapped_func

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/sim/collections.py RENAMED Viewed

@@ -8,7 +8,17 @@ from cbrkit.typing import FloatProtocol, SimPairFunc, SimType, ValueType
 Number = float | int
-__all__ = ["jaccard", "smith_waterman", "dtw"]
+__all__ = [
+    "jaccard",
+    "smith_waterman",
+    "dtw",
+    "isolated_mapping",
+    "mapping",
+    "sequence_mapping",
+    "sequence_correctness",
+    "SequenceSim",
+    "Weight",
+]
 def jaccard() -> SimPairFunc[Collection[Any], float]:

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/sim/graph/_model.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from collections.abc import Hashable
 from dataclasses import dataclass
-from typing import Generic, Protocol, TypeVar
+from typing import Generic, Protocol, TypeVar, runtime_checkable
 NodeKey = TypeVar("NodeKey")
 NodeData = TypeVar("NodeData")
@@ -9,12 +9,14 @@ EdgeData = TypeVar("EdgeData")
 GraphData = TypeVar("GraphData")
+@runtime_checkable
 class EdgeProtocol(Hashable, Protocol[EdgeData, NodeKey]):
     source: NodeKey
     target: NodeKey
     data: EdgeData
+@runtime_checkable
 class NodeProtocol(Hashable, Protocol[NodeData]):
     data: NodeData

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/sim/numbers.py RENAMED Viewed

@@ -4,15 +4,37 @@ from cbrkit.typing import SimPairFunc
 Number = float | int
-__all__ = ["linear", "threshold", "exponential", "sigmoid"]
+__all__ = ["linear_interval", "linear", "threshold", "exponential", "sigmoid"]
+def linear_interval(min: float, max: float) -> SimPairFunc[Number, float]:
+    """Linear similarity function based on the distance between two values within a range.
+    Args:
+        min: Lower bound of the interval. Should be the minimal value of the entire case base.
+        max: Upper bound of the interval. Should be the maximal value of the entire case base.
+    Examples:
+        >>> sim = linear_interval(1950, 2000)
+        >>> sim(1950, 1975)
+        0.5
+    """
+    def wrapped_func(x: Number, y: Number) -> float:
+        if x < min or x > max or y < min or y > max:
+            return 0.0
+        return 1.0 - abs(x - y) / (max - min)
+    return wrapped_func
 def linear(max: float, min: float = 0.0) -> SimPairFunc[Number, float]:
-    """Linear similarity function.
+    """Linear similarity function based on the distance between two values.
     Args:
-        max: Maximum bound of the interval
-        min: Minimum bound of the interval
+        max: Maximum bound of the distance (i.e., the point where the similarity is 0.0)
+        min: Minimum bound of the distance (i.e., the point where the similarity is 1.0)
     ![linear](../../assets/numeric/linear.png)

{cbrkit-0.12.2 → cbrkit-0.13.0}/cbrkit/typing.py RENAMED Viewed

@@ -1,11 +1,9 @@
 from collections.abc import Mapping, Sequence
 from pathlib import Path
-from typing import (
-    Protocol,
-    TypeVar,
-)
+from typing import Protocol, TypeVar, runtime_checkable
+@runtime_checkable
 class FloatProtocol(Protocol):
     value: float

{cbrkit-0.12.2 → cbrkit-0.13.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cbrkit"
-version = "0.12.2"
+version = "0.13.0"
 description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
 authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
 license = "MIT"