PyPI - cbrkit - Versions diffs - 0.19.2__tar.gz → 0.20.0__tar.gz - Mend

cbrkit 0.19.2__tar.gz → 0.20.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

{cbrkit-0.19.2 → cbrkit-0.20.0}/PKG-INFO +77 -58
{cbrkit-0.19.2 → cbrkit-0.20.0}/README.md +27 -15
cbrkit-0.20.0/pyproject.toml +111 -0
cbrkit-0.20.0/setup.cfg +4 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/__init__.py +4 -0
cbrkit-0.20.0/src/cbrkit/__main__.py +3 -0
cbrkit-0.20.0/src/cbrkit/adapt/__init__.py +18 -0
cbrkit-0.20.0/src/cbrkit/adapt/_attribute_value.py +90 -0
cbrkit-0.20.0/src/cbrkit/adapt/generic.py +86 -0
cbrkit-0.20.0/src/cbrkit/adapt/numbers.py +52 -0
cbrkit-0.20.0/src/cbrkit/adapt/strings.py +103 -0
cbrkit-0.20.0/src/cbrkit/api.py +127 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/cli.py +34 -12
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_common.py +2 -2
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_retrieval.py +2 -3
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/helpers.py +87 -5
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/loaders.py +71 -103
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/retrieval.py +16 -65
cbrkit-0.20.0/src/cbrkit/reuse.py +345 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/__init__.py +1 -2
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_aggregator.py +6 -5
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_attribute_value.py +15 -26
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/collections.py +2 -2
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/generic.py +2 -2
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_isomorphism.py +8 -7
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/numbers.py +2 -2
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/__init__.py +7 -10
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/taxonomy.py +3 -3
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/typing.py +36 -9
cbrkit-0.20.0/src/cbrkit.egg-info/PKG-INFO +390 -0
cbrkit-0.20.0/src/cbrkit.egg-info/SOURCES.txt +42 -0
cbrkit-0.20.0/src/cbrkit.egg-info/dependency_links.txt +1 -0
cbrkit-0.20.0/src/cbrkit.egg-info/entry_points.txt +2 -0
cbrkit-0.20.0/src/cbrkit.egg-info/requires.txt +74 -0
cbrkit-0.20.0/src/cbrkit.egg-info/top_level.txt +1 -0
cbrkit-0.20.0/tests/test_cycle.py +51 -0
cbrkit-0.20.0/tests/test_retrieve.py +159 -0
cbrkit-0.20.0/tests/test_reuse.py +158 -0
cbrkit-0.19.2/cbrkit/__main__.py +0 -3
cbrkit-0.19.2/cbrkit/adaptation.py +0 -17
cbrkit-0.19.2/cbrkit/api.py +0 -74
cbrkit-0.19.2/pyproject.toml +0 -130
{cbrkit-0.19.2 → cbrkit-0.20.0}/LICENSE +0 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/__init__.py +0 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/py.typed +0 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/__init__.py +0 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_astar.py +0 -0
{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_model.py +0 -0

{cbrkit-0.19.2 → cbrkit-0.20.0}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: cbrkit
-Version: 0.19.2
-Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
-Home-page: https://wi2trier.github.io/cbrkit/
-License: MIT
+Version: 0.20.0
+Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
+Author-email: Mirko Lenz <mirko@mirkolenz.com>
+Project-URL: Repository, https://github.com/wi2trier/cbrkit
+Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
+Project-URL: Issues, https://github.com/wi2trier/cbrkit/issues
+Project-URL: Changelog, https://github.com/wi2trier/cbrkit/releases
 Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
-Author: Mirko Lenz
-Author-email: mirko@mirkolenz.com
-Requires-Python: >=3.12,<4.0
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Framework :: Pytest
@@ -16,56 +16,64 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Utilities
 Classifier: Typing :: Typed
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: immutables<1,>=0.21
+Requires-Dist: orjson<4,>=3
+Requires-Dist: polars<2,>=1
+Requires-Dist: pyyaml<7,>=6
+Requires-Dist: xmltodict<1,>=0.13
 Provides-Extra: all
+Requires-Dist: cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]; extra == "all"
+Requires-Dist: numpy<2,>=1; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "all"
+Requires-Dist: numpy<3,>=2; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "all"
+Requires-Dist: numpy<3,>=2; sys_platform == "linux" and extra == "all"
+Requires-Dist: pandas<3,>=2; extra == "all"
+Requires-Dist: pydantic<3,>=2; extra == "all"
+Requires-Dist: scipy<2,>=1; extra == "all"
 Provides-Extra: api
+Requires-Dist: cbrkit[cli]; extra == "api"
+Requires-Dist: fastapi<1,>=0.100; extra == "api"
+Requires-Dist: pydantic-settings<3,>=2; extra == "api"
+Requires-Dist: uvicorn[standard]<1,>=0.30; extra == "api"
 Provides-Extra: cli
+Requires-Dist: rich<14,>=13; extra == "cli"
+Requires-Dist: typer<1,>=0.9; extra == "cli"
 Provides-Extra: eval
+Requires-Dist: ranx<1,>=0.3; extra == "eval"
+Provides-Extra: graphs
+Requires-Dist: networkx<4,>=3; extra == "graphs"
+Requires-Dist: rustworkx<1,>=0.15; extra == "graphs"
 Provides-Extra: llm
+Requires-Dist: cohere<6,>=5; extra == "llm"
+Requires-Dist: ollama<1,>=0.3; extra == "llm"
+Requires-Dist: openai<2,>=1; extra == "llm"
 Provides-Extra: nlp
+Requires-Dist: levenshtein<0.26,>=0.23; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
+Requires-Dist: levenshtein<1,>=0.26; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
+Requires-Dist: levenshtein<1,>=0.26; sys_platform == "linux" and extra == "nlp"
+Requires-Dist: nltk<4,>=3; extra == "nlp"
+Requires-Dist: spacy<3.8,>=3.7; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
+Requires-Dist: spacy<4,>=3.8; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
+Requires-Dist: spacy<4,>=3.8; sys_platform == "linux" and extra == "nlp"
 Provides-Extra: timeseries
+Requires-Dist: minineedle<4,>=3; extra == "timeseries"
+Requires-Dist: dtaidistance<3,>=2; extra == "timeseries"
 Provides-Extra: transformers
-Requires-Dist: cohere (>=5,<6) ; extra == "all" or extra == "llm"
-Requires-Dist: dtaidistance (>=2.3,<3.0) ; extra == "all" or extra == "timeseries"
-Requires-Dist: fastapi[all] (>=0.100,<1.0) ; extra == "all" or extra == "api"
-Requires-Dist: immutables (>=0.21,<1.0)
-Requires-Dist: levenshtein (>=0.23,<0.26) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "all" or extra == "nlp")
-Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "nlp")
-Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "linux") and (extra == "all" or extra == "nlp")
-Requires-Dist: minineedle (>=3.1,<4.0) ; extra == "all" or extra == "timeseries"
-Requires-Dist: networkx (>=3.0,<4.0) ; extra == "all"
-Requires-Dist: nltk (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
-Requires-Dist: numpy (>=1.26,<3.0) ; extra == "all"
-Requires-Dist: ollama (>=0.3,<1) ; extra == "all" or extra == "llm"
-Requires-Dist: openai (>=1.50,<2.0) ; extra == "all" or extra == "llm"
-Requires-Dist: orjson (>=3.9,<4.0)
-Requires-Dist: pandas (>=2.1,<3.0)
-Requires-Dist: polars (>=1.5,<2.0) ; extra == "all"
-Requires-Dist: pyarrow (>=13.0)
-Requires-Dist: pydantic (>=2.0,<3.0)
-Requires-Dist: pyyaml (>=6.0,<7.0)
-Requires-Dist: ranx (>=0.3,<1.0) ; extra == "all" or extra == "eval"
-Requires-Dist: rich (>=13.7,<14.0) ; extra == "all" or extra == "api" or extra == "cli"
-Requires-Dist: rustworkx (>=0.15,<1.0)
-Requires-Dist: scipy (>=1.12,<2.0) ; extra == "all"
-Requires-Dist: sentence-transformers (>=2.2,<4.0) ; extra == "all" or extra == "transformers"
-Requires-Dist: spacy (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
-Requires-Dist: torch (>=2.2,<2.3) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "all" or extra == "transformers")
-Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "transformers")
-Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "linux") and (extra == "all" or extra == "transformers")
-Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
-Requires-Dist: typer (>=0.9,<1.0) ; extra == "all" or extra == "api" or extra == "cli"
-Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
-Requires-Dist: xmltodict (>=0.13,<1.0)
-Project-URL: Repository, https://github.com/wi2trier/cbrkit
-Description-Content-Type: text/markdown
+Requires-Dist: sentence-transformers<4,>=3; extra == "transformers"
+Requires-Dist: torch<2.3,>=2.2; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "transformers"
+Requires-Dist: torch<3,>=2.5; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "transformers"
+Requires-Dist: torch<3,>=2.5; sys_platform == "linux" and extra == "transformers"
+Requires-Dist: transformers<5,>=4; extra == "transformers"
 <!-- markdownlint-disable MD033 MD041 -->
 <h2><p align="center">CBRkit</p></h2>
@@ -109,6 +117,8 @@ The following modules are part of CBRkit:
 - `cbrkit.loaders`: Functions for loading cases and queries.
 - `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
 - `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
+- `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
+- `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
 - `cbrkit.typing`: Generic type definitions for defining custom functions.
 ## Installation
@@ -127,25 +137,28 @@ pip install cbrkit[EXTRA_NAME,...]
 where `EXTRA_NAME` is one of the following:
+- `all`: All optional dependencies
+- `api`: REST API Server
+- `cli`: Command Line Interface (CLI)
+- `eval`: Evaluation tools for common metrics like `precision` and `recall`
+- `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
 - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
+- `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
 - `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
-- `cli`: Command Line Interface (CLI)
-- `api`: REST API Server
-- `all`: All of the above
 ## Loading Cases
 The first step is to load cases and queries.
 We provide predefined functions for the most common formats like CSV, JSON, and XML.
-Additionally, CBRkit also integrates with `pandas` for loading data frames.
-The following example shows how to load cases and queries from a CSV file using `pandas`:
+Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
+The following example shows how to load cases and queries from a CSV file using `polars`:
 ```python
-import pandas as pd
+import polars as pl
 import cbrkit
-df = pd.read_csv("path/to/cases.csv")
-casebase = cbrkit.loaders.pandas(df)
+df = pl.read_csv("path/to/cases.csv")
+casebase = cbrkit.loaders.polars(df)
 ```
 When dealing with formats like JSON, the files can be loaded directly:
@@ -160,17 +173,14 @@ CBRkit expects the type of the queries to match the type of the cases.
 You may define a single query directly in Python as follows
 ```python
-# for pandas
-query = pd.Series({"name": "John", "age": 25})
-# for json
 query = {"name": "John", "age": 25}
 ```
 If you have a collection of queries, you can load them using the same loader functions as for the cases.
 ```python
- # for pandas
-queries = cbrkit.loaders.pandas(pd.read_csv("path/to/queries.csv"))
+ # for polars
+queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
 # for json
 queries = cbrkit.loaders.json("path/to/queries.json")
 ```
@@ -294,8 +304,6 @@ Our result has the following attributes:
 - `ranking` A list of case indices sorted by their similarity score.
 - `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
-## Combining Multiple Retrieval Pipelines
 In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
 To use this pattern, first create the corresponding retrievers using the builder:
@@ -318,6 +326,18 @@ The result has the following two attributes:
 Both `final` and each entry in `steps` have the same attributes as discussed previously.
 The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
+## Adaptation Functions
+Coming soon...
+## Reuse
+Coming soon...
+## Evaluation
+Coming soon...
 ## REST API and CLI
 In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.
@@ -368,4 +388,3 @@ It offers a single endpoint `/retrieve` that accepts POST requests with a JSON b
 ```
 The server will return a JSON object containing the retrieval results for each query.

{cbrkit-0.19.2 → cbrkit-0.20.0}/README.md RENAMED Viewed

@@ -40,6 +40,8 @@ The following modules are part of CBRkit:
 - `cbrkit.loaders`: Functions for loading cases and queries.
 - `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
 - `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
+- `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
+- `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
 - `cbrkit.typing`: Generic type definitions for defining custom functions.
 ## Installation
@@ -58,25 +60,28 @@ pip install cbrkit[EXTRA_NAME,...]
 where `EXTRA_NAME` is one of the following:
+- `all`: All optional dependencies
+- `api`: REST API Server
+- `cli`: Command Line Interface (CLI)
+- `eval`: Evaluation tools for common metrics like `precision` and `recall`
+- `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
 - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
+- `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
 - `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
-- `cli`: Command Line Interface (CLI)
-- `api`: REST API Server
-- `all`: All of the above
 ## Loading Cases
 The first step is to load cases and queries.
 We provide predefined functions for the most common formats like CSV, JSON, and XML.
-Additionally, CBRkit also integrates with `pandas` for loading data frames.
-The following example shows how to load cases and queries from a CSV file using `pandas`:
+Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
+The following example shows how to load cases and queries from a CSV file using `polars`:
 ```python
-import pandas as pd
+import polars as pl
 import cbrkit
-df = pd.read_csv("path/to/cases.csv")
-casebase = cbrkit.loaders.pandas(df)
+df = pl.read_csv("path/to/cases.csv")
+casebase = cbrkit.loaders.polars(df)
 ```
 When dealing with formats like JSON, the files can be loaded directly:
@@ -91,17 +96,14 @@ CBRkit expects the type of the queries to match the type of the cases.
 You may define a single query directly in Python as follows
 ```python
-# for pandas
-query = pd.Series({"name": "John", "age": 25})
-# for json
 query = {"name": "John", "age": 25}
 ```
 If you have a collection of queries, you can load them using the same loader functions as for the cases.
 ```python
- # for pandas
-queries = cbrkit.loaders.pandas(pd.read_csv("path/to/queries.csv"))
+ # for polars
+queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
 # for json
 queries = cbrkit.loaders.json("path/to/queries.json")
 ```
@@ -225,8 +227,6 @@ Our result has the following attributes:
 - `ranking` A list of case indices sorted by their similarity score.
 - `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
-## Combining Multiple Retrieval Pipelines
 In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
 To use this pattern, first create the corresponding retrievers using the builder:
@@ -249,6 +249,18 @@ The result has the following two attributes:
 Both `final` and each entry in `steps` have the same attributes as discussed previously.
 The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
+## Adaptation Functions
+Coming soon...
+## Reuse
+Coming soon...
+## Evaluation
+Coming soon...
 ## REST API and CLI
 In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.

cbrkit-0.20.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,111 @@
+[project]
+name = "cbrkit"
+version = "0.20.0"
+description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
+authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
+readme = "README.md"
+keywords = [
+    "cbr",
+    "case-based reasoning",
+    "api",
+    "similarity",
+    "nlp",
+    "retrieval",
+    "cli",
+    "tool",
+    "library",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Framework :: Pytest",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Natural Language :: English",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Topic :: Utilities",
+    "Typing :: Typed",
+]
+requires-python = ">=3.12"
+dependencies = [
+    "immutables>=0.21,<1",
+    "orjson>=3,<4",
+    "polars>=1,<2",
+    "pyyaml>=6,<7",
+    "xmltodict>=0.13,<1",
+]
+[project.optional-dependencies]
+all = [
+    "cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]",
+    "numpy>=1,<2; sys_platform == 'darwin' and platform_machine == 'x86_64'",
+    "numpy>=2,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "numpy>=2,<3; sys_platform == 'linux'",
+    "pandas>=2,<3",
+    "pydantic>=2,<3",
+    "scipy>=1,<2",
+]
+api = [
+    "cbrkit[cli]",
+    "fastapi>=0.100,<1",
+    "pydantic-settings>=2,<3",
+    "uvicorn[standard]>=0.30,<1",
+]
+cli = ["rich>=13,<14", "typer>=0.9,<1"]
+eval = ["ranx>=0.3,<1"]
+graphs = ["networkx>=3,<4", "rustworkx>=0.15,<1"]
+llm = ["cohere>=5,<6", "ollama>=0.3,<1", "openai>=1,<2"]
+nlp = [
+    "levenshtein>=0.23,<0.26; sys_platform == 'darwin' and platform_machine == 'x86_64'",
+    "levenshtein>=0.26,<1; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "levenshtein>=0.26,<1; sys_platform == 'linux'",
+    "nltk>=3,<4",
+    "spacy>=3.7,<3.8; sys_platform == 'darwin' and platform_machine == 'x86_64'",
+    "spacy>=3.8,<4; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "spacy>=3.8,<4; sys_platform == 'linux'",
+]
+timeseries = ["minineedle>=3,<4", "dtaidistance>=2,<3"]
+transformers = [
+    "sentence-transformers>=3,<4",
+    "torch>=2.2,<2.3; sys_platform == 'darwin' and platform_machine == 'x86_64'",
+    "torch>=2.5,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "torch>=2.5,<3; sys_platform == 'linux'",
+    "transformers>=4,<5",
+]
+[project.urls]
+Repository = "https://github.com/wi2trier/cbrkit"
+Documentation = "https://wi2trier.github.io/cbrkit/"
+Issues = "https://github.com/wi2trier/cbrkit/issues"
+Changelog = "https://github.com/wi2trier/cbrkit/releases"
+[project.scripts]
+cbrkit = "cbrkit.cli:app"
+[dependency-groups]
+test = ["pytest>=8,<9", "pytest-cov>=6,<7"]
+docs = ["pdoc>=15,<16"]
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+[tool.pytest.ini_options]
+addopts = "--cov cbrkit --cov-report term-missing --doctest-modules --ignore data --ignore examples --ignore result"
+doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS"
+[tool.uv]
+default-groups = ["test", "docs"]
+[tool.ruff]
+target-version = "py312"
+[tool.ruff.lint.pydocstyle]
+convention = "google"

cbrkit-0.20.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

{cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/__init__.py RENAMED Viewed

@@ -6,10 +6,12 @@
 """
 from . import (
+    adapt,
     eval,
     helpers,
     loaders,
     retrieval,
+    reuse,
     sim,
     typing,
 )
@@ -18,6 +20,8 @@ __all__ = [
     "loaders",
     "sim",
     "retrieval",
+    "adapt",
+    "reuse",
     "eval",
     "typing",
     "helpers",

cbrkit-0.20.0/src/cbrkit/__main__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .cli import app
+app()

cbrkit-0.20.0/src/cbrkit/adapt/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""
+CBRkit contains a selection of adaptation functions for different data types.
+Besides functions for standard data types like
+numbers (`cbrkit.adapt.numbers`),
+strings (`cbrkit.adapt.strings`),
+and generic data (`cbrkit.adapt.generic`),
+there is also a function for attribute-value data.
+"""
+from . import generic, numbers, strings
+from ._attribute_value import attribute_value
+__all__ = [
+    "generic",
+    "strings",
+    "numbers",
+    "attribute_value",
+]

cbrkit-0.20.0/src/cbrkit/adapt/_attribute_value.py ADDED Viewed

@@ -0,0 +1,90 @@
+from collections.abc import Callable, Mapping, Sequence
+from dataclasses import dataclass
+from typing import Any, override
+from ..helpers import get_metadata
+from ..typing import (
+    AdaptPairFunc,
+    JsonDict,
+    SupportsMetadata,
+)
+__all__ = ["attribute_value"]
+def default_value_getter(obj: Any, key: Any) -> Any:
+    if hasattr(obj, "__getitem__"):
+        return obj[key]
+    else:
+        return getattr(obj, key)
+def default_value_setter(obj: Any, key: Any, value: Any) -> None:
+    if hasattr(obj, "__setitem__"):
+        obj[key] = value
+    else:
+        setattr(obj, key, value)
+@dataclass(slots=True, frozen=True)
+class attribute_value[V](AdaptPairFunc[V], SupportsMetadata):
+    """Adapt values of attributes using specified adaptation functions.
+    This class allows for the adaptation of multiple attributes of a case by applying
+    one or more adaptation functions to each attribute. It supports different data structures
+    like mappings (dictionaries) and dataframes.
+    Args:
+        attributes: A mapping of attribute names to either single adaptation functions or
+            sequences of adaptation functions that will be applied in order.
+        value_getter: Function to retrieve values from objects. Defaults to dictionary/attribute access.
+        value_setter: Function to set values on objects. Defaults to dictionary/attribute assignment.
+    Returns:
+        The case itself, with its attribute values adapted in place.
+    Examples:
+        >>> func = attribute_value({
+        ...     "name": lambda x, y: x if x == y else y,
+        ...     "age": lambda x, y: x if x > y else y,
+        ... })
+        >>> result = func(
+        ...     {"name": "Alice", "age": 30},
+        ...     {"name": "Peter", "age": 25}
+        ... )
+        >>> result
+        {'name': 'Peter', 'age': 30}
+    """
+    attributes: Mapping[str, AdaptPairFunc[Any] | Sequence[AdaptPairFunc[Any]]]
+    value_getter: Callable[[Any, str], Any] = default_value_getter
+    value_setter: Callable[[Any, str, Any], None] = default_value_setter
+    @property
+    @override
+    def metadata(self) -> JsonDict:
+        return {
+            "attributes": {
+                key: get_metadata(value) for key, value in self.attributes.items()
+            },
+            "value_getter": get_metadata(self.value_getter),
+            "value_setter": get_metadata(self.value_setter),
+        }
+    @override
+    def __call__(self, case: V, query: V) -> V:
+        for attr_name in self.attributes:
+            adapt_funcs = self.attributes[attr_name]
+            if not isinstance(adapt_funcs, Sequence):
+                adapt_funcs = [adapt_funcs]
+            case_attr_value = self.value_getter(case, attr_name)
+            query_attr_value = self.value_getter(query, attr_name)
+            for adapt_func in adapt_funcs:
+                case_attr_value = adapt_func(case_attr_value, query_attr_value)
+            self.value_setter(case, attr_name, case_attr_value)
+        return case

cbrkit-0.20.0/src/cbrkit/adapt/generic.py ADDED Viewed

@@ -0,0 +1,86 @@
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Literal, override
+from ..helpers import get_metadata
+from ..typing import AdaptPairFunc, JsonDict, SupportsMetadata
+__all__ = [
+    "pipe",
+    "null",
+]
+@dataclass(slots=True, frozen=True)
+class pipe[V](AdaptPairFunc[V], SupportsMetadata):
+    """Chain multiple adaptation functions together.
+    Args:
+        functions: List of adaptation functions to apply in order.
+    Returns:
+        The adapted value.
+    Examples:
+        >>> func = pipe([
+        ...     lambda x, y: x + y,
+        ...     lambda x, y: x * y,
+        ... ])
+        >>> func(2, 3)
+        15
+    """
+    functions: list[AdaptPairFunc[V]]
+    @property
+    @override
+    def metadata(self) -> JsonDict:
+        return {
+            "functions": [get_metadata(func) for func in self.functions],
+        }
+    @override
+    def __call__(self, case: V, query: V) -> V:
+        current_case = case
+        for func in self.functions:
+            current_case = func(current_case, query)
+        return current_case
+@dataclass(slots=True, frozen=True)
+class null[V](AdaptPairFunc[V], SupportsMetadata):
+    """Perform a null adaptation and return the original case or query value.
+    Args:
+        target: Which value to return, either "case" or "query".
+        copy: Whether to copy the value before returning it.
+    Returns:
+        The selected value (the case by default), deep-copied if `copy` is true.
+    Examples:
+        >>> func = null()
+        >>> func(2, 3)
+        2
+    """
+    target: Literal["case", "query"] = "case"
+    copy: bool = False
+    @override
+    def __call__(self, case: V, query: V) -> V:
+        value: V
+        if self.target == "case":
+            value = case
+        elif self.target == "query":
+            value = query
+        else:
+            raise ValueError(f"Invalid target value: {self.target}")
+        if self.copy:
+            value = deepcopy(value)
+        return value

cbrkit 0.19.2__tar.gz → 0.20.0__tar.gz

cbrkit 0.19.2__tar.gz → 0.20.0__tar.gz