cbrkit 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-1.2.0 → cbrkit-1.3.0}/PKG-INFO +12 -10
- {cbrkit-1.2.0 → cbrkit-1.3.0}/README.md +4 -4
- {cbrkit-1.2.0 → cbrkit-1.3.0}/pyproject.toml +5 -5
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/api.py +2 -2
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/common.py +10 -7
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/helpers.py +2 -2
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/indexable.py +301 -90
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/storage.py +10 -5
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/indexable.py +180 -41
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/wrappers.py +37 -5
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/aggregator.py +4 -3
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/embed.py +123 -29
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/wrappers.py +5 -5
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/anthropic.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/cohere.py +3 -1
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_completions.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_responses.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/typing.py +12 -4
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/__main__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/attribute_value.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/generic.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/numbers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/strings.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/cli.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/constants.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/cycle.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/dumpers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/retrieval.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/loaders.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/graph.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/result.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/py.typed +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/rerank.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/attribute_value.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/collections.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/generic.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/alignment.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/astar.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/brute_force.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/common.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/dfs.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/greedy.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/lap.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/qap.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/vf2.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/numbers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/pooling.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/strings.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/taxonomy.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/model.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/prompts.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/google.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/instructor.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/model.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/ollama.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_agents.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/pydantic_ai.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/system.py +0 -0
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cbrkit
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
|
|
5
5
|
Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
|
|
6
6
|
Author: Mirko Lenz
|
|
7
7
|
Author-email: Mirko Lenz <mirko@mirkolenz.com>
|
|
8
|
+
License-Expression: MIT
|
|
8
9
|
Classifier: Development Status :: 4 - Beta
|
|
9
10
|
Classifier: Environment :: Console
|
|
10
11
|
Classifier: Framework :: Pytest
|
|
11
12
|
Classifier: Intended Audience :: Developers
|
|
12
13
|
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Natural Language :: English
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
@@ -41,9 +41,9 @@ Requires-Dist: fastmcp>=3,<4 ; extra == 'api'
|
|
|
41
41
|
Requires-Dist: bm25s[core,stem,indexing]>=0.3,<1 ; extra == 'bm25'
|
|
42
42
|
Requires-Dist: chromadb>=1,<2 ; extra == 'chromadb'
|
|
43
43
|
Requires-Dist: chonkie>=1,<2 ; extra == 'chunking'
|
|
44
|
-
Requires-Dist: rich>=
|
|
45
|
-
Requires-Dist: typer>=0.
|
|
46
|
-
Requires-Dist: cohere>=
|
|
44
|
+
Requires-Dist: rich>=14,<16 ; extra == 'cli'
|
|
45
|
+
Requires-Dist: typer>=0.20,<1 ; extra == 'cli'
|
|
46
|
+
Requires-Dist: cohere>=6,<7 ; extra == 'cohere'
|
|
47
47
|
Requires-Dist: ranx>=0.3,<1 ; extra == 'eval'
|
|
48
48
|
Requires-Dist: google-genai>=1,<2 ; extra == 'google'
|
|
49
49
|
Requires-Dist: networkx>=3,<4 ; extra == 'graphs'
|
|
@@ -66,6 +66,7 @@ Requires-Dist: sentence-transformers>=4,<6 ; extra == 'transformers'
|
|
|
66
66
|
Requires-Dist: torch>=2.5,<3 ; extra == 'transformers'
|
|
67
67
|
Requires-Dist: transformers>=4,<6 ; extra == 'transformers'
|
|
68
68
|
Requires-Dist: voyageai>=0.3,<1 ; extra == 'voyageai'
|
|
69
|
+
Requires-Dist: zvec>=0.2,<1 ; extra == 'zvec'
|
|
69
70
|
Requires-Python: >=3.13, <4
|
|
70
71
|
Project-URL: Repository, https://github.com/wi2trier/cbrkit
|
|
71
72
|
Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
|
|
@@ -97,6 +98,7 @@ Provides-Extra: sql
|
|
|
97
98
|
Provides-Extra: timeseries
|
|
98
99
|
Provides-Extra: transformers
|
|
99
100
|
Provides-Extra: voyageai
|
|
101
|
+
Provides-Extra: zvec
|
|
100
102
|
Description-Content-Type: text/markdown
|
|
101
103
|
|
|
102
104
|
<!-- markdownlint-disable MD033 MD041 -->
|
|
@@ -846,17 +848,17 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
|
|
|
846
848
|
|
|
847
849
|
### Indexed Retrieval
|
|
848
850
|
|
|
849
|
-
|
|
851
|
+
Retrievers like `bm25`, `embed`, `lancedb`, `chromadb`, and `zvec` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
|
|
850
852
|
This is useful for large casebases or when using external search backends.
|
|
851
853
|
|
|
852
|
-
To use indexed retrieval, first create a retriever and call its `
|
|
854
|
+
To use indexed retrieval, first create a retriever and call its `put_index()` method:
|
|
853
855
|
|
|
854
856
|
```python
|
|
855
857
|
from frozendict import frozendict
|
|
856
858
|
|
|
857
859
|
bm25_func = cbrkit.sim.embed.bm25(language="en")
|
|
858
860
|
retriever = cbrkit.retrieval.bm25(conversion_func=bm25_func)
|
|
859
|
-
retriever.
|
|
861
|
+
retriever.put_index(frozendict(casebase))
|
|
860
862
|
```
|
|
861
863
|
|
|
862
864
|
Then pass an empty casebase (`{}`) to signal that the retriever should use its pre-indexed data:
|
|
@@ -873,7 +875,7 @@ result = cbrkit.retrieval.apply_query_indexed(query, retriever)
|
|
|
873
875
|
result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
|
|
874
876
|
```
|
|
875
877
|
|
|
876
|
-
If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `
|
|
878
|
+
If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `put_index()` first.
|
|
877
879
|
|
|
878
880
|
The `System` class also supports indexed retrieval by defaulting the casebase to an empty dict.
|
|
879
881
|
This allows creating a system where all retrievers are pre-indexed and no casebase needs to be provided at query time.
|
|
@@ -745,17 +745,17 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
|
|
|
745
745
|
|
|
746
746
|
### Indexed Retrieval
|
|
747
747
|
|
|
748
|
-
|
|
748
|
+
Retrievers like `bm25`, `embed`, `lancedb`, `chromadb`, and `zvec` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
|
|
749
749
|
This is useful for large casebases or when using external search backends.
|
|
750
750
|
|
|
751
|
-
To use indexed retrieval, first create a retriever and call its `
|
|
751
|
+
To use indexed retrieval, first create a retriever and call its `put_index()` method:
|
|
752
752
|
|
|
753
753
|
```python
|
|
754
754
|
from frozendict import frozendict
|
|
755
755
|
|
|
756
756
|
bm25_func = cbrkit.sim.embed.bm25(language="en")
|
|
757
757
|
retriever = cbrkit.retrieval.bm25(conversion_func=bm25_func)
|
|
758
|
-
retriever.
|
|
758
|
+
retriever.put_index(frozendict(casebase))
|
|
759
759
|
```
|
|
760
760
|
|
|
761
761
|
Then pass an empty casebase (`{}`) to signal that the retriever should use its pre-indexed data:
|
|
@@ -772,7 +772,7 @@ result = cbrkit.retrieval.apply_query_indexed(query, retriever)
|
|
|
772
772
|
result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
|
|
773
773
|
```
|
|
774
774
|
|
|
775
|
-
If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `
|
|
775
|
+
If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `put_index()` first.
|
|
776
776
|
|
|
777
777
|
The `System` class also supports indexed retrieval by defaulting the casebase to an empty dict.
|
|
778
778
|
This allows creating a system where all retrievers are pre-indexed and no casebase needs to be provided at query time.
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "cbrkit"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.3.0"
|
|
4
4
|
description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
|
|
5
5
|
authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
|
|
6
6
|
readme = "README.md"
|
|
7
|
+
license = "MIT"
|
|
7
8
|
keywords = [
|
|
8
9
|
"cbr",
|
|
9
10
|
"case-based reasoning",
|
|
@@ -21,7 +22,6 @@ classifiers = [
|
|
|
21
22
|
"Framework :: Pytest",
|
|
22
23
|
"Intended Audience :: Developers",
|
|
23
24
|
"Intended Audience :: Science/Research",
|
|
24
|
-
"License :: OSI Approved :: MIT License",
|
|
25
25
|
"Natural Language :: English",
|
|
26
26
|
"Operating System :: OS Independent",
|
|
27
27
|
"Programming Language :: Python :: 3.13",
|
|
@@ -49,7 +49,7 @@ dependencies = [
|
|
|
49
49
|
[project.optional-dependencies]
|
|
50
50
|
# LLM providers
|
|
51
51
|
anthropic = ["anthropic>=0.40,<1"]
|
|
52
|
-
cohere = ["cohere>=
|
|
52
|
+
cohere = ["cohere>=6,<7"]
|
|
53
53
|
google = ["google-genai>=1,<2"]
|
|
54
54
|
instructor = ["instructor>=1,<2"]
|
|
55
55
|
ollama = ["ollama>=0.3,<1"]
|
|
@@ -77,10 +77,10 @@ chromadb = ["chromadb>=1,<2"]
|
|
|
77
77
|
lancedb = ["lancedb>=0.20,<1"]
|
|
78
78
|
pandas = ["pandas>=2,<4"]
|
|
79
79
|
sql = ["sqlalchemy>=2,<3"]
|
|
80
|
-
|
|
80
|
+
zvec = ["zvec>=0.2,<1"]
|
|
81
81
|
|
|
82
82
|
# Tools
|
|
83
|
-
cli = ["rich>=
|
|
83
|
+
cli = ["rich>=14,<16", "typer>=0.20,<1"]
|
|
84
84
|
eval = ["ranx>=0.3,<1"]
|
|
85
85
|
timeseries = ["minineedle>=3,<4"]
|
|
86
86
|
|
|
@@ -189,7 +189,7 @@ def synthesize(
|
|
|
189
189
|
)
|
|
190
190
|
|
|
191
191
|
|
|
192
|
-
def openapi_generator():
|
|
192
|
+
def openapi_generator() -> dict[str, Any]:
|
|
193
193
|
"""Generate and cache the OpenAPI schema for the CBRKit API."""
|
|
194
194
|
if not app.openapi_schema:
|
|
195
195
|
app.openapi_schema = get_openapi(
|
|
@@ -203,4 +203,4 @@ def openapi_generator():
|
|
|
203
203
|
return app.openapi_schema
|
|
204
204
|
|
|
205
205
|
|
|
206
|
-
app.openapi = openapi_generator # type: ignore[assignment]
|
|
206
|
+
app.openapi = openapi_generator # type: ignore[assignment] # ty: ignore[invalid-assignment]
|
|
@@ -487,15 +487,18 @@ def generate_metrics(
|
|
|
487
487
|
>>> generate_metrics(["precision", "recall"], ks=5)
|
|
488
488
|
['precision@5', 'recall@5']
|
|
489
489
|
"""
|
|
490
|
-
if
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
490
|
+
ks_list: list[int | None] = [ks] if ks is None or isinstance(ks, int) else list(ks)
|
|
491
|
+
relevance_levels_list: list[int | None] = (
|
|
492
|
+
[relevance_levels]
|
|
493
|
+
if relevance_levels is None or isinstance(relevance_levels, int)
|
|
494
|
+
else list(relevance_levels)
|
|
495
|
+
)
|
|
495
496
|
|
|
496
497
|
return [
|
|
497
|
-
generate_metric(
|
|
498
|
-
for
|
|
498
|
+
generate_metric(metric, k, relevance_level)
|
|
499
|
+
for metric, k, relevance_level in itertools.product(
|
|
500
|
+
metrics, ks_list, relevance_levels_list
|
|
501
|
+
)
|
|
499
502
|
]
|
|
500
503
|
|
|
501
504
|
|
|
@@ -244,7 +244,7 @@ def singleton[T](x: Mapping[Any, T] | Collection[T]) -> T:
|
|
|
244
244
|
if isinstance(x, Mapping):
|
|
245
245
|
return cast(T, next(iter(x.values())))
|
|
246
246
|
elif isinstance(x, Collection):
|
|
247
|
-
return
|
|
247
|
+
return next(iter(x))
|
|
248
248
|
|
|
249
249
|
raise TypeError(f"Expected a Mapping or Collection, but got {type(x)}")
|
|
250
250
|
|
|
@@ -390,7 +390,7 @@ def is_factory[T](obj: MaybeFactory[T]) -> TypeIs[Factory[T]]:
|
|
|
390
390
|
def produce_factory[T](obj: MaybeFactory[T]) -> T:
|
|
391
391
|
"""Resolve a factory by calling it, or return the value as-is."""
|
|
392
392
|
if is_factory(obj):
|
|
393
|
-
return obj()
|
|
393
|
+
return cast(T, obj())
|
|
394
394
|
|
|
395
395
|
return cast(T, obj)
|
|
396
396
|
|