cbrkit 1.2.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-1.2.0 → cbrkit-1.4.0}/PKG-INFO +76 -47
- {cbrkit-1.2.0 → cbrkit-1.4.0}/README.md +58 -38
- {cbrkit-1.2.0 → cbrkit-1.4.0}/pyproject.toml +64 -48
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/api.py +2 -2
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/eval/common.py +12 -9
- cbrkit-1.4.0/src/cbrkit/filter.py +81 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/helpers.py +174 -157
- cbrkit-1.4.0/src/cbrkit/indexable/__init__.py +54 -0
- cbrkit-1.4.0/src/cbrkit/indexable/_common.py +262 -0
- cbrkit-1.4.0/src/cbrkit/indexable/chromadb.py +271 -0
- cbrkit-1.4.0/src/cbrkit/indexable/lancedb.py +290 -0
- cbrkit-1.4.0/src/cbrkit/indexable/pgvector.py +345 -0
- cbrkit-1.4.0/src/cbrkit/indexable/sqlalchemy.py +733 -0
- cbrkit-1.4.0/src/cbrkit/indexable/sqlite_vec.py +403 -0
- cbrkit-1.4.0/src/cbrkit/indexable/zvec.py +353 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/loaders.py +34 -7
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retain/build.py +2 -2
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retain/storage.py +10 -5
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retrieval/__init__.py +16 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/apply.py +293 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/__init__.py +34 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/_common.py +472 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/bm25.py +178 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/chromadb.py +161 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/embed.py +247 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/lancedb.py +162 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/pgvector.py +248 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/sqlite_vec.py +294 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/indexable/zvec.py +205 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/rerank/__init__.py +18 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/rerank/_common.py +53 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/rerank/cohere.py +41 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/rerank/sentence_transformers.py +101 -0
- cbrkit-1.4.0/src/cbrkit/retrieval/rerank/voyageai.py +38 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retrieval/wrappers.py +38 -6
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/reuse/build.py +3 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/revise/build.py +4 -2
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/aggregator.py +4 -3
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/collections.py +3 -3
- cbrkit-1.4.0/src/cbrkit/sim/embed/__init__.py +66 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/core.py +327 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/metrics.py +158 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/__init__.py +43 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/bm25.py +181 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/cohere.py +45 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/ollama.py +39 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/openai.py +65 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/pydantic_ai.py +31 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/sentence_transformers.py +80 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/spacy.py +124 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/sparse_encoder.py +93 -0
- cbrkit-1.4.0/src/cbrkit/sim/embed/providers/voyageai.py +38 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/alignment.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/wrappers.py +5 -5
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/apply.py +2 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/anthropic.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/cohere.py +3 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/openai_completions.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/openai_responses.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/system.py +1 -1
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/typing.py +116 -6
- cbrkit-1.2.0/src/cbrkit/indexable.py +0 -717
- cbrkit-1.2.0/src/cbrkit/retrieval/apply.py +0 -164
- cbrkit-1.2.0/src/cbrkit/retrieval/indexable.py +0 -1050
- cbrkit-1.2.0/src/cbrkit/retrieval/rerank.py +0 -219
- cbrkit-1.2.0/src/cbrkit/sim/embed.py +0 -994
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/__main__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/adapt/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/adapt/attribute_value.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/adapt/generic.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/adapt/numbers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/adapt/strings.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/cli.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/constants.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/cycle.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/dumpers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/eval/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/eval/retrieval.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/model/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/model/graph.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/model/result.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/py.typed +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retain/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retain/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/retrieval/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/reuse/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/reuse/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/revise/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/revise/apply.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/attribute_value.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/generic.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/astar.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/brute_force.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/common.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/dfs.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/greedy.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/lap.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/qap.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/graphs/vf2.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/numbers.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/pooling.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/strings.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/sim/taxonomy.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/build.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/model.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/prompts.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/__init__.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/google.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/instructor.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/model.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/ollama.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/openai_agents.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/pydantic_ai.py +0 -0
- {cbrkit-1.2.0 → cbrkit-1.4.0}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cbrkit
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
|
|
5
5
|
Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
|
|
6
6
|
Author: Mirko Lenz
|
|
7
7
|
Author-email: Mirko Lenz <mirko@mirkolenz.com>
|
|
8
|
+
License-Expression: MIT
|
|
8
9
|
Classifier: Development Status :: 4 - Beta
|
|
9
10
|
Classifier: Environment :: Console
|
|
10
11
|
Classifier: Framework :: Pytest
|
|
11
12
|
Classifier: Intended Audience :: Developers
|
|
12
13
|
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Natural Language :: English
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
@@ -30,7 +30,7 @@ Requires-Dist: pyyaml>=6,<7
|
|
|
30
30
|
Requires-Dist: rtoml>=0.12,<1
|
|
31
31
|
Requires-Dist: scipy>=1,<2
|
|
32
32
|
Requires-Dist: xmltodict>=1,<2
|
|
33
|
-
Requires-Dist: cbrkit[anthropic,api,bm25,chromadb,chunking,cohere,eval,google,graphs,graphviz,instructor,lancedb,levenshtein,nltk,ollama,openai,openai-agents,pandas,pydantic-ai,spacy,sql,timeseries,transformers,voyageai,zvec] ; extra == 'all'
|
|
33
|
+
Requires-Dist: cbrkit[anthropic,api,bm25,chromadb,chunking,cohere,eval,google,graphs,graphviz,instructor,lancedb,levenshtein,nltk,ollama,openai,openai-agents,pandas,pgvector,pydantic-ai,spacy,sql,sqlite-vec,timeseries,transformers,voyageai,zvec] ; extra == 'all'
|
|
34
34
|
Requires-Dist: anthropic>=0.40,<1 ; extra == 'anthropic'
|
|
35
35
|
Requires-Dist: cbrkit[cli] ; extra == 'api'
|
|
36
36
|
Requires-Dist: fastapi>=0.100,<1 ; extra == 'api'
|
|
@@ -41,11 +41,11 @@ Requires-Dist: fastmcp>=3,<4 ; extra == 'api'
|
|
|
41
41
|
Requires-Dist: bm25s[core,stem,indexing]>=0.3,<1 ; extra == 'bm25'
|
|
42
42
|
Requires-Dist: chromadb>=1,<2 ; extra == 'chromadb'
|
|
43
43
|
Requires-Dist: chonkie>=1,<2 ; extra == 'chunking'
|
|
44
|
-
Requires-Dist: rich>=
|
|
45
|
-
Requires-Dist: typer>=0.
|
|
46
|
-
Requires-Dist: cohere>=
|
|
44
|
+
Requires-Dist: rich>=14,<16 ; extra == 'cli'
|
|
45
|
+
Requires-Dist: typer>=0.20,<1 ; extra == 'cli'
|
|
46
|
+
Requires-Dist: cohere>=7,<8 ; extra == 'cohere'
|
|
47
47
|
Requires-Dist: ranx>=0.3,<1 ; extra == 'eval'
|
|
48
|
-
Requires-Dist: google-genai>=
|
|
48
|
+
Requires-Dist: google-genai>=2,<3 ; extra == 'google'
|
|
49
49
|
Requires-Dist: networkx>=3,<4 ; extra == 'graphs'
|
|
50
50
|
Requires-Dist: rustworkx>=0.15,<1 ; extra == 'graphs'
|
|
51
51
|
Requires-Dist: pygraphviz>=1,<2 ; extra == 'graphviz'
|
|
@@ -58,14 +58,20 @@ Requires-Dist: openai>=1,<3 ; extra == 'openai'
|
|
|
58
58
|
Requires-Dist: tiktoken>=0.8,<1 ; extra == 'openai'
|
|
59
59
|
Requires-Dist: openai-agents>=0.2,<1 ; extra == 'openai-agents'
|
|
60
60
|
Requires-Dist: pandas>=2,<4 ; extra == 'pandas'
|
|
61
|
+
Requires-Dist: pgvector>=0.4,<1 ; extra == 'pgvector'
|
|
62
|
+
Requires-Dist: cbrkit[sql] ; extra == 'pgvector'
|
|
61
63
|
Requires-Dist: pydantic-ai-slim>=1,<2 ; extra == 'pydantic-ai'
|
|
62
64
|
Requires-Dist: spacy>=3.8,<4 ; extra == 'spacy'
|
|
63
|
-
Requires-Dist: sqlalchemy>=2,<3 ; extra == 'sql'
|
|
65
|
+
Requires-Dist: sqlalchemy[asyncio]>=2,<3 ; extra == 'sql'
|
|
66
|
+
Requires-Dist: sqlite-vec>=0.1,<1 ; extra == 'sqlite-vec'
|
|
67
|
+
Requires-Dist: aiosqlite>=0.20,<1 ; extra == 'sqlite-vec'
|
|
68
|
+
Requires-Dist: cbrkit[sql] ; extra == 'sqlite-vec'
|
|
64
69
|
Requires-Dist: minineedle>=3,<4 ; extra == 'timeseries'
|
|
65
70
|
Requires-Dist: sentence-transformers>=4,<6 ; extra == 'transformers'
|
|
66
71
|
Requires-Dist: torch>=2.5,<3 ; extra == 'transformers'
|
|
67
72
|
Requires-Dist: transformers>=4,<6 ; extra == 'transformers'
|
|
68
73
|
Requires-Dist: voyageai>=0.3,<1 ; extra == 'voyageai'
|
|
74
|
+
Requires-Dist: zvec>=0.2,<1 ; extra == 'zvec'
|
|
69
75
|
Requires-Python: >=3.13, <4
|
|
70
76
|
Project-URL: Repository, https://github.com/wi2trier/cbrkit
|
|
71
77
|
Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
|
|
@@ -91,12 +97,15 @@ Provides-Extra: ollama
|
|
|
91
97
|
Provides-Extra: openai
|
|
92
98
|
Provides-Extra: openai-agents
|
|
93
99
|
Provides-Extra: pandas
|
|
100
|
+
Provides-Extra: pgvector
|
|
94
101
|
Provides-Extra: pydantic-ai
|
|
95
102
|
Provides-Extra: spacy
|
|
96
103
|
Provides-Extra: sql
|
|
104
|
+
Provides-Extra: sqlite-vec
|
|
97
105
|
Provides-Extra: timeseries
|
|
98
106
|
Provides-Extra: transformers
|
|
99
107
|
Provides-Extra: voyageai
|
|
108
|
+
Provides-Extra: zvec
|
|
100
109
|
Description-Content-Type: text/markdown
|
|
101
110
|
|
|
102
111
|
<!-- markdownlint-disable MD033 MD041 -->
|
|
@@ -229,12 +238,14 @@ df = pl.read_csv("path/to/cases.csv")
|
|
|
229
238
|
casebase = cbrkit.loaders.polars(df)
|
|
230
239
|
```
|
|
231
240
|
|
|
232
|
-
For
|
|
241
|
+
For ad-hoc SQLite loading, CBRkit ships a stdlib-based loader:
|
|
233
242
|
|
|
234
243
|
```python
|
|
235
244
|
casebase = cbrkit.loaders.sqlite("path/to/database.db", "SELECT * FROM cases")
|
|
236
245
|
```
|
|
237
246
|
|
|
247
|
+
For richer relational backends (filters, upserts, vector/FTS search via pgvector on PostgreSQL or sqlite-vec on SQLite), see `cbrkit.indexable.sqlalchemy`, `cbrkit.indexable.pgvector`, and `cbrkit.indexable.sqlite_vec`.
|
|
248
|
+
|
|
238
249
|
**Tip:** You can validate a loaded casebase against a Pydantic model using `cbrkit.loaders.validate()`:
|
|
239
250
|
|
|
240
251
|
```python
|
|
@@ -680,8 +691,7 @@ The result contains `similarities` with quality assessment scores for each case.
|
|
|
680
691
|
## Retain
|
|
681
692
|
|
|
682
693
|
The retain phase decides whether and how to integrate new cases into the casebase.
|
|
683
|
-
|
|
684
|
-
You build a retain pipeline by specifying an assessment function and a storage function:
|
|
694
|
+
Build a retain pipeline from an assessment function and a storage function:
|
|
685
695
|
|
|
686
696
|
```python
|
|
687
697
|
retainer = cbrkit.retain.build(
|
|
@@ -693,27 +703,9 @@ retainer = cbrkit.retain.build(
|
|
|
693
703
|
)
|
|
694
704
|
```
|
|
695
705
|
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
- `indexable`: Keeps an `IndexableFunc`'s index in sync with the casebase.
|
|
700
|
-
|
|
701
|
-
You can filter retained cases based on their assessment scores using the `dropout` wrapper:
|
|
702
|
-
|
|
703
|
-
```python
|
|
704
|
-
retainer = cbrkit.retain.dropout(
|
|
705
|
-
retainer_func=cbrkit.retain.build(...),
|
|
706
|
-
min_similarity=0.5,
|
|
707
|
-
)
|
|
708
|
-
```
|
|
709
|
-
|
|
710
|
-
The retainer can be applied to a revise result:
|
|
711
|
-
|
|
712
|
-
```python
|
|
713
|
-
result = cbrkit.retain.apply_result(revise_result, retainer)
|
|
714
|
-
```
|
|
715
|
-
|
|
716
|
-
The result contains `similarities` with fitness scores and `casebase` with the updated cases.
|
|
706
|
+
The built-in storage functions are `static` (generates collision-free keys from a reference casebase) and `indexable` (keeps an `IndexableFunc`'s index in sync with the casebase).
|
|
707
|
+
Wrap a retainer with `dropout` to filter by assessment score (e.g. `min_similarity=0.5`), then apply it to a revise result via `cbrkit.retain.apply_result(revise_result, retainer)`.
|
|
708
|
+
The result exposes `similarities` (fitness scores) and `casebase` (updated cases).
|
|
717
709
|
|
|
718
710
|
## Full CBR Cycle
|
|
719
711
|
|
|
@@ -846,37 +838,74 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
|
|
|
846
838
|
|
|
847
839
|
### Indexed Retrieval
|
|
848
840
|
|
|
849
|
-
|
|
850
|
-
|
|
841
|
+
Indexed retrieval pre-indexes the casebase once and then queries it without passing the full casebase each time, which helps for large casebases or external search backends.
|
|
842
|
+
Index maintenance lives on whichever object owns the index.
|
|
851
843
|
|
|
852
|
-
|
|
844
|
+
The self-contained `bm25` and `embed` retrievers own their index, so you call `put_index()` on the retriever:
|
|
853
845
|
|
|
854
846
|
```python
|
|
855
847
|
from frozendict import frozendict
|
|
856
848
|
|
|
857
|
-
|
|
858
|
-
retriever
|
|
859
|
-
retriever.create_index(frozendict(casebase))
|
|
849
|
+
retriever = cbrkit.retrieval.bm25(conversion_func=cbrkit.sim.embed.bm25(language="en"))
|
|
850
|
+
retriever.put_index(frozendict(casebase))
|
|
860
851
|
```
|
|
861
852
|
|
|
862
|
-
|
|
853
|
+
The storage-backed `lancedb`, `chromadb`, `zvec`, `pgvector`, and `sqlite_vec` retrievers are pure query paths over a separate `cbrkit.indexable` storage that owns the index, so you index on the storage and wrap it for querying:
|
|
863
854
|
|
|
864
855
|
```python
|
|
865
|
-
|
|
856
|
+
storage = cbrkit.indexable.lancedb(uri="./cases", table_name="cases")
|
|
857
|
+
storage.put_index(frozendict(casebase))
|
|
858
|
+
retriever = cbrkit.retrieval.lancedb(storage=storage, search_type="dense")
|
|
866
859
|
```
|
|
867
860
|
|
|
868
|
-
|
|
861
|
+
Query a pre-indexed retriever with `apply_query_indexed` / `apply_queries_indexed` (or pass an empty casebase `{}` to `apply_query`); querying an un-indexed retriever raises `ValueError`:
|
|
869
862
|
|
|
870
863
|
```python
|
|
871
864
|
result = cbrkit.retrieval.apply_query_indexed(query, retriever)
|
|
872
|
-
# or for multiple queries:
|
|
873
|
-
result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
|
|
874
865
|
```
|
|
875
866
|
|
|
876
|
-
|
|
867
|
+
The `System` class also defaults its casebase to `{}`, so a system of pre-indexed retrievers needs no casebase at query time.
|
|
868
|
+
|
|
869
|
+
#### Typed Values and the Retain Caveat
|
|
870
|
+
|
|
871
|
+
Each backend has one text-field knob — `value_column` (`value_field` for `zvec`/`chromadb`) — naming the embeddable text, and the value type `V` follows the schema source:
|
|
872
|
+
|
|
873
|
+
- **Plain text** (`V = str`, the default) — the bare string is stored under the text knob and read back as a string.
|
|
874
|
+
- **Typed model** (`V = YourModel`) — pass a `model`: a dataclass or Pydantic model for `lancedb`/`zvec`/`chromadb` (fields become columns), or a SQLAlchemy mapped class for `sqlalchemy`/`pgvector`/`sqlite_vec` (its `__table__` defines the schema). Reads reconstruct model instances.
|
|
875
|
+
- **Mapping** (`V = Mapping[str, Any]`) — `sqlalchemy`/`pgvector`/`sqlite_vec` only, via a host-supplied `table` or `reflect=True`.
|
|
876
|
+
|
|
877
|
+
```python
|
|
878
|
+
# plain strings — cbrkit builds the table
|
|
879
|
+
store = cbrkit.indexable.pgvector[str, str](
|
|
880
|
+
url=..., value_column="body", pgvector_dim=384, conversion_func=embed,
|
|
881
|
+
)
|
|
882
|
+
|
|
883
|
+
# typed rows — a SQLAlchemy mapped class defines the schema
|
|
884
|
+
class Car(Base):
|
|
885
|
+
__tablename__ = "cars"
|
|
886
|
+
key: Mapped[str] = mapped_column(primary_key=True)
|
|
887
|
+
desc: Mapped[str] = mapped_column()
|
|
888
|
+
_pgvec: Mapped[Any] = mapped_column(cbrkit.indexable.PGVECTOR(384), nullable=False)
|
|
889
|
+
|
|
890
|
+
store = cbrkit.indexable.pgvector[str, Car](url=..., model=Car, value_column="desc", ...)
|
|
891
|
+
```
|
|
892
|
+
|
|
893
|
+
Pass `vector_type="halfvec"` for half-precision storage (~2x smaller, negligible recall loss); for a typed model, declare the column with the re-exported `cbrkit.indexable.HALFVEC` instead of `PGVECTOR`.
|
|
894
|
+
|
|
895
|
+
For a self-contained, file-based store, `sqlite_vec` offers the same dense/sparse/hybrid API on SQLite via the [`sqlite-vec`](https://github.com/asg017/sqlite-vec) extension (loaded automatically). Dense KNN uses a `vec0` virtual table (so the backend inherits future `vec0` capabilities such as approximate search, and supports quantized `vector_type="int8"` storage today), sparse search uses built-in FTS5, and `Filter` `WHERE` clauses compose by joining matches back to the main table:
|
|
896
|
+
|
|
897
|
+
```python
|
|
898
|
+
store = cbrkit.indexable.sqlite_vec[str, str](
|
|
899
|
+
url="sqlite+aiosqlite:///cases.db",
|
|
900
|
+
value_column="body", vector_dim=384, index_type="hybrid", conversion_func=embed,
|
|
901
|
+
)
|
|
902
|
+
store.put_index(frozendict(casebase))
|
|
903
|
+
retriever = cbrkit.retrieval.indexable.sqlite_vec(storage=store, search_type="hybrid")
|
|
904
|
+
```
|
|
877
905
|
|
|
878
|
-
|
|
879
|
-
|
|
906
|
+
**Retain caveat:** the storage-backed retrievers search by the text column and always return `Casebase[K, str]`, projecting richer values down to their text.
|
|
907
|
+
A retrieve → retain round-trip via `cbrkit.retrieval.indexable.*` + `cbrkit.retain.indexable` therefore lines up cleanly only when `V = str`.
|
|
908
|
+
For model or mapping stores, either use the backend as a typed store (read `storage.index` as `Casebase[K, V]` and retrieve with a value-based retriever like `cbrkit.retrieval.build(...)`), or re-hydrate full rows by key from `storage.index` after text retrieval.
|
|
880
909
|
|
|
881
910
|
## Evaluation
|
|
882
911
|
|
|
@@ -128,12 +128,14 @@ df = pl.read_csv("path/to/cases.csv")
|
|
|
128
128
|
casebase = cbrkit.loaders.polars(df)
|
|
129
129
|
```
|
|
130
130
|
|
|
131
|
-
For
|
|
131
|
+
For ad-hoc SQLite loading, CBRkit ships a stdlib-based loader:
|
|
132
132
|
|
|
133
133
|
```python
|
|
134
134
|
casebase = cbrkit.loaders.sqlite("path/to/database.db", "SELECT * FROM cases")
|
|
135
135
|
```
|
|
136
136
|
|
|
137
|
+
For richer relational backends (filters, upserts, vector/FTS search via pgvector on PostgreSQL or sqlite-vec on SQLite), see `cbrkit.indexable.sqlalchemy`, `cbrkit.indexable.pgvector`, and `cbrkit.indexable.sqlite_vec`.
|
|
138
|
+
|
|
137
139
|
**Tip:** You can validate a loaded casebase against a Pydantic model using `cbrkit.loaders.validate()`:
|
|
138
140
|
|
|
139
141
|
```python
|
|
@@ -579,8 +581,7 @@ The result contains `similarities` with quality assessment scores for each case.
|
|
|
579
581
|
## Retain
|
|
580
582
|
|
|
581
583
|
The retain phase decides whether and how to integrate new cases into the casebase.
|
|
582
|
-
|
|
583
|
-
You build a retain pipeline by specifying an assessment function and a storage function:
|
|
584
|
+
Build a retain pipeline from an assessment function and a storage function:
|
|
584
585
|
|
|
585
586
|
```python
|
|
586
587
|
retainer = cbrkit.retain.build(
|
|
@@ -592,27 +593,9 @@ retainer = cbrkit.retain.build(
|
|
|
592
593
|
)
|
|
593
594
|
```
|
|
594
595
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
- `indexable`: Keeps an `IndexableFunc`'s index in sync with the casebase.
|
|
599
|
-
|
|
600
|
-
You can filter retained cases based on their assessment scores using the `dropout` wrapper:
|
|
601
|
-
|
|
602
|
-
```python
|
|
603
|
-
retainer = cbrkit.retain.dropout(
|
|
604
|
-
retainer_func=cbrkit.retain.build(...),
|
|
605
|
-
min_similarity=0.5,
|
|
606
|
-
)
|
|
607
|
-
```
|
|
608
|
-
|
|
609
|
-
The retainer can be applied to a revise result:
|
|
610
|
-
|
|
611
|
-
```python
|
|
612
|
-
result = cbrkit.retain.apply_result(revise_result, retainer)
|
|
613
|
-
```
|
|
614
|
-
|
|
615
|
-
The result contains `similarities` with fitness scores and `casebase` with the updated cases.
|
|
596
|
+
The built-in storage functions are `static` (generates collision-free keys from a reference casebase) and `indexable` (keeps an `IndexableFunc`'s index in sync with the casebase).
|
|
597
|
+
Wrap a retainer with `dropout` to filter by assessment score (e.g. `min_similarity=0.5`), then apply it to a revise result via `cbrkit.retain.apply_result(revise_result, retainer)`.
|
|
598
|
+
The result exposes `similarities` (fitness scores) and `casebase` (updated cases).
|
|
616
599
|
|
|
617
600
|
## Full CBR Cycle
|
|
618
601
|
|
|
@@ -745,37 +728,74 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
|
|
|
745
728
|
|
|
746
729
|
### Indexed Retrieval
|
|
747
730
|
|
|
748
|
-
|
|
749
|
-
|
|
731
|
+
Indexed retrieval pre-indexes the casebase once and then queries it without passing the full casebase each time, which helps for large casebases or external search backends.
|
|
732
|
+
Index maintenance lives on whichever object owns the index.
|
|
750
733
|
|
|
751
|
-
|
|
734
|
+
The self-contained `bm25` and `embed` retrievers own their index, so you call `put_index()` on the retriever:
|
|
752
735
|
|
|
753
736
|
```python
|
|
754
737
|
from frozendict import frozendict
|
|
755
738
|
|
|
756
|
-
|
|
757
|
-
retriever
|
|
758
|
-
retriever.create_index(frozendict(casebase))
|
|
739
|
+
retriever = cbrkit.retrieval.bm25(conversion_func=cbrkit.sim.embed.bm25(language="en"))
|
|
740
|
+
retriever.put_index(frozendict(casebase))
|
|
759
741
|
```
|
|
760
742
|
|
|
761
|
-
|
|
743
|
+
The storage-backed `lancedb`, `chromadb`, `zvec`, `pgvector`, and `sqlite_vec` retrievers are pure query paths over a separate `cbrkit.indexable` storage that owns the index, so you index on the storage and wrap it for querying:
|
|
762
744
|
|
|
763
745
|
```python
|
|
764
|
-
|
|
746
|
+
storage = cbrkit.indexable.lancedb(uri="./cases", table_name="cases")
|
|
747
|
+
storage.put_index(frozendict(casebase))
|
|
748
|
+
retriever = cbrkit.retrieval.lancedb(storage=storage, search_type="dense")
|
|
765
749
|
```
|
|
766
750
|
|
|
767
|
-
|
|
751
|
+
Query a pre-indexed retriever with `apply_query_indexed` / `apply_queries_indexed` (or pass an empty casebase `{}` to `apply_query`); querying an un-indexed retriever raises `ValueError`:
|
|
768
752
|
|
|
769
753
|
```python
|
|
770
754
|
result = cbrkit.retrieval.apply_query_indexed(query, retriever)
|
|
771
|
-
# or for multiple queries:
|
|
772
|
-
result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
|
|
773
755
|
```
|
|
774
756
|
|
|
775
|
-
|
|
757
|
+
The `System` class also defaults its casebase to `{}`, so a system of pre-indexed retrievers needs no casebase at query time.
|
|
758
|
+
|
|
759
|
+
#### Typed Values and the Retain Caveat
|
|
760
|
+
|
|
761
|
+
Each backend has one text-field knob — `value_column` (`value_field` for `zvec`/`chromadb`) — naming the embeddable text, and the value type `V` follows the schema source:
|
|
762
|
+
|
|
763
|
+
- **Plain text** (`V = str`, the default) — the bare string is stored under the text knob and read back as a string.
|
|
764
|
+
- **Typed model** (`V = YourModel`) — pass a `model`: a dataclass or Pydantic model for `lancedb`/`zvec`/`chromadb` (fields become columns), or a SQLAlchemy mapped class for `sqlalchemy`/`pgvector`/`sqlite_vec` (its `__table__` defines the schema). Reads reconstruct model instances.
|
|
765
|
+
- **Mapping** (`V = Mapping[str, Any]`) — `sqlalchemy`/`pgvector`/`sqlite_vec` only, via a host-supplied `table` or `reflect=True`.
|
|
766
|
+
|
|
767
|
+
```python
|
|
768
|
+
# plain strings — cbrkit builds the table
|
|
769
|
+
store = cbrkit.indexable.pgvector[str, str](
|
|
770
|
+
url=..., value_column="body", pgvector_dim=384, conversion_func=embed,
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
# typed rows — a SQLAlchemy mapped class defines the schema
|
|
774
|
+
class Car(Base):
|
|
775
|
+
__tablename__ = "cars"
|
|
776
|
+
key: Mapped[str] = mapped_column(primary_key=True)
|
|
777
|
+
desc: Mapped[str] = mapped_column()
|
|
778
|
+
_pgvec: Mapped[Any] = mapped_column(cbrkit.indexable.PGVECTOR(384), nullable=False)
|
|
779
|
+
|
|
780
|
+
store = cbrkit.indexable.pgvector[str, Car](url=..., model=Car, value_column="desc", ...)
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
Pass `vector_type="halfvec"` for half-precision storage (~2x smaller, negligible recall loss); for a typed model, declare the column with the re-exported `cbrkit.indexable.HALFVEC` instead of `PGVECTOR`.
|
|
784
|
+
|
|
785
|
+
For a self-contained, file-based store, `sqlite_vec` offers the same dense/sparse/hybrid API on SQLite via the [`sqlite-vec`](https://github.com/asg017/sqlite-vec) extension (loaded automatically). Dense KNN uses a `vec0` virtual table (so the backend inherits future `vec0` capabilities such as approximate search, and supports quantized `vector_type="int8"` storage today), sparse search uses built-in FTS5, and `Filter` `WHERE` clauses compose by joining matches back to the main table:
|
|
786
|
+
|
|
787
|
+
```python
|
|
788
|
+
store = cbrkit.indexable.sqlite_vec[str, str](
|
|
789
|
+
url="sqlite+aiosqlite:///cases.db",
|
|
790
|
+
value_column="body", vector_dim=384, index_type="hybrid", conversion_func=embed,
|
|
791
|
+
)
|
|
792
|
+
store.put_index(frozendict(casebase))
|
|
793
|
+
retriever = cbrkit.retrieval.indexable.sqlite_vec(storage=store, search_type="hybrid")
|
|
794
|
+
```
|
|
776
795
|
|
|
777
|
-
|
|
778
|
-
|
|
796
|
+
**Retain caveat:** the storage-backed retrievers search by the text column and always return `Casebase[K, str]`, projecting richer values down to their text.
|
|
797
|
+
A retrieve → retain round-trip via `cbrkit.retrieval.indexable.*` + `cbrkit.retain.indexable` therefore lines up cleanly only when `V = str`.
|
|
798
|
+
For model or mapping stores, either use the backend as a typed store (read `storage.index` as `Casebase[K, V]` and retrieve with a value-based retriever like `cbrkit.retrieval.build(...)`), or re-hydrate full rows by key from `storage.index` after text retrieval.
|
|
779
799
|
|
|
780
800
|
## Evaluation
|
|
781
801
|
|
|
@@ -1,56 +1,56 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "cbrkit"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.4.0"
|
|
4
4
|
description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
|
|
5
5
|
authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
|
|
6
6
|
readme = "README.md"
|
|
7
|
+
license = "MIT"
|
|
7
8
|
keywords = [
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
9
|
+
"cbr",
|
|
10
|
+
"case-based reasoning",
|
|
11
|
+
"api",
|
|
12
|
+
"similarity",
|
|
13
|
+
"nlp",
|
|
14
|
+
"retrieval",
|
|
15
|
+
"cli",
|
|
16
|
+
"tool",
|
|
17
|
+
"library",
|
|
17
18
|
]
|
|
18
19
|
classifiers = [
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"Typing :: Typed",
|
|
20
|
+
"Development Status :: 4 - Beta",
|
|
21
|
+
"Environment :: Console",
|
|
22
|
+
"Framework :: Pytest",
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"Natural Language :: English",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
"Programming Language :: Python :: 3.14",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
"Topic :: Utilities",
|
|
34
|
+
"Typing :: Typed",
|
|
35
35
|
]
|
|
36
36
|
requires-python = ">=3.13,<4"
|
|
37
37
|
dependencies = [
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
38
|
+
"frozendict>=2,<3",
|
|
39
|
+
"numpy>=2,<3",
|
|
40
|
+
"orjson>=3,<4",
|
|
41
|
+
"polars>=1,<2",
|
|
42
|
+
"pydantic>=2,<3",
|
|
43
|
+
"pyyaml>=6,<7",
|
|
44
|
+
"rtoml>=0.12,<1",
|
|
45
|
+
"scipy>=1,<2",
|
|
46
|
+
"xmltodict>=1,<2",
|
|
47
47
|
]
|
|
48
48
|
|
|
49
49
|
[project.optional-dependencies]
|
|
50
50
|
# LLM providers
|
|
51
51
|
anthropic = ["anthropic>=0.40,<1"]
|
|
52
|
-
cohere = ["cohere>=
|
|
53
|
-
google = ["google-genai>=
|
|
52
|
+
cohere = ["cohere>=7,<8"]
|
|
53
|
+
google = ["google-genai>=2,<3"]
|
|
54
54
|
instructor = ["instructor>=1,<2"]
|
|
55
55
|
ollama = ["ollama>=0.3,<1"]
|
|
56
56
|
openai = ["openai>=1,<3", "tiktoken>=0.8,<1"]
|
|
@@ -76,26 +76,30 @@ graphviz = ["pygraphviz>=1,<2"]
|
|
|
76
76
|
chromadb = ["chromadb>=1,<2"]
|
|
77
77
|
lancedb = ["lancedb>=0.20,<1"]
|
|
78
78
|
pandas = ["pandas>=2,<4"]
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
pgvector = ["pgvector>=0.4,<1", "cbrkit[sql]"]
|
|
80
|
+
sql = ["sqlalchemy[asyncio]>=2,<3"]
|
|
81
|
+
sqlite-vec = ["sqlite-vec>=0.1,<1", "aiosqlite>=0.20,<1", "cbrkit[sql]"]
|
|
82
|
+
zvec = ["zvec>=0.2,<1"]
|
|
81
83
|
|
|
82
84
|
# Tools
|
|
83
|
-
cli = ["rich>=
|
|
85
|
+
cli = ["rich>=14,<16", "typer>=0.20,<1"]
|
|
84
86
|
eval = ["ranx>=0.3,<1"]
|
|
85
87
|
timeseries = ["minineedle>=3,<4"]
|
|
86
88
|
|
|
87
89
|
# Entry points
|
|
88
90
|
api = [
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
"cbrkit[cli]",
|
|
92
|
+
"fastapi>=0.100,<1",
|
|
93
|
+
"pydantic-settings>=2,<3",
|
|
94
|
+
"python-multipart>=0.0.15,<1",
|
|
95
|
+
"uvicorn[standard]>=0.30,<1",
|
|
96
|
+
"fastmcp>=3,<4",
|
|
95
97
|
]
|
|
96
98
|
|
|
97
99
|
# Bundle
|
|
98
|
-
all = [
|
|
100
|
+
all = [
|
|
101
|
+
"cbrkit[anthropic,api,bm25,chromadb,chunking,cohere,eval,google,graphs,graphviz,instructor,lancedb,levenshtein,nltk,ollama,openai,openai-agents,pandas,pgvector,pydantic-ai,spacy,sql,sqlite-vec,timeseries,transformers,voyageai,zvec]",
|
|
102
|
+
]
|
|
99
103
|
|
|
100
104
|
[project.urls]
|
|
101
105
|
Repository = "https://github.com/wi2trier/cbrkit"
|
|
@@ -117,11 +121,23 @@ build-backend = "uv_build"
|
|
|
117
121
|
|
|
118
122
|
[tool.pytest]
|
|
119
123
|
testpaths = ["src", "tests"]
|
|
120
|
-
addopts = [
|
|
124
|
+
addopts = [
|
|
125
|
+
"--cov=src/cbrkit",
|
|
126
|
+
"--cov-report=term-missing",
|
|
127
|
+
"--doctest-modules",
|
|
128
|
+
"--import-mode=importlib",
|
|
129
|
+
]
|
|
121
130
|
doctest_optionflags = ["NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL", "ELLIPSIS"]
|
|
122
131
|
|
|
123
132
|
[tool.uv]
|
|
124
133
|
default-groups = ["dev", "test", "docs"]
|
|
125
134
|
|
|
135
|
+
[tool.uv.extra-build-dependencies]
|
|
136
|
+
pygraphviz = ["setuptools"]
|
|
137
|
+
cbor = ["setuptools"]
|
|
138
|
+
warc3-wet-clueweb09 = ["setuptools"]
|
|
139
|
+
zlib-state = ["setuptools"]
|
|
140
|
+
pystemmer = ["setuptools", "cython"]
|
|
141
|
+
|
|
126
142
|
[tool.ruff.lint.pydocstyle]
|
|
127
143
|
convention = "google"
|
|
@@ -189,7 +189,7 @@ def synthesize(
|
|
|
189
189
|
)
|
|
190
190
|
|
|
191
191
|
|
|
192
|
-
def openapi_generator():
|
|
192
|
+
def openapi_generator() -> dict[str, Any]:
|
|
193
193
|
"""Generate and cache the OpenAPI schema for the CBRKit API."""
|
|
194
194
|
if not app.openapi_schema:
|
|
195
195
|
app.openapi_schema = get_openapi(
|
|
@@ -203,4 +203,4 @@ def openapi_generator():
|
|
|
203
203
|
return app.openapi_schema
|
|
204
204
|
|
|
205
205
|
|
|
206
|
-
app.openapi = openapi_generator # type: ignore[assignment]
|
|
206
|
+
app.openapi = openapi_generator # type: ignore[assignment] # ty: ignore[invalid-assignment]
|
|
@@ -7,7 +7,7 @@ from typing import Literal, cast
|
|
|
7
7
|
from ..helpers import (
|
|
8
8
|
get_logger,
|
|
9
9
|
normalize_and_scale,
|
|
10
|
-
|
|
10
|
+
round_int,
|
|
11
11
|
sim_map2ranking,
|
|
12
12
|
unpack_float,
|
|
13
13
|
unpack_floats,
|
|
@@ -487,15 +487,18 @@ def generate_metrics(
|
|
|
487
487
|
>>> generate_metrics(["precision", "recall"], ks=5)
|
|
488
488
|
['precision@5', 'recall@5']
|
|
489
489
|
"""
|
|
490
|
-
if
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
490
|
+
ks_list: list[int | None] = [ks] if ks is None or isinstance(ks, int) else list(ks)
|
|
491
|
+
relevance_levels_list: list[int | None] = (
|
|
492
|
+
[relevance_levels]
|
|
493
|
+
if relevance_levels is None or isinstance(relevance_levels, int)
|
|
494
|
+
else list(relevance_levels)
|
|
495
|
+
)
|
|
495
496
|
|
|
496
497
|
return [
|
|
497
|
-
generate_metric(
|
|
498
|
-
for
|
|
498
|
+
generate_metric(metric, k, relevance_level)
|
|
499
|
+
for metric, k, relevance_level in itertools.product(
|
|
500
|
+
metrics, ks_list, relevance_levels_list
|
|
501
|
+
)
|
|
499
502
|
]
|
|
500
503
|
|
|
501
504
|
|
|
@@ -528,7 +531,7 @@ def similarities_to_qrels[Q, C](
|
|
|
528
531
|
|
|
529
532
|
return {
|
|
530
533
|
query: {
|
|
531
|
-
case:
|
|
534
|
+
case: round_int(
|
|
532
535
|
normalize_and_scale(sim, min_sim, max_sim, min_qrel, max_qrel),
|
|
533
536
|
round_mode,
|
|
534
537
|
)
|