cbrkit 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {cbrkit-1.2.0 → cbrkit-1.3.0}/PKG-INFO +12 -10
  2. {cbrkit-1.2.0 → cbrkit-1.3.0}/README.md +4 -4
  3. {cbrkit-1.2.0 → cbrkit-1.3.0}/pyproject.toml +5 -5
  4. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/api.py +2 -2
  5. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/common.py +10 -7
  6. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/helpers.py +2 -2
  7. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/indexable.py +301 -90
  8. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/storage.py +10 -5
  9. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/indexable.py +180 -41
  10. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/wrappers.py +37 -5
  11. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/aggregator.py +4 -3
  12. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/embed.py +123 -29
  13. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/wrappers.py +5 -5
  14. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/anthropic.py +1 -1
  15. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/cohere.py +3 -1
  16. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_completions.py +1 -1
  17. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_responses.py +1 -1
  18. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/typing.py +12 -4
  19. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/__init__.py +0 -0
  20. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/__main__.py +0 -0
  21. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/__init__.py +0 -0
  22. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/attribute_value.py +0 -0
  23. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/generic.py +0 -0
  24. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/numbers.py +0 -0
  25. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/adapt/strings.py +0 -0
  26. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/cli.py +0 -0
  27. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/constants.py +0 -0
  28. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/cycle.py +0 -0
  29. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/dumpers.py +0 -0
  30. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/__init__.py +0 -0
  31. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/eval/retrieval.py +0 -0
  32. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/loaders.py +0 -0
  33. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/__init__.py +0 -0
  34. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/graph.py +0 -0
  35. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/model/result.py +0 -0
  36. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/py.typed +0 -0
  37. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/__init__.py +0 -0
  38. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/apply.py +0 -0
  39. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retain/build.py +0 -0
  40. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/__init__.py +0 -0
  41. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/apply.py +0 -0
  42. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/build.py +0 -0
  43. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/retrieval/rerank.py +0 -0
  44. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/__init__.py +0 -0
  45. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/apply.py +0 -0
  46. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/reuse/build.py +0 -0
  47. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/__init__.py +0 -0
  48. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/apply.py +0 -0
  49. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/revise/build.py +0 -0
  50. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/__init__.py +0 -0
  51. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/attribute_value.py +0 -0
  52. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/collections.py +0 -0
  53. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/generic.py +0 -0
  54. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/__init__.py +0 -0
  55. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/alignment.py +0 -0
  56. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/astar.py +0 -0
  57. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/brute_force.py +0 -0
  58. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/common.py +0 -0
  59. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/dfs.py +0 -0
  60. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/greedy.py +0 -0
  61. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/lap.py +0 -0
  62. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/qap.py +0 -0
  63. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/graphs/vf2.py +0 -0
  64. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/numbers.py +0 -0
  65. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/pooling.py +0 -0
  66. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/strings.py +0 -0
  67. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/sim/taxonomy.py +0 -0
  68. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/__init__.py +0 -0
  69. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/apply.py +0 -0
  70. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/build.py +0 -0
  71. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/model.py +0 -0
  72. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/prompts.py +0 -0
  73. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/__init__.py +0 -0
  74. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/google.py +0 -0
  75. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/instructor.py +0 -0
  76. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/model.py +0 -0
  77. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/ollama.py +0 -0
  78. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/openai_agents.py +0 -0
  79. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/pydantic_ai.py +0 -0
  80. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
  81. {cbrkit-1.2.0 → cbrkit-1.3.0}/src/cbrkit/system.py +0 -0
@@ -1,16 +1,16 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: cbrkit
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
5
5
  Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
6
6
  Author: Mirko Lenz
7
7
  Author-email: Mirko Lenz <mirko@mirkolenz.com>
8
+ License-Expression: MIT
8
9
  Classifier: Development Status :: 4 - Beta
9
10
  Classifier: Environment :: Console
10
11
  Classifier: Framework :: Pytest
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Natural Language :: English
15
15
  Classifier: Operating System :: OS Independent
16
16
  Classifier: Programming Language :: Python :: 3.13
@@ -41,9 +41,9 @@ Requires-Dist: fastmcp>=3,<4 ; extra == 'api'
41
41
  Requires-Dist: bm25s[core,stem,indexing]>=0.3,<1 ; extra == 'bm25'
42
42
  Requires-Dist: chromadb>=1,<2 ; extra == 'chromadb'
43
43
  Requires-Dist: chonkie>=1,<2 ; extra == 'chunking'
44
- Requires-Dist: rich>=13,<15 ; extra == 'cli'
45
- Requires-Dist: typer>=0.9,<1 ; extra == 'cli'
46
- Requires-Dist: cohere>=5,<6 ; extra == 'cohere'
44
+ Requires-Dist: rich>=14,<16 ; extra == 'cli'
45
+ Requires-Dist: typer>=0.20,<1 ; extra == 'cli'
46
+ Requires-Dist: cohere>=6,<7 ; extra == 'cohere'
47
47
  Requires-Dist: ranx>=0.3,<1 ; extra == 'eval'
48
48
  Requires-Dist: google-genai>=1,<2 ; extra == 'google'
49
49
  Requires-Dist: networkx>=3,<4 ; extra == 'graphs'
@@ -66,6 +66,7 @@ Requires-Dist: sentence-transformers>=4,<6 ; extra == 'transformers'
66
66
  Requires-Dist: torch>=2.5,<3 ; extra == 'transformers'
67
67
  Requires-Dist: transformers>=4,<6 ; extra == 'transformers'
68
68
  Requires-Dist: voyageai>=0.3,<1 ; extra == 'voyageai'
69
+ Requires-Dist: zvec>=0.2,<1 ; extra == 'zvec'
69
70
  Requires-Python: >=3.13, <4
70
71
  Project-URL: Repository, https://github.com/wi2trier/cbrkit
71
72
  Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
@@ -97,6 +98,7 @@ Provides-Extra: sql
97
98
  Provides-Extra: timeseries
98
99
  Provides-Extra: transformers
99
100
  Provides-Extra: voyageai
101
+ Provides-Extra: zvec
100
102
  Description-Content-Type: text/markdown
101
103
 
102
104
  <!-- markdownlint-disable MD033 MD041 -->
@@ -846,17 +848,17 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
846
848
 
847
849
  ### Indexed Retrieval
848
850
 
849
- Some retrievers like `bm25`, `embed`, and `lancedb` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
851
+ Retrievers like `bm25`, `embed`, `lancedb`, `chromadb`, and `zvec` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
850
852
  This is useful for large casebases or when using external search backends.
851
853
 
852
- To use indexed retrieval, first create a retriever and call its `index()` method:
854
+ To use indexed retrieval, first create a retriever and call its `put_index()` method:
853
855
 
854
856
  ```python
855
857
  from frozendict import frozendict
856
858
 
857
859
  bm25_func = cbrkit.sim.embed.bm25(language="en")
858
860
  retriever = cbrkit.retrieval.bm25(conversion_func=bm25_func)
859
- retriever.create_index(frozendict(casebase))
861
+ retriever.put_index(frozendict(casebase))
860
862
  ```
861
863
 
862
864
  Then pass an empty casebase (`{}`) to signal that the retriever should use its pre-indexed data:
@@ -873,7 +875,7 @@ result = cbrkit.retrieval.apply_query_indexed(query, retriever)
873
875
  result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
874
876
  ```
875
877
 
876
- If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `index()` first.
878
+ If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `put_index()` first.
877
879
 
878
880
  The `System` class also supports indexed retrieval by defaulting the casebase to an empty dict.
879
881
  This allows creating a system where all retrievers are pre-indexed and no casebase needs to be provided at query time.
@@ -745,17 +745,17 @@ result = cbrkit.retrieval.apply_query(casebase, query, (retriever, reranker))
745
745
 
746
746
  ### Indexed Retrieval
747
747
 
748
- Some retrievers like `bm25`, `embed`, and `lancedb` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
748
+ Retrievers like `bm25`, `embed`, `lancedb`, `chromadb`, and `zvec` support **indexed retrieval**, where the casebase is pre-indexed once and then queried without passing the full casebase each time.
749
749
  This is useful for large casebases or when using external search backends.
750
750
 
751
- To use indexed retrieval, first create a retriever and call its `index()` method:
751
+ To use indexed retrieval, first create a retriever and call its `put_index()` method:
752
752
 
753
753
  ```python
754
754
  from frozendict import frozendict
755
755
 
756
756
  bm25_func = cbrkit.sim.embed.bm25(language="en")
757
757
  retriever = cbrkit.retrieval.bm25(conversion_func=bm25_func)
758
- retriever.create_index(frozendict(casebase))
758
+ retriever.put_index(frozendict(casebase))
759
759
  ```
760
760
 
761
761
  Then pass an empty casebase (`{}`) to signal that the retriever should use its pre-indexed data:
@@ -772,7 +772,7 @@ result = cbrkit.retrieval.apply_query_indexed(query, retriever)
772
772
  result = cbrkit.retrieval.apply_queries_indexed(queries, retriever)
773
773
  ```
774
774
 
775
- If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `index()` first.
775
+ If a retriever receives an empty casebase but has not been indexed yet, a `ValueError` is raised with a message to call `put_index()` first.
776
776
 
777
777
  The `System` class also supports indexed retrieval by defaulting the casebase to an empty dict.
778
778
  This allows creating a system where all retrievers are pre-indexed and no casebase needs to be provided at query time.
@@ -1,9 +1,10 @@
1
1
  [project]
2
2
  name = "cbrkit"
3
- version = "1.2.0"
3
+ version = "1.3.0"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
5
5
  authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
6
6
  readme = "README.md"
7
+ license = "MIT"
7
8
  keywords = [
8
9
  "cbr",
9
10
  "case-based reasoning",
@@ -21,7 +22,6 @@ classifiers = [
21
22
  "Framework :: Pytest",
22
23
  "Intended Audience :: Developers",
23
24
  "Intended Audience :: Science/Research",
24
- "License :: OSI Approved :: MIT License",
25
25
  "Natural Language :: English",
26
26
  "Operating System :: OS Independent",
27
27
  "Programming Language :: Python :: 3.13",
@@ -49,7 +49,7 @@ dependencies = [
49
49
  [project.optional-dependencies]
50
50
  # LLM providers
51
51
  anthropic = ["anthropic>=0.40,<1"]
52
- cohere = ["cohere>=5,<6"]
52
+ cohere = ["cohere>=6,<7"]
53
53
  google = ["google-genai>=1,<2"]
54
54
  instructor = ["instructor>=1,<2"]
55
55
  ollama = ["ollama>=0.3,<1"]
@@ -77,10 +77,10 @@ chromadb = ["chromadb>=1,<2"]
77
77
  lancedb = ["lancedb>=0.20,<1"]
78
78
  pandas = ["pandas>=2,<4"]
79
79
  sql = ["sqlalchemy>=2,<3"]
80
- # zvec = ["zvec>=0.2,<1"]
80
+ zvec = ["zvec>=0.2,<1"]
81
81
 
82
82
  # Tools
83
- cli = ["rich>=13,<15", "typer>=0.9,<1"]
83
+ cli = ["rich>=14,<16", "typer>=0.20,<1"]
84
84
  eval = ["ranx>=0.3,<1"]
85
85
  timeseries = ["minineedle>=3,<4"]
86
86
 
@@ -189,7 +189,7 @@ def synthesize(
189
189
  )
190
190
 
191
191
 
192
- def openapi_generator():
192
+ def openapi_generator() -> dict[str, Any]:
193
193
  """Generate and cache the OpenAPI schema for the CBRKit API."""
194
194
  if not app.openapi_schema:
195
195
  app.openapi_schema = get_openapi(
@@ -203,4 +203,4 @@ def openapi_generator():
203
203
  return app.openapi_schema
204
204
 
205
205
 
206
- app.openapi = openapi_generator # type: ignore[assignment]
206
+ app.openapi = openapi_generator # type: ignore[assignment] # ty: ignore[invalid-assignment]
@@ -487,15 +487,18 @@ def generate_metrics(
487
487
  >>> generate_metrics(["precision", "recall"], ks=5)
488
488
  ['precision@5', 'recall@5']
489
489
  """
490
- if not isinstance(ks, Iterable):
491
- ks = [ks]
492
-
493
- if not isinstance(relevance_levels, Iterable):
494
- relevance_levels = [relevance_levels]
490
+ ks_list: list[int | None] = [ks] if ks is None or isinstance(ks, int) else list(ks)
491
+ relevance_levels_list: list[int | None] = (
492
+ [relevance_levels]
493
+ if relevance_levels is None or isinstance(relevance_levels, int)
494
+ else list(relevance_levels)
495
+ )
495
496
 
496
497
  return [
497
- generate_metric(*args)
498
- for args in itertools.product(metrics, ks, relevance_levels)
498
+ generate_metric(metric, k, relevance_level)
499
+ for metric, k, relevance_level in itertools.product(
500
+ metrics, ks_list, relevance_levels_list
501
+ )
499
502
  ]
500
503
 
501
504
 
@@ -244,7 +244,7 @@ def singleton[T](x: Mapping[Any, T] | Collection[T]) -> T:
244
244
  if isinstance(x, Mapping):
245
245
  return cast(T, next(iter(x.values())))
246
246
  elif isinstance(x, Collection):
247
- return cast(T, next(iter(x)))
247
+ return next(iter(x))
248
248
 
249
249
  raise TypeError(f"Expected a Mapping or Collection, but got {type(x)}")
250
250
 
@@ -390,7 +390,7 @@ def is_factory[T](obj: MaybeFactory[T]) -> TypeIs[Factory[T]]:
390
390
  def produce_factory[T](obj: MaybeFactory[T]) -> T:
391
391
  """Resolve a factory by calling it, or return the value as-is."""
392
392
  if is_factory(obj):
393
- return obj()
393
+ return cast(T, obj())
394
394
 
395
395
  return cast(T, obj)
396
396