retrievalbase 2.1.3__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/CHANGELOG.md +7 -0
  2. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/PKG-INFO +1 -1
  3. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/pyproject.toml +1 -1
  4. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/connector/__init__.py +4 -0
  5. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/connector/minio.py +10 -0
  6. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/connector/parquet.py +4 -0
  7. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/fixtures/components.py +3 -0
  8. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_connector/test_connectors.py +47 -0
  9. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_base_contracts.py +5 -0
  10. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/uv.lock +1 -1
  11. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/.gitignore +0 -0
  12. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/.gitlab-ci.yml +0 -0
  13. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/.pre-commit-config.yaml +0 -0
  14. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/.releaserc.json +0 -0
  15. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/AGENTS.md +0 -0
  16. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/Makefile +0 -0
  17. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/README.md +0 -0
  18. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/codecov.yml +0 -0
  19. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/commitlint.config.cjs +0 -0
  20. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/__init__.py +0 -0
  21. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/connector/settings.py +0 -0
  22. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/constants.py +0 -0
  23. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/__init__.py +0 -0
  24. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/hf.py +0 -0
  25. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/mixins.py +0 -0
  26. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/polars.py +0 -0
  27. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/preprocess/__init__.py +0 -0
  28. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/preprocess/preprocess.py +0 -0
  29. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/preprocess/token_counter.py +0 -0
  30. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/dataset/settings.py +0 -0
  31. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/enums.py +0 -0
  32. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/__init__.py +0 -0
  33. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/async_batcher.py +0 -0
  34. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/embedders.py +0 -0
  35. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/evaluators/__init__.py +0 -0
  36. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/evaluators/python/__init__.py +0 -0
  37. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/evaluators/python/evaluators.py +0 -0
  38. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/evaluators/python/scores.py +0 -0
  39. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/processors.py +0 -0
  40. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/rerankers.py +0 -0
  41. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/retrievers/__init__.py +0 -0
  42. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/retrievers/dense/__init__.py +0 -0
  43. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/retrievers/dense/retrievers.py +0 -0
  44. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/settings.py +0 -0
  45. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/evaluation/vector_stores.py +0 -0
  46. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/exceptions.py +0 -0
  47. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/ingestion/__init__.py +0 -0
  48. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/ingestion/settings.py +0 -0
  49. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/mixins.py +0 -0
  50. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/py.typed +0 -0
  51. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/settings.py +0 -0
  52. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/types.py +0 -0
  53. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/src/retrievalbase/utils.py +0 -0
  54. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/__init__.py +0 -0
  55. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/conftest.py +0 -0
  56. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/fixtures/__init__.py +0 -0
  57. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/fixtures/data.py +0 -0
  58. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/__init__.py +0 -0
  59. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/test_dataset/__init__.py +0 -0
  60. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/test_dataset/test_huggingface_adapter.py +0 -0
  61. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/test_evaluation/__init__.py +0 -0
  62. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/test_evaluation/conftest.py +0 -0
  63. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/integration/test_evaluation/test_python_evaluator.py +0 -0
  64. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/__init__.py +0 -0
  65. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_config/__init__.py +0 -0
  66. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_config/test_mixins.py +0 -0
  67. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_config/test_settings.py +0 -0
  68. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_connector/__init__.py +0 -0
  69. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/__init__.py +0 -0
  70. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/conftest.py +0 -0
  71. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_dataset_base_contracts.py +0 -0
  72. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_dataset_mixins_more.py +0 -0
  73. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_polars_dataset.py +0 -0
  74. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_polars_lazy_paths.py +0 -0
  75. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_preprocess_filters.py +0 -0
  76. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_token_counter_hf.py +0 -0
  77. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_dataset/test_token_counters.py +0 -0
  78. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/__init__.py +0 -0
  79. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/conftest.py +0 -0
  80. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_async_batcher.py +0 -0
  81. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_bm25_retriever.py +0 -0
  82. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_dense_retriever.py +0 -0
  83. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_embedders.py +0 -0
  84. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_hf_reranker.py +0 -0
  85. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_hybrid_retriever.py +0 -0
  86. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_hybrid_retriever_runtime.py +0 -0
  87. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_processors.py +0 -0
  88. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_python_evaluator_classes.py +0 -0
  89. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_python_evaluator_runtime.py +0 -0
  90. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_rerankers.py +0 -0
  91. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_retriever_base.py +0 -0
  92. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_scores.py +0 -0
  93. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_evaluation/test_vector_stores.py +0 -0
  94. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_ingestion/__init__.py +0 -0
  95. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_ingestion/test_text_ingestion_pipeline.py +0 -0
  96. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_utils/__init__.py +0 -0
  97. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_utils/test_utils.py +0 -0
  98. {retrievalbase-2.1.3 → retrievalbase-2.2.0}/tests/unit/test_utils/test_utils_connectors.py +0 -0
@@ -1,3 +1,10 @@
1
+ # [2.2.0](https://gitlab.com/efysent/agentic-core/retrievalbase/compare/v2.1.3...v2.2.0) (2026-05-22)
2
+
3
+
4
+ ### Features
5
+
6
+ * **connector:** add target existence checks ([80e739f](https://gitlab.com/efysent/agentic-core/retrievalbase/commit/80e739fddeca65a66d326c1bb673e1ff6a674ba6))
7
+
1
8
  ## [2.1.3](https://gitlab.com/efysent/agentic-core/retrievalbase/compare/v2.1.2...v2.1.3) (2026-05-19)
2
9
 
3
10
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retrievalbase
3
- Version: 2.1.3
3
+ Version: 2.2.0
4
4
  Author-email: jalal <jalalkhaldi3@gmail.com>
5
5
  Requires-Python: <3.13,>=3.11
6
6
  Requires-Dist: faiss-cpu<2.0.0,>=1.13.2
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "retrievalbase"
3
- version = "2.1.3"
3
+ version = "2.2.0"
4
4
  description = ""
5
5
  authors = [
6
6
  { name = "jalal", email = "jalalkhaldi3@gmail.com" }
@@ -33,6 +33,10 @@ class DatasetConnector[TCDatasetConnector: DatasetConnectorSettings](
33
33
  def to(self, ds: "Dataset[Any]") -> None:
34
34
  raise NotImplementedError
35
35
 
36
+ @abstractmethod
37
+ def target_exists(self, target: str) -> bool:
38
+ raise NotImplementedError
39
+
36
40
  def load(self) -> "Dataset[pl.DataFrame | pl.LazyFrame]":
37
41
  from retrievalbase.dataset.polars import PolarsDataset
38
42
 
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any
3
3
 
4
4
  import polars as pl
5
5
  from minio import Minio
6
+ from minio.error import S3Error
6
7
 
7
8
  from retrievalbase.connector import DatasetConnector
8
9
  from retrievalbase.connector.settings import MinioDatasetConnectorSettings
@@ -43,3 +44,12 @@ class MinioDatasetConnector(DatasetConnector[MinioDatasetConnectorSettings]):
43
44
  length=buffer.getbuffer().nbytes,
44
45
  content_type="application/octet-stream",
45
46
  )
47
+
48
+ def target_exists(self, target: str) -> bool:
49
+ try:
50
+ self.client.stat_object(self.config.bucket, target)
51
+ except S3Error as error:
52
+ if error.code in {"NoSuchBucket", "NoSuchKey"}:
53
+ return False
54
+ raise
55
+ return True
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from typing import TYPE_CHECKING, Any
2
3
 
3
4
  import polars as pl
@@ -18,3 +19,6 @@ class ParquetDatasetConnector(DatasetConnector[ParquetDatasetConnectorSettings])
18
19
 
19
20
  def to(self, ds: "Dataset[Any]") -> None:
20
21
  ds.polars.write_parquet(self.config.path)
22
+
23
+ def target_exists(self, target: str) -> bool:
24
+ return Path(target).exists()
@@ -79,6 +79,9 @@ class FakeDatasetConnector(DatasetConnector[FakeDatasetConnectorSettings]):
79
79
  def to(self, ds: Any) -> None:
80
80
  self.__class__.last_written = ds.polars
81
81
 
82
+ def target_exists(self, target: str) -> bool:
83
+ return target == "exists"
84
+
82
85
 
83
86
  class FakeTextPreprocessorSettings(TextPreprocessorSettings):
84
87
  kind: str = "suffix"
@@ -5,7 +5,9 @@ from typing import TypedDict, cast
5
5
 
6
6
  import polars as pl
7
7
  import pytest
8
+ from minio.error import S3Error
8
9
  from pydantic import SecretStr
10
+ from urllib3.response import BaseHTTPResponse
9
11
 
10
12
  from retrievalbase.connector.minio import MinioDatasetConnector
11
13
  from retrievalbase.connector.parquet import ParquetDatasetConnector
@@ -82,6 +84,18 @@ def test_parquet_connector_round_trips_dataframe(tmp_path, lazy: bool) -> None:
82
84
  assert loaded.polars.to_dict(as_series=False) == sample_text_df.to_dict(as_series=False)
83
85
 
84
86
 
87
+ def test_parquet_connector_reports_target_existence(tmp_path) -> None:
88
+ existing_path = tmp_path / "dataset.parquet"
89
+ missing_path = tmp_path / "missing.parquet"
90
+ existing_path.write_bytes(b"not a real parquet file")
91
+ connector = ParquetDatasetConnector(
92
+ ParquetDatasetConnectorSettings(module_path="x", path=str(existing_path), lazy=False)
93
+ )
94
+
95
+ assert connector.target_exists(str(existing_path)) is True
96
+ assert connector.target_exists(str(missing_path)) is False
97
+
98
+
85
99
  def test_minio_connector_reads_and_writes_parquet_payloads(
86
100
  monkeypatch: pytest.MonkeyPatch,
87
101
  ) -> None:
@@ -160,3 +174,36 @@ def test_minio_connector_reads_and_writes_parquet_payloads(
160
174
  assert loaded.polars.to_dict(as_series=False) == sample_text_df.to_dict(as_series=False)
161
175
  assert writes["closed"] is True
162
176
  assert writes["released"] is True
177
+
178
+
179
+ def test_minio_connector_reports_target_existence(monkeypatch: pytest.MonkeyPatch) -> None:
180
+ calls: list[tuple[str, str]] = []
181
+
182
+ class FakeMinio:
183
+ def __init__(self, endpoint: str, access_key: str, secret_key: str, secure: bool) -> None:
184
+ pass
185
+
186
+ def stat_object(self, bucket: str, key: str) -> object:
187
+ calls.append((bucket, key))
188
+ if key == "missing.parquet":
189
+ raise S3Error(
190
+ cast(BaseHTTPResponse, None), "NoSuchKey", "not found", key, "request-id", "host-id", bucket, key
191
+ )
192
+ return object()
193
+
194
+ monkeypatch.setattr("retrievalbase.connector.minio.Minio", FakeMinio)
195
+
196
+ connector = MinioDatasetConnector(
197
+ MinioDatasetConnectorSettings(
198
+ module_path="x",
199
+ endpoint="https://minio.local",
200
+ bucket="datasets",
201
+ key="sample.parquet",
202
+ access_key=SecretStr("access"),
203
+ secret_key=SecretStr("secret"),
204
+ )
205
+ )
206
+
207
+ assert connector.target_exists("sample.parquet") is True
208
+ assert connector.target_exists("missing.parquet") is False
209
+ assert calls == [("datasets", "sample.parquet"), ("datasets", "missing.parquet")]
@@ -33,6 +33,9 @@ class RaisingConnector(DatasetConnector[DatasetConnectorSettings]):
33
33
  def to(self, ds) -> None:
34
34
  super().to(ds)
35
35
 
36
+ def target_exists(self, target: str) -> bool:
37
+ return super().target_exists(target)
38
+
36
39
 
37
40
  class RaisingPreprocessor(TextPreprocessor[TextPreprocessorSettings]):
38
41
  def apply(self, ds):
@@ -94,6 +97,8 @@ def test_base_contracts_raise_not_implemented_and_noops() -> None:
94
97
  connector._load()
95
98
  with pytest.raises(NotImplementedError):
96
99
  connector.to(PolarsTextDataset.from_records([("x", {"doc_id": "1"})]))
100
+ with pytest.raises(NotImplementedError):
101
+ connector.target_exists("x")
97
102
  with pytest.raises(NotImplementedError):
98
103
  preprocessor.apply(PolarsTextDataset.from_records([("x", {"doc_id": "1"})]))
99
104
  with pytest.raises(NotImplementedError):
@@ -2584,7 +2584,7 @@ wheels = [
2584
2584
 
2585
2585
  [[package]]
2586
2586
  name = "retrievalbase"
2587
- version = "2.1.3"
2587
+ version = "2.2.0"
2588
2588
  source = { editable = "." }
2589
2589
  dependencies = [
2590
2590
  { name = "faiss-cpu" },
File without changes
File without changes
File without changes
File without changes
File without changes