cloud-dog-vdb 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. cloud_dog_vdb/__init__.py +45 -0
  2. cloud_dog_vdb/access/__init__.py +15 -0
  3. cloud_dog_vdb/access/enforcement.py +32 -0
  4. cloud_dog_vdb/access/policy.py +38 -0
  5. cloud_dog_vdb/adapters/__init__.py +39 -0
  6. cloud_dog_vdb/adapters/base.py +94 -0
  7. cloud_dog_vdb/adapters/chroma.py +329 -0
  8. cloud_dog_vdb/adapters/factory.py +51 -0
  9. cloud_dog_vdb/adapters/infinity.py +404 -0
  10. cloud_dog_vdb/adapters/opensearch.py +281 -0
  11. cloud_dog_vdb/adapters/pgvector.py +300 -0
  12. cloud_dog_vdb/adapters/qdrant.py +315 -0
  13. cloud_dog_vdb/adapters/registry.py +38 -0
  14. cloud_dog_vdb/adapters/vector_utils.py +35 -0
  15. cloud_dog_vdb/adapters/weaviate.py +291 -0
  16. cloud_dog_vdb/capabilities/__init__.py +15 -0
  17. cloud_dog_vdb/capabilities/models.py +28 -0
  18. cloud_dog_vdb/capabilities/planner.py +27 -0
  19. cloud_dog_vdb/collections/__init__.py +15 -0
  20. cloud_dog_vdb/collections/manager.py +44 -0
  21. cloud_dog_vdb/collections/specs.py +34 -0
  22. cloud_dog_vdb/compat/__init__.py +20 -0
  23. cloud_dog_vdb/compat/response_normaliser.py +194 -0
  24. cloud_dog_vdb/config/__init__.py +17 -0
  25. cloud_dog_vdb/config/models.py +38 -0
  26. cloud_dog_vdb/domain/__init__.py +25 -0
  27. cloud_dog_vdb/domain/enums.py +35 -0
  28. cloud_dog_vdb/domain/errors.py +45 -0
  29. cloud_dog_vdb/domain/models.py +108 -0
  30. cloud_dog_vdb/embeddings/__init__.py +18 -0
  31. cloud_dog_vdb/embeddings/base.py +28 -0
  32. cloud_dog_vdb/embeddings/providers.py +86 -0
  33. cloud_dog_vdb/factory.py +27 -0
  34. cloud_dog_vdb/ingestion/__init__.py +29 -0
  35. cloud_dog_vdb/ingestion/acquire.py +35 -0
  36. cloud_dog_vdb/ingestion/checkpoints.py +34 -0
  37. cloud_dog_vdb/ingestion/chunk/__init__.py +15 -0
  38. cloud_dog_vdb/ingestion/chunk/base.py +33 -0
  39. cloud_dog_vdb/ingestion/chunk/boundary.py +23 -0
  40. cloud_dog_vdb/ingestion/chunk/fixed.py +28 -0
  41. cloud_dog_vdb/ingestion/chunk/recursive.py +33 -0
  42. cloud_dog_vdb/ingestion/chunk/semantic.py +40 -0
  43. cloud_dog_vdb/ingestion/convert/__init__.py +15 -0
  44. cloud_dog_vdb/ingestion/convert/base.py +34 -0
  45. cloud_dog_vdb/ingestion/convert/deepdoc_conv.py +26 -0
  46. cloud_dog_vdb/ingestion/convert/mineru_conv.py +25 -0
  47. cloud_dog_vdb/ingestion/convert/pandas_conv.py +32 -0
  48. cloud_dog_vdb/ingestion/embed.py +30 -0
  49. cloud_dog_vdb/ingestion/ocr/__init__.py +26 -0
  50. cloud_dog_vdb/ingestion/ocr/base.py +52 -0
  51. cloud_dog_vdb/ingestion/ocr/heuristics.py +31 -0
  52. cloud_dog_vdb/ingestion/ocr/planner.py +43 -0
  53. cloud_dog_vdb/ingestion/ocr/providers/__init__.py +23 -0
  54. cloud_dog_vdb/ingestion/ocr/providers/external_service.py +69 -0
  55. cloud_dog_vdb/ingestion/ocr/providers/llm.py +94 -0
  56. cloud_dog_vdb/ingestion/ocr/providers/local.py +78 -0
  57. cloud_dog_vdb/ingestion/ocr/registry.py +36 -0
  58. cloud_dog_vdb/ingestion/parse/__init__.py +46 -0
  59. cloud_dog_vdb/ingestion/parse/async_runner.py +215 -0
  60. cloud_dog_vdb/ingestion/parse/base.py +52 -0
  61. cloud_dog_vdb/ingestion/parse/capabilities.py +32 -0
  62. cloud_dog_vdb/ingestion/parse/ir.py +57 -0
  63. cloud_dog_vdb/ingestion/parse/planner.py +31 -0
  64. cloud_dog_vdb/ingestion/parse/providers/__init__.py +29 -0
  65. cloud_dog_vdb/ingestion/parse/providers/deepdoc.py +101 -0
  66. cloud_dog_vdb/ingestion/parse/providers/docling.py +101 -0
  67. cloud_dog_vdb/ingestion/parse/providers/internal.py +83 -0
  68. cloud_dog_vdb/ingestion/parse/providers/marker_mcp.py +643 -0
  69. cloud_dog_vdb/ingestion/parse/providers/mineru.py +703 -0
  70. cloud_dog_vdb/ingestion/parse/providers/transformers.py +176 -0
  71. cloud_dog_vdb/ingestion/parse/quality.py +21 -0
  72. cloud_dog_vdb/ingestion/parse/registry.py +36 -0
  73. cloud_dog_vdb/ingestion/pipeline.py +433 -0
  74. cloud_dog_vdb/ingestion/table/__init__.py +25 -0
  75. cloud_dog_vdb/ingestion/table/policy.py +31 -0
  76. cloud_dog_vdb/ingestion/table/renderers.py +74 -0
  77. cloud_dog_vdb/ingestion/table/schema.py +40 -0
  78. cloud_dog_vdb/ingestion/verify.py +30 -0
  79. cloud_dog_vdb/integrations/__init__.py +15 -0
  80. cloud_dog_vdb/integrations/langchain.py +32 -0
  81. cloud_dog_vdb/integrations/llamaindex.py +32 -0
  82. cloud_dog_vdb/isolation/__init__.py +15 -0
  83. cloud_dog_vdb/isolation/manager.py +36 -0
  84. cloud_dog_vdb/jobs/__init__.py +15 -0
  85. cloud_dog_vdb/jobs/models.py +28 -0
  86. cloud_dog_vdb/jobs/queue.py +45 -0
  87. cloud_dog_vdb/jobs/status.py +32 -0
  88. cloud_dog_vdb/jobs/worker.py +28 -0
  89. cloud_dog_vdb/lifecycle/__init__.py +25 -0
  90. cloud_dog_vdb/lifecycle/manager.py +53 -0
  91. cloud_dog_vdb/lifecycle/retention.py +83 -0
  92. cloud_dog_vdb/metadata/__init__.py +46 -0
  93. cloud_dog_vdb/metadata/filters.py +130 -0
  94. cloud_dog_vdb/metadata/identity.py +72 -0
  95. cloud_dog_vdb/metadata/normalise.py +35 -0
  96. cloud_dog_vdb/metadata/provenance.py +102 -0
  97. cloud_dog_vdb/metadata/schema.py +166 -0
  98. cloud_dog_vdb/observability/__init__.py +15 -0
  99. cloud_dog_vdb/observability/audit.py +32 -0
  100. cloud_dog_vdb/observability/metrics.py +37 -0
  101. cloud_dog_vdb/observability/otel.py +32 -0
  102. cloud_dog_vdb/options/__init__.py +15 -0
  103. cloud_dog_vdb/options/chroma.py +28 -0
  104. cloud_dog_vdb/options/common.py +35 -0
  105. cloud_dog_vdb/options/manager.py +34 -0
  106. cloud_dog_vdb/options/opensearch.py +28 -0
  107. cloud_dog_vdb/options/pgvector.py +28 -0
  108. cloud_dog_vdb/options/qdrant.py +28 -0
  109. cloud_dog_vdb/options/weaviate.py +28 -0
  110. cloud_dog_vdb/remote/__init__.py +20 -0
  111. cloud_dog_vdb/remote/client.py +105 -0
  112. cloud_dog_vdb/runtime/__init__.py +18 -0
  113. cloud_dog_vdb/runtime/client.py +362 -0
  114. cloud_dog_vdb/runtime/factory.py +113 -0
  115. cloud_dog_vdb/search/__init__.py +15 -0
  116. cloud_dog_vdb/search/engine.py +44 -0
  117. cloud_dog_vdb/search/rerank.py +29 -0
  118. cloud_dog_vdb/testing/__init__.py +22 -0
  119. cloud_dog_vdb/testing/comparison.py +424 -0
  120. cloud_dog_vdb/testing/comparison_report.py +89 -0
  121. cloud_dog_vdb/testing/conformance.py +32 -0
  122. cloud_dog_vdb/testing/fixtures.py +30 -0
  123. cloud_dog_vdb/testing/mock_adapters.py +32 -0
  124. cloud_dog_vdb/versioning/__init__.py +24 -0
  125. cloud_dog_vdb/versioning/schema_version.py +151 -0
  126. cloud_dog_vdb-0.5.4.dist-info/METADATA +43 -0
  127. cloud_dog_vdb-0.5.4.dist-info/RECORD +131 -0
  128. cloud_dog_vdb-0.5.4.dist-info/WHEEL +4 -0
  129. cloud_dog_vdb-0.5.4.dist-info/licenses/LICENCE +190 -0
  130. cloud_dog_vdb-0.5.4.dist-info/licenses/LICENSE +176 -0
  131. cloud_dog_vdb-0.5.4.dist-info/licenses/NOTICE +7 -0
@@ -0,0 +1,45 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ from cloud_dog_vdb.domain.models import (
18
+ CapabilityDescriptor,
19
+ CollectionSpec,
20
+ Job,
21
+ Record,
22
+ SearchRequest,
23
+ SearchResponse,
24
+ SearchResult,
25
+ )
26
+ from cloud_dog_vdb.factory import get_vdb_client
27
+ from cloud_dog_vdb.ingestion.pipeline import IngestionPipeline, ParserIngestionOptions, ingest_document
28
+ from cloud_dog_vdb.runtime.client import VDBClient
29
+
30
+ __version__ = "0.5.4"
31
+
32
+ __all__ = [
33
+ "VDBClient",
34
+ "CapabilityDescriptor",
35
+ "CollectionSpec",
36
+ "Record",
37
+ "SearchRequest",
38
+ "SearchResult",
39
+ "SearchResponse",
40
+ "Job",
41
+ "get_vdb_client",
42
+ "ingest_document",
43
+ "ParserIngestionOptions",
44
+ "IngestionPipeline",
45
+ ]
@@ -0,0 +1,15 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # cloud_dog_vdb access
@@ -0,0 +1,32 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ from cloud_dog_vdb.access.policy import AccessPolicy
18
+
19
+
20
def can_read(role: str, policy: AccessPolicy) -> bool:
    """Report whether ``role`` may read under ``policy``.

    Read access is granted to any role found in the policy's ``readers``,
    ``writers`` or ``admins`` collections, checked in that order.
    """
    if role in policy.readers:
        return True
    if role in policy.writers:
        return True
    return role in policy.admins
23
+
24
+
25
def can_write(role: str, policy: AccessPolicy) -> bool:
    """Report whether ``role`` may write under ``policy``.

    Write access is granted to roles listed in ``writers`` or ``admins``;
    membership of ``readers`` alone is not sufficient.
    """
    if role in policy.writers:
        return True
    return role in policy.admins
28
+
29
+
30
def can_admin(role: str, policy: AccessPolicy) -> bool:
    """Report whether ``role`` is listed among the policy's ``admins``."""
    is_admin = role in policy.admins
    return is_admin
@@ -0,0 +1,38 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+
19
+
20
+ @dataclass(frozen=True, slots=True)
21
+ class AccessPolicy:
22
+ """Represent access policy."""
23
+
24
+ readers: set[str] = field(default_factory=set)
25
+ writers: set[str] = field(default_factory=set)
26
+ admins: set[str] = field(default_factory=set)
27
+
28
+ def can_read(self, role: str) -> bool:
29
+ """Handle can read."""
30
+ return role in self.readers or self.can_write(role)
31
+
32
+ def can_write(self, role: str) -> bool:
33
+ """Handle can write."""
34
+ return role in self.writers or self.can_admin(role)
35
+
36
+ def can_admin(self, role: str) -> bool:
37
+ """Handle can admin."""
38
+ return role in self.admins
@@ -0,0 +1,39 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from cloud_dog_vdb.adapters.base import VDBAdapter
16
+ from cloud_dog_vdb.adapters.chroma import ChromaAdapter
17
+ from cloud_dog_vdb.adapters.factory import build_adapter
18
+ from cloud_dog_vdb.adapters.infinity import InfinityAdapter
19
+ from cloud_dog_vdb.adapters.opensearch import OpenSearchAdapter
20
+ from cloud_dog_vdb.adapters.qdrant import QdrantAdapter
21
+ from cloud_dog_vdb.adapters.registry import AdapterRegistry
22
+ from cloud_dog_vdb.adapters.weaviate import WeaviateAdapter
23
+
24
+ try: # optional dependency (asyncpg)
25
+ from cloud_dog_vdb.adapters.pgvector import PGVectorAdapter
26
+ except Exception: # pragma: no cover
27
+ PGVectorAdapter = None # type: ignore[assignment]
28
+
29
+ __all__ = [
30
+ "VDBAdapter",
31
+ "AdapterRegistry",
32
+ "build_adapter",
33
+ "ChromaAdapter",
34
+ "QdrantAdapter",
35
+ "WeaviateAdapter",
36
+ "OpenSearchAdapter",
37
+ "InfinityAdapter",
38
+ "PGVectorAdapter",
39
+ ]
@@ -0,0 +1,94 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ from abc import ABC, abstractmethod
18
+ from typing import Any
19
+
20
+ from cloud_dog_vdb.domain.models import CapabilityDescriptor, CollectionSpec
21
+
22
+
23
class VDBAdapter(ABC):
    """Abstract contract that every vector-database adapter implements.

    Every operation is a coroutine except ``capabilities``, which is a plain
    synchronous method. All methods are abstract; the bodies only raise
    ``NotImplementedError``.
    """

    @abstractmethod
    async def initialize(self, config: dict[str, Any] | None = None) -> bool:
        """Prepare the adapter for use, optionally with extra configuration."""
        raise NotImplementedError

    @abstractmethod
    async def health_check(self) -> bool:
        """Report whether the adapter considers itself healthy."""
        raise NotImplementedError

    @abstractmethod
    async def create_collection(self, spec: CollectionSpec) -> dict:
        """Create the collection described by ``spec`` and return a dict describing it."""
        raise NotImplementedError

    @abstractmethod
    async def get_collection(self, name: str) -> dict | None:
        """Look up a collection by name; the return type allows ``None``."""
        raise NotImplementedError

    @abstractmethod
    async def delete_collection(self, name: str) -> bool:
        """Delete the named collection and report the outcome as a bool."""
        raise NotImplementedError

    @abstractmethod
    async def add_documents(
        self,
        collection: str,
        documents: list[str],
        metadatas: list[dict[str, Any]] | None = None,
        ids: list[str] | None = None,
    ) -> list[str]:
        """Store ``documents`` in ``collection`` and return their identifiers.

        ``metadatas`` and ``ids`` are optional lists accompanying ``documents``.
        """
        raise NotImplementedError

    @abstractmethod
    async def search(
        self,
        collection: str,
        query: str,
        n_results: int,
        filter: dict[str, Any] | None = None,
        search_options: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Search ``collection`` for ``query`` and return the matching records.

        ``n_results`` is the requested result count; ``filter`` and
        ``search_options`` are optional dicts.
        """
        raise NotImplementedError

    @abstractmethod
    async def delete_document(self, collection: str, doc_id: str) -> bool:
        """Delete document ``doc_id`` from ``collection`` and report the outcome."""
        raise NotImplementedError

    @abstractmethod
    async def update_document(
        self,
        collection: str,
        doc_id: str,
        content: str,
        metadata: dict[str, Any] | None = None,
    ) -> bool:
        """Replace the content (and optionally metadata) of document ``doc_id``."""
        raise NotImplementedError

    @abstractmethod
    async def count_documents(self, collection: str, filter: dict[str, Any] | None = None) -> int:
        """Count the documents in ``collection``, optionally restricted by ``filter``."""
        raise NotImplementedError

    @abstractmethod
    def capabilities(self) -> CapabilityDescriptor:
        """Return the capability descriptor for this adapter (synchronous)."""
        raise NotImplementedError
@@ -0,0 +1,329 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import uuid
19
+ from typing import Any
20
+
21
+ import httpx
22
+
23
+ from cloud_dog_vdb.adapters.base import VDBAdapter
24
+ from cloud_dog_vdb.adapters.vector_utils import deterministic_vector
25
+ from cloud_dog_vdb.config.models import ProviderConfig
26
+ from cloud_dog_vdb.domain.models import CapabilityDescriptor, CollectionSpec
27
+ from cloud_dog_vdb.embeddings.base import EmbeddingProvider
28
+ from cloud_dog_vdb.embeddings.providers import build_embedding_provider
29
+
30
+
31
+ class ChromaAdapter(VDBAdapter):
32
+ """Represent chroma adapter."""
33
+
34
def __init__(self, config: ProviderConfig, *, local_mode: bool = False) -> None:
    """Set up the adapter state.

    Args:
        config: Provider configuration; ``timeout_seconds`` is used for the
            HTTP client and the whole object is handed to the embedding
            provider builder.
        local_mode: When true, collections and documents are kept in the
            in-process ``_local`` dict and no embedding provider is built.
    """
    self.config = config
    self.local_mode = local_mode
    # In-memory state: local collections and the embedding dimension per collection.
    self._local: dict[str, dict] = {}
    self._dims: dict[str, int] = {}
    # NOTE(review): no method of this class closes this client.
    self._client = httpx.AsyncClient(timeout=config.timeout_seconds)
    provider: EmbeddingProvider | None
    if local_mode:
        provider = None
    else:
        provider = build_embedding_provider(config)
    self._embedding_provider = provider
41
+
42
async def initialize(self, config: dict[str, Any] | None = None) -> bool:
    """Initialise the adapter by running a health check.

    ``config`` is accepted for interface compatibility and is not used.
    Returns the result of ``health_check``.
    """
    del config  # intentionally unused
    healthy = await self.health_check()
    return healthy
46
+
47
+ def _headers(self) -> dict[str, str]:
48
+ headers = {"Content-Type": "application/json"}
49
+ api_key = str(self.config.api_key)
50
+ if api_key:
51
+ headers["Authorization"] = f"Bearer {api_key}"
52
+ return headers
53
+
54
+ def _root(self) -> str:
55
+ base_url = str(self.config.base_url)
56
+ return f"{base_url.rstrip('/')}/api/v2/tenants/default_tenant/databases/default_database/collections"
57
+
58
+ @staticmethod
59
+ def _serialise_metadata_value(value: Any) -> str | int | float | bool:
60
+ if isinstance(value, bool):
61
+ return value
62
+ if isinstance(value, (str, int, float)):
63
+ return value
64
+ if value is None:
65
+ return ""
66
+ if isinstance(value, (list, tuple, set, dict)):
67
+ return json.dumps(value, ensure_ascii=True, sort_keys=True)
68
+ return str(value)
69
+
70
+ @classmethod
71
+ def _serialise_metadata(cls, metadata: dict[str, Any] | None) -> dict[str, str | int | float | bool]:
72
+ source = metadata or {}
73
+ return {
74
+ str(key): cls._serialise_metadata_value(value)
75
+ for key, value in source.items()
76
+ }
77
+
78
+ async def _embed_text(self, text: str, dim: int) -> list[float]:
79
+ if self._embedding_provider is None:
80
+ return deterministic_vector(text, dim)
81
+ vector = await self._embedding_provider.embed(text)
82
+ if not vector:
83
+ raise ValueError("Embedding provider returned an empty vector")
84
+ return vector
85
+
86
+ async def _embed_many(self, texts: list[str], dim: int) -> list[list[float]]:
87
+ return [await self._embed_text(text, dim) for text in texts]
88
+
89
async def health_check(self) -> bool:
    """Report whether the backend answers its heartbeat endpoint.

    Local mode is always healthy. Otherwise a GET is sent to
    ``/api/v2/heartbeat``; only a 200 response counts as healthy, and any
    exception raised while probing is reported as unhealthy.
    """
    if self.local_mode:
        return True
    try:
        heartbeat_url = f"{str(self.config.base_url).rstrip('/')}/api/v2/heartbeat"
        response = await self._client.get(heartbeat_url, headers=self._headers())
        healthy = response.status_code == 200
    except Exception:
        # Best-effort probe: every failure means "not healthy".
        return False
    return healthy
101
+
102
async def create_collection(self, spec: CollectionSpec) -> dict:
    """Create the collection described by ``spec``.

    In local mode the collection is recorded in memory and that record is
    returned. Otherwise a create request is posted; a 409 response yields
    ``{"name": ..., "status": "exists"}`` and any other error status raises
    through ``raise_for_status``.

    The embedding dimension from ``spec`` is remembered on every successful
    path, including the 409 path, so later calls for that collection do not
    fall back to the default dimension.
    """
    if self.local_mode:
        data = {"name": spec.name, "id": spec.name, "local": True, "metadata": dict(spec.metadata)}
        self._local[spec.name] = data
        self._dims[spec.name] = spec.embedding_dim
        return data

    payload: dict[str, Any] = {
        "name": spec.name,
        "configuration": {"hnsw": {"space": spec.distance_metric.value}},
    }
    if spec.metadata:
        payload["metadata"] = spec.metadata
    resp = await self._client.post(self._root(), headers=self._headers(), json=payload)
    if resp.status_code == 409:
        # The collection already exists; still remember the caller's dimension.
        self._dims[spec.name] = spec.embedding_dim
        return {"name": spec.name, "status": "exists"}
    resp.raise_for_status()
    self._dims[spec.name] = spec.embedding_dim
    return resp.json()
118
+
119
async def get_collection(self, name: str) -> dict | None:
    """Return the collection record named ``name``, or ``None`` if absent.

    Local mode reads the in-memory store. Otherwise the collections
    listing is fetched and scanned for the first entry with that name.
    """
    if self.local_mode:
        return self._local.get(name)
    response = await self._client.get(self._root(), headers=self._headers())
    response.raise_for_status()
    listing = response.json() or []
    return next((entry for entry in listing if entry.get("name") == name), None)
129
+
130
async def delete_collection(self, name: str) -> bool:
    """Delete the collection ``name`` and forget its cached dimension.

    Local mode returns whether an in-memory collection was removed.
    Otherwise deletion is first attempted by name, then by collection id;
    a status below 300 or a 404 counts as success, and so does a
    collection that cannot be found for the second attempt.
    """
    self._dims.pop(name, None)
    if self.local_mode:
        removed = self._local.pop(name, None)
        return removed is not None

    def _deleted(status: int) -> bool:
        return status < 300 or status == 404

    # Newer Chroma deployments accept collection name in delete path,
    # while some older deployments may still rely on collection ID.
    by_name = await self._client.delete(f"{self._root()}/{name}", headers=self._headers())
    if _deleted(by_name.status_code):
        return True

    collection_id = await self._collection_id(name)
    if not collection_id:
        return True
    by_id = await self._client.delete(f"{self._root()}/{collection_id}", headers=self._headers())
    return _deleted(by_id.status_code)
146
+
147
async def add_documents(
    self,
    collection: str,
    documents: list[str],
    metadatas: list[dict[str, Any]] | None = None,
    ids: list[str] | None = None,
) -> list[str]:
    """Store ``documents`` in ``collection`` and return their identifiers.

    Args:
        collection: Target collection name.
        documents: Text content to store.
        metadatas: Optional per-document metadata. Missing entries are
            treated as empty; values are serialised to scalars.
        ids: Optional per-document ids. Missing entries get a random
            ``uuid4`` hex id.

    Returns:
        The ids of the stored documents, in input order.

    Raises:
        ValueError: In remote mode, when the collection cannot be found.
    """
    contents = list(documents)
    doc_ids = [ids[i] if ids and i < len(ids) else uuid.uuid4().hex for i, _ in enumerate(contents)]
    metas = [
        self._serialise_metadata(metadatas[i] if metadatas and i < len(metadatas) else {})
        for i, _ in enumerate(contents)
    ]
    dim = self._dims.get(collection, 1024)

    if self.local_mode:
        embeddings = await self._embed_many(contents, dim)
        local = self._local.setdefault(collection, {"docs": {}})
        docs_state = local.setdefault("docs", {})
        for doc_id, content, meta, embedding in zip(doc_ids, contents, metas, embeddings):
            docs_state[doc_id] = {"content": content, "metadata": meta, "embedding": embedding}
        return doc_ids

    # Resolve the collection before embedding so a missing collection fails
    # fast without any embedding calls.
    cid = await self._collection_id(collection)
    if not cid:
        raise ValueError(f"Collection not found: {collection}")
    if not contents:
        # Nothing to store: skip the embedding and upsert requests.
        return []
    embeddings = await self._embed_many(contents, dim)
    payload = {"ids": doc_ids, "documents": contents, "metadatas": metas, "embeddings": embeddings}
    resp = await self._client.post(f"{self._root()}/{cid}/upsert", headers=self._headers(), json=payload)
    if resp.status_code == 404:
        # Fall back to the add endpoint when upsert is not available.
        resp = await self._client.post(f"{self._root()}/{cid}/add", headers=self._headers(), json=payload)
    resp.raise_for_status()
    return doc_ids
183
+
184
async def search(
    self,
    collection: str,
    query: str,
    n_results: int,
    filter: dict[str, Any] | None = None,
    search_options: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Return matches for ``query`` in ``collection``.

    Each result is a dict with ``id``, ``score``, ``content`` and
    ``metadata``. An empty query is embedded as ``"*"``. ``search_options``
    is accepted for interface compatibility and is not used.

    Local mode scores stored documents by the dot product of their
    embedding with the query vector, keeps only documents whose metadata
    equals every ``filter`` entry, and returns the top ``n_results``.
    Remote mode posts a query request and reports ``1.0 - distance`` as the
    score; an unknown collection yields an empty list.
    """
    del search_options  # intentionally unused
    dim = self._dims.get(collection, 1024)
    query_text = query or "*"

    if self.local_mode:
        stored = (self._local.get(collection) or {}).get("docs", {})
        query_vector = deterministic_vector(query_text, dim)
        hits: list[dict[str, Any]] = []
        for doc_id, entry in stored.items():
            entry_meta = entry.get("metadata") or {}
            if filter and not all(entry_meta.get(key) == wanted for key, wanted in filter.items()):
                continue
            # Documents without a stored embedding are scored as the query itself.
            vector = entry.get("embedding") or query_vector
            dot = sum(a * b for a, b in zip(vector, query_vector))
            hits.append(
                {
                    "id": doc_id,
                    "score": float(dot),
                    "content": entry.get("content", ""),
                    "metadata": entry_meta,
                }
            )
        hits.sort(key=lambda hit: hit["score"], reverse=True)
        return hits[:n_results]

    collection_id = await self._collection_id(collection)
    if not collection_id:
        return []
    request: dict[str, Any] = {
        "query_embeddings": [await self._embed_text(query_text, dim)],
        "n_results": n_results,
        "include": ["distances", "metadatas", "documents"],
    }
    if filter:
        request["where"] = self._build_where(filter)
    response = await self._client.post(
        f"{self._root()}/{collection_id}/query",
        headers=self._headers(),
        json=request,
    )
    response.raise_for_status()
    body = response.json()

    def _first_batch(key: str) -> list[Any]:
        # The response nests one result list per query; only one query is sent.
        return (body.get(key) or [[]])[0]

    ids = _first_batch("ids")
    documents = _first_batch("documents")
    metadatas = _first_batch("metadatas")
    distances = _first_batch("distances")
    return [
        {
            "id": str(doc_id),
            # A missing distance is treated as 1.0, i.e. a score of 0.0.
            "score": 1.0 - (float(distances[i]) if i < len(distances) else 1.0),
            "content": documents[i] if i < len(documents) else "",
            "metadata": metadatas[i] if i < len(metadatas) else {},
        }
        for i, doc_id in enumerate(ids)
    ]
244
+
245
async def delete_document(self, collection: str, doc_id: str) -> bool:
    """Delete document ``doc_id`` from ``collection``.

    Local mode returns whether the document existed in memory. Remote mode
    returns False for an unknown collection, otherwise whether the delete
    request returned a status below 300.
    """
    if self.local_mode:
        collection_state = self._local.get(collection) or {}
        removed = collection_state.get("docs", {}).pop(doc_id, None)
        return removed is not None
    collection_id = await self._collection_id(collection)
    if not collection_id:
        return False
    delete_url = f"{self._root()}/{collection_id}/delete"
    response = await self._client.post(delete_url, headers=self._headers(), json={"ids": [doc_id]})
    return response.status_code < 300
259
+
260
async def update_document(
    self, collection: str, doc_id: str, content: str, metadata: dict[str, Any] | None = None
) -> bool:
    """Replace the content, metadata and embedding of document ``doc_id``.

    Metadata is serialised to scalar values with ``_serialise_metadata``,
    the same conversion ``add_documents`` applies, so updated documents are
    stored and filtered consistently with newly added ones.

    Returns:
        Local mode: True when the document existed and was replaced.
        Remote mode: False for an unknown collection, otherwise whether the
        upsert (or the update fallback on a 404) returned a status below 300.
    """
    dim = self._dims.get(collection, 1024)
    clean_metadata = self._serialise_metadata(metadata)
    if self.local_mode:
        docs_state = (self._local.get(collection) or {}).setdefault("docs", {})
        if doc_id not in docs_state:
            return False
        docs_state[doc_id] = {
            "content": content,
            "metadata": clean_metadata,
            # With no embedding provider this yields the deterministic vector.
            "embedding": await self._embed_text(content, dim),
        }
        return True
    cid = await self._collection_id(collection)
    if not cid:
        return False
    payload = {
        "ids": [doc_id],
        "documents": [content],
        "metadatas": [clean_metadata],
        "embeddings": [await self._embed_text(content, dim)],
    }
    resp = await self._client.post(f"{self._root()}/{cid}/upsert", headers=self._headers(), json=payload)
    if resp.status_code == 404:
        # Fall back to the update endpoint when upsert is not available.
        resp = await self._client.post(f"{self._root()}/{cid}/update", headers=self._headers(), json=payload)
    return resp.status_code < 300
288
+
289
async def count_documents(self, collection: str, filter: dict[str, Any] | None = None) -> int:
    """Count the documents in ``collection``, optionally matching ``filter``.

    Local mode counts in-memory documents whose metadata equals every
    filter entry. Remote mode returns 0 for an unknown collection or a 404.
    With a filter it counts the results of a ``search`` for ``"*"`` limited
    to 10000 results; without one it reads the count endpoint, which may
    return either a bare integer or an object with a ``count`` key.
    """
    if self.local_mode:
        docs_state = (self._local.get(collection) or {}).get("docs", {})
        if not filter:
            return len(docs_state)
        matches = 0
        for entry in docs_state.values():
            entry_meta = entry.get("metadata") or {}
            if all(entry_meta.get(key) == wanted for key, wanted in filter.items()):
                matches += 1
        return matches

    collection_id = await self._collection_id(collection)
    if not collection_id:
        return 0
    if filter:
        # Filtered counts go through search, so they are capped at 10000.
        filtered = await self.search(collection, "*", 10000, filter, {})
        return len(filtered)
    response = await self._client.get(f"{self._root()}/{collection_id}/count", headers=self._headers())
    if response.status_code == 404:
        return 0
    response.raise_for_status()
    body = response.json()
    if isinstance(body, int):
        return int(body)
    return int(body.get("count", 0))
312
+
313
def capabilities(self) -> CapabilityDescriptor:
    """Describe this adapter: filtering is supported; hybrid search and delete-by-filter are not."""
    descriptor = CapabilityDescriptor(
        provider_id="chroma",
        filtering=True,
        hybrid_search=False,
        delete_by_filter=False,
    )
    return descriptor
316
+
317
+ async def _collection_id(self, name: str) -> str | None:
318
+ resp = await self._client.get(self._root(), headers=self._headers())
319
+ resp.raise_for_status()
320
+ for col in resp.json() or []:
321
+ if col.get("name") == name:
322
+ return str(col.get("id") or col.get("name"))
323
+ return None
324
+
325
+ @staticmethod
326
+ def _build_where(filter: dict[str, Any]) -> dict[str, Any]:
327
+ if len(filter) <= 1:
328
+ return dict(filter)
329
+ return {"$and": [{k: v} for k, v in filter.items()]}
@@ -0,0 +1,51 @@
1
+ # Copyright 2026 Cloud-Dog, Viewdeck Engineering Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # cloud_dog_vdb — Adapter factory
16
+ """Factory helpers for constructing VDB adapters from provider config."""
17
+
18
+ from __future__ import annotations
19
+
20
+ from cloud_dog_vdb.adapters.base import VDBAdapter
21
+ from cloud_dog_vdb.config.models import ProviderConfig
22
+
23
+
24
def build_adapter(config: ProviderConfig, *, local_mode: bool = False) -> VDBAdapter:
    """Construct the adapter selected by ``config.provider_id``.

    The provider id is matched case-insensitively. Each adapter module is
    imported only when its provider is selected.

    Raises:
        ValueError: If the provider id is not one of the supported backends.
    """
    provider = config.provider_id.lower()
    if provider == "chroma":
        from cloud_dog_vdb.adapters.chroma import ChromaAdapter as adapter_cls
    elif provider == "qdrant":
        from cloud_dog_vdb.adapters.qdrant import QdrantAdapter as adapter_cls
    elif provider == "weaviate":
        from cloud_dog_vdb.adapters.weaviate import WeaviateAdapter as adapter_cls
    elif provider == "opensearch":
        from cloud_dog_vdb.adapters.opensearch import OpenSearchAdapter as adapter_cls
    elif provider == "pgvector":
        from cloud_dog_vdb.adapters.pgvector import PGVectorAdapter as adapter_cls
    elif provider == "infinity":
        from cloud_dog_vdb.adapters.infinity import InfinityAdapter as adapter_cls
    else:
        raise ValueError(f"Unsupported provider: {config.provider_id}")
    return adapter_cls(config, local_mode=local_mode)