cognee-community-vector-adapter-pinecone 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Public package surface: re-export the adapter class so users can do
# `from cognee_community_vector_adapter_pinecone import PineconeAdapter`.
from .pinecone_adapter import PineconeAdapter

__all__ = ["PineconeAdapter"]
@@ -0,0 +1,360 @@
1
+ import asyncio
2
+
3
+ from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
4
+ from cognee.infrastructure.databases.vector import VectorDBInterface
5
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import (
6
+ EmbeddingEngine,
7
+ )
8
+ from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
9
+ from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
10
+ from cognee.infrastructure.engine import DataPoint
11
+ from cognee.infrastructure.engine.utils import parse_id
12
+ from cognee.shared.logging_utils import get_logger
13
+ from pinecone import Pinecone, ServerlessSpec
14
+
15
+ logger = get_logger("PineconeAdapter")
16
+
17
+
18
def sanitize_pinecone_name(name: str) -> str:
    """Sanitize *name* so it satisfies Pinecone's index-naming rules.

    Pinecone index names must:
      - contain only lowercase alphanumeric characters and hyphens,
      - start with a letter,
      - not end with a hyphen.

    Args:
        name: Arbitrary candidate name (e.g. ``"My_Index.v2"``).

    Returns:
        A name satisfying the rules above. An empty or fully-invalid
        input degrades to the fallback ``"index"`` instead of raising.
    """
    import re

    # Pinecone accepts lowercase only.
    name = name.lower()

    # Replace every disallowed character with a hyphen.
    name = re.sub(r"[^a-z0-9\-]", "-", name)

    # Ensure it starts with a letter. The emptiness guard fixes an
    # IndexError the original code raised on `name == ""`.
    if not name or not name[0].isalpha():
        name = "index-" + name

    # Collapse hyphen runs introduced by the substitution above.
    name = re.sub(r"-+", "-", name)

    # Names must not end with a hyphen.
    name = name.rstrip("-")

    return name
44
+
45
+
46
class IndexSchema(DataPoint):
    # Minimal DataPoint wrapper used by index_data_points: only the text of
    # the configured index field is stored and embedded.
    text: str

    # Tells the embedding pipeline which field(s) to embed.
    metadata: dict = {"index_fields": ["text"]}
50
+
51
+
52
class PineconeAdapter(VectorDBInterface):
    """Cognee vector-database adapter backed by Pinecone serverless indexes.

    Each cognee "collection" maps to one Pinecone index. Embeddings are
    produced by the configured ``EmbeddingEngine``; all Pinecone calls go
    through the synchronous ``pinecone`` client.
    """

    name = "Pinecone"
    # Class-level defaults; instances overwrite these in __init__.
    api_key: str | None = None
    environment: str | None = None
    cloud: str | None = None
    region: str | None = None

    def __init__(
        self,
        url,
        api_key,
        embedding_engine: EmbeddingEngine,
        database_name: str = "cognee",
        environment: str | None = None,
        cloud: str | None = None,
        region: str | None = None,
    ):
        """Initialize the adapter and the underlying Pinecone client.

        Args:
            url: Unused by Pinecone; kept for the Cognee interface.
            api_key: Pinecone API key.
            embedding_engine: Engine that turns text into vectors.
            database_name: Logical database name (Cognee-side grouping).
            environment: Optional Pinecone environment label.
            cloud: Serverless cloud provider; defaults to "aws".
            region: Serverless region; defaults to "us-east-1".
        """
        self.url = url  # Not used by Pinecone, but required by Cognee interface
        self.api_key = api_key
        self.environment = environment
        self.database_name = database_name
        # Serverless location defaults when the caller does not specify one.
        self.cloud = cloud if cloud is not None else "aws"
        self.region = region if region is not None else "us-east-1"
        self.embedding_engine = embedding_engine
        # Serializes index creation so concurrent tasks don't race create_index.
        self.VECTOR_DB_LOCK = asyncio.Lock()

        # Initialize Pinecone client
        self.pc = Pinecone(api_key=api_key)

    def get_pinecone_index(self, collection_name: str):
        """Return a Pinecone ``Index`` handle for *collection_name*."""
        return self.pc.Index(collection_name)

    async def embed_data(self, data: list[str]) -> list[list[float]]:
        """Embed *data*; returns one vector per input string.

        NOTE: the original annotation said ``list[float]``, but the engine
        returns a vector per text, i.e. a list of vectors.
        """
        return await self.embedding_engine.embed_text(data)

    async def has_collection(self, collection_name: str) -> bool:
        """Return True when an index named *collection_name* exists.

        API errors are logged and reported as "not found" (False) rather
        than raised, so callers can treat this as a best-effort probe.
        """
        try:
            index_list = self.pc.list_indexes()
            return collection_name in [index.name for index in index_list]
        except Exception as e:
            logger.error("Error checking if collection exists: %s", str(e))
            return False

    async def create_collection(
        self,
        collection_name: str,
        payload_schema=None,
    ):
        """Create a serverless index for *collection_name* if absent.

        Args:
            collection_name: Index name (must already be Pinecone-safe).
            payload_schema: Ignored; accepted for interface compatibility.

        Raises:
            Exception: Propagates any Pinecone error during creation.
        """
        async with self.VECTOR_DB_LOCK:
            if not await self.has_collection(collection_name):
                try:
                    self.pc.create_index(
                        name=collection_name,
                        dimension=self.embedding_engine.get_vector_size(),
                        metric="cosine",
                        spec=ServerlessSpec(cloud=self.cloud, region=self.region),
                    )
                    logger.info("Created Pinecone index: %s", collection_name)
                except Exception as e:
                    logger.error("Error creating collection: %s", str(e))
                    raise e

    async def create_data_points(self, collection_name: str, data_points: list[DataPoint]):
        """Embed *data_points* and upsert them into the given index.

        Args:
            collection_name: Target index name.
            data_points: Points to embed and store.

        Raises:
            CollectionNotFoundError: If the index does not exist.
            Exception: Propagates embedding/upsert failures after logging.
        """
        try:
            if not await self.has_collection(collection_name):
                raise CollectionNotFoundError(message=f"Collection {collection_name} not found!")

            index = self.get_pinecone_index(collection_name)

            data_vectors = await self.embed_data(
                [DataPoint.get_embeddable_data(data_point) for data_point in data_points]
            )

            def convert_to_pinecone_vector(data_point: DataPoint, vector: list[float]):
                # Pinecone metadata supports only strings, numbers, booleans
                # and lists of strings; everything else is stringified.
                clean_metadata = {}
                data_dump = data_point.model_dump()

                for key, value in data_dump.items():
                    if key != "metadata":  # Skip the nested metadata field that causes issues
                        if isinstance(value, (str, int, float, bool)):
                            clean_metadata[key] = value
                        elif isinstance(value, list) and all(
                            isinstance(item, str) for item in value
                        ):
                            clean_metadata[key] = value
                        else:
                            # Convert complex types to strings
                            clean_metadata[key] = str(value)

                return {
                    "id": str(data_point.id),
                    "values": vector,
                    "metadata": clean_metadata,
                }

            # Pair each point with its vector positionally. The previous
            # `data_vectors[data_points.index(data_point)]` lookup was O(n^2)
            # and picked the wrong vector whenever two points compared equal.
            vectors = [
                convert_to_pinecone_vector(point, vector)
                for point, vector in zip(data_points, data_vectors)
            ]

            index.upsert(vectors=vectors)
            logger.info("Uploaded %d data points to Pinecone", len(vectors))

        except Exception as error:
            logger.error("Error uploading data points to Pinecone: %s", str(error))
            raise error

    async def create_vector_index(self, index_name: str, index_property_name: str):
        """Create the index backing the (index_name, property) pair."""
        sanitized_name = sanitize_pinecone_name(f"{index_name}_{index_property_name}")
        await self.create_collection(sanitized_name)

    async def index_data_points(
        self, index_name: str, index_property_name: str, data_points: list[DataPoint]
    ):
        """Index the configured embeddable field of each data point.

        Each point is reduced to an IndexSchema carrying only the text of
        its first configured index field.
        """
        sanitized_name = sanitize_pinecone_name(f"{index_name}_{index_property_name}")
        await self.create_data_points(
            sanitized_name,
            [
                IndexSchema(
                    id=data_point.id,
                    text=getattr(data_point, data_point.metadata["index_fields"][0]),
                )
                for data_point in data_points
            ],
        )

    async def retrieve(self, collection_name: str, data_point_ids: list[str]):
        """Fetch raw Pinecone records for the given IDs.

        Raises:
            CollectionNotFoundError: If the index does not exist.
            Exception: Propagates fetch failures after logging.
        """
        try:
            if not await self.has_collection(collection_name):
                raise CollectionNotFoundError(message=f"Collection {collection_name} not found!")

            index = self.get_pinecone_index(collection_name)
            results = index.fetch(ids=data_point_ids)
            return results
        except Exception as e:
            logger.error("Error retrieving data points: %s", str(e))
            raise e

    async def search(
        self,
        collection_name: str,
        query_text: str | None = None,
        query_vector: list[float] | None = None,
        limit: int = 15,
        with_vector: bool = False,
        include_payload: bool = False,
    ) -> list[ScoredResult]:
        """Search for similar vectors in the collection.

        Args:
            collection_name: Name of the collection to search.
            query_text: Text query to search for (will be embedded).
            query_vector: Pre-computed query vector.
            limit: Maximum number of results to return; 0 means "all"
                (resolved via index stats).
            with_vector: Whether to include vectors in results.
            include_payload: Whether to include payload in results.

        Returns:
            List of ScoredResult objects sorted by similarity.

        Raises:
            MissingQueryParameterError: If neither query_text nor query_vector is provided.
        """
        if query_text is None and query_vector is None:
            raise MissingQueryParameterError()

        if not await self.has_collection(collection_name):
            logger.warning(
                f"Collection '{collection_name}' not found in PineconeAdapter.search; returning []."
            )
            return []

        if query_vector is None:
            query_vector = (await self.embed_data([query_text]))[0]

        try:
            index = self.get_pinecone_index(collection_name)

            if limit == 0:
                # Get actual index stats instead of hardcoded limit
                stats = index.describe_index_stats()
                limit = stats.total_vector_count

                # Empty index: nothing to query.
                if limit == 0:
                    return []

            results = index.query(
                vector=query_vector,
                top_k=limit,
                include_metadata=include_payload,
                include_values=with_vector,
            )

            return [
                ScoredResult(
                    id=parse_id(match.id),
                    payload={
                        **match.metadata,
                        "id": parse_id(match.id),
                    }
                    if include_payload
                    else {},
                    # Pinecone returns similarity score (0-1, higher = more similar)
                    score=match.score,
                )
                for match in results.matches
            ]

        except Exception as e:
            logger.error("Error searching collection: %s", str(e))
            raise e

    async def batch_search(
        self,
        collection_name: str,
        query_texts: list[str],
        limit: int | None = None,
        with_vectors: bool = False,
        include_payload: bool = False,
    ) -> list[list[ScoredResult]]:
        """Perform batch search for multiple queries.

        Args:
            collection_name: Name of the collection to search.
            query_texts: List of query texts to search for.
            limit: Maximum number of results per query (defaults to 15).
            with_vectors: Whether to include vectors in results.
            include_payload: Whether to include payload in results.

        Returns:
            List of search results, one list per query. A query that fails
            yields an empty list rather than aborting the batch.

        Raises:
            Exception: If embedding or index access fails outright.
        """
        if limit is None:
            limit = 15

        if not await self.has_collection(collection_name):
            # NOTE: added the missing space between the concatenated
            # f-strings ("batch_search;returning" -> "batch_search; returning").
            logger.warning(
                f"Collection '{collection_name}' not found in PineconeAdapter.batch_search; "
                f"returning empty results."
            )
            return [[] for _ in query_texts]

        try:
            vectors = await self.embed_data(query_texts)
            index = self.get_pinecone_index(collection_name)

            results = []
            for i, vector in enumerate(vectors):
                try:
                    result = index.query(
                        vector=vector,
                        top_k=limit,
                        include_metadata=include_payload,
                        include_values=with_vectors,
                    )

                    # Convert to ScoredResult objects (no filtering to match other adapters)
                    scored_results = [
                        ScoredResult(
                            id=parse_id(match.id),
                            payload={
                                **match.metadata,
                                "id": parse_id(match.id),
                            }
                            if include_payload
                            else {},
                            score=match.score,
                        )
                        for match in result.matches
                    ]
                    results.append(scored_results)

                except Exception as e:
                    # Per-query failures degrade to an empty result list.
                    logger.error(f"Error in batch search for query {i}: {str(e)}")
                    results.append([])

            return results

        except Exception as e:
            logger.error(f"Error during batch search: {str(e)}")
            raise e

    async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
        """Delete the given IDs from the index.

        Raises:
            CollectionNotFoundError: If the index does not exist.
            Exception: Propagates delete failures after logging.
        """
        try:
            if not await self.has_collection(collection_name):
                raise CollectionNotFoundError(message=f"Collection {collection_name} not found!")

            index = self.get_pinecone_index(collection_name)
            results = index.delete(ids=data_point_ids)
            logger.info("Deleted %d data points from %s", len(data_point_ids), collection_name)
            return results
        except Exception as e:
            logger.error("Error deleting data points: %s", str(e))
            raise e

    async def prune(self):
        """Delete all indexes in Pinecone"""
        try:
            index_list = self.pc.list_indexes()

            for index_info in index_list:
                self.pc.delete_index(index_info.name)
                logger.info("Deleted Pinecone index: %s", index_info.name)

        except Exception as e:
            logger.error("Error pruning Pinecone indexes: %s", str(e))
            raise e
@@ -0,0 +1,5 @@
1
from cognee.infrastructure.databases.vector import use_vector_adapter

from .pinecone_adapter import PineconeAdapter

# Register the adapter under the "pinecone" provider name so cognee can
# select it through its vector-database configuration. Importing this
# module is all that is needed to activate the adapter.
use_vector_adapter("pinecone", PineconeAdapter)
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: cognee-community-vector-adapter-pinecone
3
+ Version: 0.1.0
4
+ Summary: Pinecone vector database adapter for cognee
5
+ Requires-Python: <=3.13,>=3.11
6
+ Requires-Dist: cognee==0.5.2
7
+ Requires-Dist: instructor>=1.11
8
+ Requires-Dist: pinecone>=3.0.0
9
+ Requires-Dist: starlette>=0.48.0
10
+ Description-Content-Type: text/markdown
11
+
12
+ # Cognee Pinecone Adapter
13
+
14
+ ## Installation
15
+
16
+ Once published, the package can be installed directly via pip:
17
+
18
+ ```bash
19
+ pip install cognee-community-vector-adapter-pinecone
20
+ ```
21
+
22
+ If it has not been published yet, you can use Poetry to build and install the adapter package locally:
23
+
24
+ ```bash
25
+ pip install poetry
26
+ poetry install # run this command in the directory containing the pyproject.toml file
27
+ ```
28
+
29
+ ## Connection Setup
30
+
31
+ To use the Pinecone adapter, you need to:
32
+
33
+ 1. Sign up for a Pinecone account at https://www.pinecone.io/
34
+ 2. Create a new project and get your API key
35
+ 3. Note your environment details (cloud provider and region)
36
+
37
+ ## Usage
38
+
39
+ Import and register the adapter in your code:
40
+
41
+ ```python
42
+ from cognee_community_vector_adapter_pinecone import register
43
+ ```
44
+
45
+ ## Example
46
+
47
+ See example in `example.py` file.
@@ -0,0 +1,6 @@
1
+ cognee_community_vector_adapter_pinecone/__init__.py,sha256=To8bCX7WdMDbAtjdF0bf4FVMR2NUxYvm6JAqAVFLUag,77
2
+ cognee_community_vector_adapter_pinecone/pinecone_adapter.py,sha256=pzstAY_9uOGpWt6xL61h9yz4wT81dAPqUZDJ69zSTMQ,13073
3
+ cognee_community_vector_adapter_pinecone/register.py,sha256=TggMmL38QcbWvqWMw7ZtPupwmU2DKL0S3pwxLs1d13k,166
4
+ cognee_community_vector_adapter_pinecone-0.1.0.dist-info/METADATA,sha256=gUMNawhgH3ngwQOyEwqlvTiAHtPrw_VvhyDdMZSZFow,1126
5
+ cognee_community_vector_adapter_pinecone-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ cognee_community_vector_adapter_pinecone-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any