langchain-githubcopilot-chat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,439 @@
1
+ """GithubcopilotChat vector stores."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from typing import (
7
+ Any,
8
+ Callable,
9
+ Iterator,
10
+ List,
11
+ Optional,
12
+ Sequence,
13
+ Tuple,
14
+ Type,
15
+ TypeVar,
16
+ )
17
+
18
+ from langchain_core.documents import Document
19
+ from langchain_core.embeddings import Embeddings
20
+ from langchain_core.vectorstores import VectorStore
21
+ from langchain_core.vectorstores.utils import _cosine_similarity as cosine_similarity
22
+
23
+ # Generic type variable constrained to VectorStore subclasses, available for
+ # annotating constructors that return the subclass they are called on.
+ VST = TypeVar("VST", bound=VectorStore)
24
+
25
+
26
class GithubcopilotChatVectorStore(VectorStore):
    """In-memory vector store shipped with the GithubcopilotChat integration.

    Documents are embedded with the supplied ``Embeddings`` object and kept in
    a plain ``dict`` keyed by document id. Searches score every stored vector
    against the query vector with cosine similarity. Nothing is persisted.

    Setup:
        Install ``langchain-githubcopilot-chat``. The store itself reads no
        environment variables; configure credentials for whichever embedding
        model you pass in.

        .. code-block:: bash

            pip install -U langchain-githubcopilot-chat

    Key init args:
        embedding: Embeddings
            Embedding function used for both documents and queries.

    Instantiate:
        .. code-block:: python

            from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
            from langchain_openai import OpenAIEmbeddings

            vector_store = GithubcopilotChatVectorStore(embedding=OpenAIEmbeddings())

    Add Documents:
        .. code-block:: python

            from langchain_core.documents import Document

            document_1 = Document(page_content="foo", metadata={"baz": "bar"})
            document_2 = Document(page_content="thud", metadata={"bar": "baz"})
            document_3 = Document(page_content="i will be deleted :(")

            documents = [document_1, document_2, document_3]
            ids = ["1", "2", "3"]
            vector_store.add_documents(documents=documents, ids=ids)

    Delete Documents:
        .. code-block:: python

            vector_store.delete(ids=["3"])

    Search:
        .. code-block:: python

            results = vector_store.similarity_search(query="thud", k=1)
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

    Search with filter:
        ``filter`` is a callable that receives each stored ``Document`` and
        returns ``True`` to keep it.

        .. code-block:: python

            results = vector_store.similarity_search(
                query="thud",
                k=1,
                filter=lambda doc: doc.metadata.get("bar") == "baz",
            )
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

    Search with score:
        .. code-block:: python

            results = vector_store.similarity_search_with_score(query="qux", k=1)
            for doc, score in results:
                print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

    Async:
        No async methods are overridden here; the calls below rely on the
        implementations inherited from ``VectorStore``.

        .. code-block:: python

            # add documents
            # await vector_store.aadd_documents(documents=documents, ids=ids)

            # delete documents
            # await vector_store.adelete(ids=["3"])

            # search
            # results = await vector_store.asimilarity_search(query="thud", k=1)

            # search with score
            results = await vector_store.asimilarity_search_with_score(query="qux", k=1)
            for doc, score in results:
                print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

    Use as Retriever:
        This class does not implement ``max_marginal_relevance_search``, so
        use a similarity-based ``search_type``.

        .. code-block:: python

            retriever = vector_store.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 1},
            )
            retriever.invoke("thud")

    """  # noqa: E501

    def __init__(self, embedding: Embeddings) -> None:
        """Create an empty store.

        Args:
            embedding: Embedding function used for documents and queries.
        """
        # Maps document id -> {"id", "vector", "text", "metadata"}.
        self._database: dict[str, dict[str, Any]] = {}
        self.embedding = embedding

    @classmethod
    def from_texts(
        cls: Type[GithubcopilotChatVectorStore],
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> GithubcopilotChatVectorStore:
        """Build a new store and populate it from raw texts.

        Args:
            texts: Texts to embed and store.
            embedding: Embedding function to use.
            metadatas: Optional metadata, passed through to ``add_texts``.
            **kwargs: Forwarded unchanged to ``add_texts``.

        Returns:
            The newly created store.
        """
        store = cls(embedding=embedding)
        store.add_texts(texts=texts, metadatas=metadatas, **kwargs)
        return store

    @property
    def embeddings(self) -> Embeddings:
        """Return the embedding function this store was created with."""
        return self.embedding

    @staticmethod
    def _record_to_document(record: dict[str, Any]) -> Document:
        """Convert one stored record back into a ``Document`` (id included)."""
        return Document(
            id=record["id"],
            page_content=record["text"],
            metadata=record["metadata"],
        )

    def add_documents(
        self,
        documents: List[Document],
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Embed ``documents`` and store them, replacing entries with the same id.

        Args:
            documents: Documents to add.
            ids: Optional ids, one per document. When omitted (or empty), each
                document's own ``id`` is used. Any missing or empty id is
                replaced with a fresh UUID4 string.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The ids under which the documents were stored, in input order.

        Raises:
            ValueError: If ``ids`` is given and its length differs from the
                number of documents.
        """
        # Validate first so an invalid call never triggers an embedding request.
        if ids and len(ids) != len(documents):
            msg = (
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(documents)} texts."
            )
            raise ValueError(msg)

        texts = [doc.page_content for doc in documents]
        vectors = self.embedding.embed_documents(texts)

        candidate_ids: Iterator[Optional[str]] = (
            iter(ids) if ids else (doc.id for doc in documents)
        )

        stored_ids: List[str] = []
        for doc, vector, candidate in zip(documents, vectors, candidate_ids):
            doc_id = candidate if candidate else str(uuid.uuid4())
            stored_ids.append(doc_id)
            self._database[doc_id] = {
                "id": doc_id,
                "vector": vector,
                "text": doc.page_content,
                "metadata": doc.metadata,
            }
        return stored_ids

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Remove the given ids from the store.

        Unknown ids are ignored. Calling with ``ids=None`` (or an empty list)
        is a no-op; it does not clear the store.

        Args:
            ids: Ids of the documents to remove.
            **kwargs: Accepted for interface compatibility; not used.
        """
        for doc_id in ids or ():
            self._database.pop(doc_id, None)

    def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        """Get documents by their ids.

        Args:
            ids: The ids of the documents to get.

        Returns:
            The documents found, in the order their ids were requested. Ids
            that are not in the store are skipped.
        """
        found: list[Document] = []
        for doc_id in ids:
            record = self._database.get(doc_id)
            if record is not None:
                found.append(self._record_to_document(record))
        return found

    # NOTE: the below helper method implements similarity search for in-memory
    # storage. It is optional and not a part of the vector store interface.
    def _similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Callable[[Document], bool]] = None,
        **kwargs: Any,
    ) -> List[tuple[Document, float, List[float]]]:
        """Rank stored documents by cosine similarity to ``embedding``.

        Args:
            embedding: Query vector.
            k: Maximum number of results. Non-positive values yield ``[]``.
            filter: Optional predicate; only documents for which it returns a
                truthy value are scored. It receives a ``Document`` carrying
                the stored id, text and metadata.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Up to ``k`` ``(document, score, stored_vector)`` tuples, highest
            score first.
        """
        # A negative k would otherwise reach the slice below and silently
        # return "all but the last |k|" hits.
        if k <= 0:
            return []

        # Snapshot the records so indices stay aligned with the score array.
        records = list(self._database.values())
        if filter is not None:
            records = [
                record
                for record in records
                if filter(self._record_to_document(record))
            ]
        if not records:
            return []

        scores = cosine_similarity(
            [embedding], [record["vector"] for record in records]
        )[0]

        # argsort is ascending; reverse it to get best-first, then truncate.
        best_first = scores.argsort()[::-1][:k]

        return [
            (
                self._record_to_document(records[idx]),
                float(scores[idx].item()),
                records[idx]["vector"],
            )
            for idx in best_first
        ]

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return the documents most similar to ``query``.

        Args:
            query: Text to embed and search for.
            k: Maximum number of documents to return.
            **kwargs: Forwarded to the vector search (e.g. ``filter``).

        Returns:
            Up to ``k`` documents, most similar first.
        """
        query_vector = self.embedding.embed_query(query)
        hits = self._similarity_search_with_score_by_vector(
            embedding=query_vector, k=k, **kwargs
        )
        return [doc for doc, _, _ in hits]

    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Return the documents most similar to ``query`` with their scores.

        Args:
            query: Text to embed and search for.
            k: Maximum number of documents to return.
            **kwargs: Forwarded to the vector search (e.g. ``filter``).

        Returns:
            Up to ``k`` ``(document, cosine_similarity)`` pairs, most similar
            first.
        """
        query_vector = self.embedding.embed_query(query)
        hits = self._similarity_search_with_score_by_vector(
            embedding=query_vector, k=k, **kwargs
        )
        return [(doc, score) for doc, score, _ in hits]

    # NOTE: the following optional parts of the vector store interface are
    # intentionally not overridden by this class:
    #   - similarity_search_by_vector
    #   - max_marginal_relevance_search / max_marginal_relevance_search_by_vector
    #   - custom async variants (afrom_texts, aadd_documents, adelete,
    #     aget_by_ids, asimilarity_search*, amax_marginal_relevance_search*)
    # Whatever VectorStore provides for them is used as-is.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 LangChain, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.1
2
+ Name: langchain-githubcopilot-chat
3
+ Version: 0.1.0
4
+ Summary: An integration package connecting GithubcopilotChat and LangChain
5
+ Home-page: https://github.com/langchain-ai/langchain
6
+ License: MIT
7
+ Author: YIhan Wu
8
+ Author-email: iumm@ibat.ac.cn
9
+ Requires-Python: >=3.9,<4.0
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Dist: httpx (>=0.24.0)
18
+ Requires-Dist: langchain-core (>=0.3.15,<0.4.0)
19
+ Project-URL: Repository, https://github.com/langchain-ai/langchain
20
+ Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22githubcopilot-chat%3D%3D0%22&expanded=true
21
+ Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/githubcopilot-chat
22
+ Description-Content-Type: text/markdown
23
+
24
+ # langchain-githubcopilot-chat
25
+
26
+ This package contains the LangChain integration with GithubcopilotChat.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install -U langchain-githubcopilot-chat
32
+ ```
33
+
34
+ You should also configure credentials by setting the following environment variables:
35
+
36
+ * TODO: fill this out
37
+
38
+ ## Chat Models
39
+
40
+ `ChatGithubcopilotChat` class exposes chat models from GithubcopilotChat.
41
+
42
+ ```python
43
+ from langchain_githubcopilot_chat import ChatGithubcopilotChat
44
+
45
+ llm = ChatGithubcopilotChat()
46
+ llm.invoke("Sing a ballad of LangChain.")
47
+ ```
48
+
49
+ ## Embeddings
50
+
51
+ `GithubcopilotChatEmbeddings` class exposes embeddings from GithubcopilotChat.
52
+
53
+ ```python
54
+ from langchain_githubcopilot_chat import GithubcopilotChatEmbeddings
55
+
56
+ embeddings = GithubcopilotChatEmbeddings()
57
+ embeddings.embed_query("What is the meaning of life?")
58
+ ```
59
+
60
+ ## LLMs
61
+
62
+ `GithubcopilotChatLLM` class exposes LLMs from GithubcopilotChat.
63
+
64
+ ```python
65
+ from langchain_githubcopilot_chat import GithubcopilotChatLLM
66
+
67
+ llm = GithubcopilotChatLLM()
68
+ llm.invoke("The meaning of life is")
69
+ ```
70
+
@@ -0,0 +1,13 @@
1
+ langchain_githubcopilot_chat/__init__.py,sha256=0gzCpfEQave85jhRxL_2jt5FSH3cnc5x-KF74BDDgLk,1162
2
+ langchain_githubcopilot_chat/chat_models.py,sha256=q9-oIM7HbnzBrZiAVVn1bKvez7-HrxJ0CFAbHLqvcCE,33756
3
+ langchain_githubcopilot_chat/document_loaders.py,sha256=40HublhiKiwqx_nA8kfpMgYlvVOoVUx_IJAYJz64nnQ,2257
4
+ langchain_githubcopilot_chat/embeddings.py,sha256=rnkeKHQn92brG0g_9PD0gF8km6pifJ22GheuSswftAQ,2732
5
+ langchain_githubcopilot_chat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ langchain_githubcopilot_chat/retrievers.py,sha256=ZjLO6AwcxfHmdTxWxKYJ2Xo2TjJWn9CS6zG7m6dRLMw,3104
7
+ langchain_githubcopilot_chat/toolkits.py,sha256=J-NwHAZ-h2iFBpgPuMKrvatSYXXcIhOEptgz_2NefiM,1993
8
+ langchain_githubcopilot_chat/tools.py,sha256=WwxpkhycsmlP1mczjyKT3H4-IuvLODr5aeGjMFIh7N8,2774
9
+ langchain_githubcopilot_chat/vectorstores.py,sha256=Iu10f1mLH8kQEFuRM0h5ZenI18wZ5oUD8LVRWMREvw4,14305
10
+ langchain_githubcopilot_chat-0.1.0.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
11
+ langchain_githubcopilot_chat-0.1.0.dist-info/METADATA,sha256=mRCtxGBbbWWvnCkskhLTURRIqWOdydVv7L--klNV_HU,2102
12
+ langchain_githubcopilot_chat-0.1.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
13
+ langchain_githubcopilot_chat-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any