langchain-githubcopilot-chat 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/PKG-INFO +1 -1
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/__init__.py +7 -10
- langchain_githubcopilot_chat-0.2.0/langchain_githubcopilot_chat/auth.py +85 -0
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/chat_models.py +52 -26
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/embeddings.py +24 -24
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/pyproject.toml +1 -1
- langchain_githubcopilot_chat-0.1.2/langchain_githubcopilot_chat/document_loaders.py +0 -73
- langchain_githubcopilot_chat-0.1.2/langchain_githubcopilot_chat/retrievers.py +0 -107
- langchain_githubcopilot_chat-0.1.2/langchain_githubcopilot_chat/toolkits.py +0 -72
- langchain_githubcopilot_chat-0.1.2/langchain_githubcopilot_chat/tools.py +0 -94
- langchain_githubcopilot_chat-0.1.2/langchain_githubcopilot_chat/vectorstores.py +0 -439
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/LICENSE +0 -0
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/README.md +0 -0
- {langchain_githubcopilot_chat-0.1.2 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/py.typed +0 -0
|
@@ -1,15 +1,11 @@
|
|
|
1
1
|
from importlib import metadata
|
|
2
2
|
|
|
3
|
+
from langchain_githubcopilot_chat.auth import get_copilot_token
|
|
3
4
|
from langchain_githubcopilot_chat.chat_models import (
|
|
4
5
|
ChatGithubCopilot,
|
|
5
6
|
ChatGithubcopilotChat,
|
|
6
7
|
)
|
|
7
|
-
from langchain_githubcopilot_chat.document_loaders import GithubcopilotChatLoader
|
|
8
8
|
from langchain_githubcopilot_chat.embeddings import GithubcopilotChatEmbeddings
|
|
9
|
-
from langchain_githubcopilot_chat.retrievers import GithubcopilotChatRetriever
|
|
10
|
-
from langchain_githubcopilot_chat.toolkits import GithubcopilotChatToolkit
|
|
11
|
-
from langchain_githubcopilot_chat.tools import GithubcopilotChatTool
|
|
12
|
-
from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
|
|
13
9
|
|
|
14
10
|
try:
|
|
15
11
|
__version__ = metadata.version(__package__)
|
|
@@ -18,14 +14,15 @@ except metadata.PackageNotFoundError:
|
|
|
18
14
|
__version__ = ""
|
|
19
15
|
del metadata # optional, avoids polluting the results of dir(__package__)
|
|
20
16
|
|
|
17
|
+
get_available_models = ChatGithubCopilot.get_available_models
|
|
18
|
+
get_vscode_token = get_copilot_token
|
|
19
|
+
|
|
21
20
|
__all__ = [
|
|
22
21
|
"ChatGithubCopilot",
|
|
23
22
|
"ChatGithubcopilotChat", # backwards-compatible alias
|
|
24
|
-
"GithubcopilotChatVectorStore",
|
|
25
23
|
"GithubcopilotChatEmbeddings",
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"GithubcopilotChatTool",
|
|
24
|
+
"get_copilot_token",
|
|
25
|
+
"get_vscode_token",
|
|
26
|
+
"get_available_models",
|
|
30
27
|
"__version__",
|
|
31
28
|
]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
CLIENT_ID = "Iv1.b507a08c87ecfe98"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_copilot_token(client_id: str = CLIENT_ID) -> Optional[str]:
|
|
10
|
+
"""
|
|
11
|
+
Authenticate via GitHub Device Flow to get a Copilot Token.
|
|
12
|
+
This function will block and wait for the user to complete the
|
|
13
|
+
authorization in their browser.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
client_id: The GitHub OAuth App Client ID to use. Defaults
|
|
17
|
+
to the VS Code Copilot Chat client ID.
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
The fetched Copilot Token string, or None if authentication failed.
|
|
21
|
+
"""
|
|
22
|
+
print("1. Requesting device code from GitHub...") # noqa: T201 # noqa: T201
|
|
23
|
+
with httpx.Client() as client:
|
|
24
|
+
res = client.post(
|
|
25
|
+
"https://github.com/login/device/code",
|
|
26
|
+
headers={"Accept": "application/json"},
|
|
27
|
+
data={"client_id": client_id, "scope": "read:user"},
|
|
28
|
+
)
|
|
29
|
+
res.raise_for_status()
|
|
30
|
+
data = res.json()
|
|
31
|
+
|
|
32
|
+
device_code = data.get("device_code")
|
|
33
|
+
user_code = data.get("user_code")
|
|
34
|
+
verification_uri = data.get("verification_uri")
|
|
35
|
+
interval = data.get("interval", 5)
|
|
36
|
+
|
|
37
|
+
print("\n==========================================") # noqa: T201
|
|
38
|
+
print(f"Please open your browser to: {verification_uri}") # noqa: T201
|
|
39
|
+
print(f"And enter the authorization code: {user_code}") # noqa: T201
|
|
40
|
+
print("==========================================\n") # noqa: T201
|
|
41
|
+
print(f"Waiting for authorization (checking every {interval} seconds)...") # noqa: T201
|
|
42
|
+
|
|
43
|
+
access_token = None
|
|
44
|
+
with httpx.Client() as client:
|
|
45
|
+
while True:
|
|
46
|
+
token_res = client.post(
|
|
47
|
+
"https://github.com/login/oauth/access_token",
|
|
48
|
+
headers={"Accept": "application/json"},
|
|
49
|
+
data={
|
|
50
|
+
"client_id": client_id,
|
|
51
|
+
"device_code": device_code,
|
|
52
|
+
"grant_type": "urn:ietf:params:oauth:grant-type:device_code",
|
|
53
|
+
},
|
|
54
|
+
).json()
|
|
55
|
+
|
|
56
|
+
if "access_token" in token_res:
|
|
57
|
+
access_token = token_res["access_token"]
|
|
58
|
+
print( # noqa: T201 # noqa: T201
|
|
59
|
+
"\n✅ Authorization successful! Exchanging for Copilot Token..."
|
|
60
|
+
)
|
|
61
|
+
break
|
|
62
|
+
elif token_res.get("error") == "authorization_pending":
|
|
63
|
+
time.sleep(interval)
|
|
64
|
+
else:
|
|
65
|
+
print(f"\n❌ Authorization failed: {token_res}") # noqa: T201 # noqa: T201
|
|
66
|
+
return None
|
|
67
|
+
|
|
68
|
+
# Exchange the standard access token for a Copilot internal token
|
|
69
|
+
copilot_res = client.get(
|
|
70
|
+
"https://api.github.com/copilot_internal/v2/token",
|
|
71
|
+
headers={
|
|
72
|
+
"Authorization": f"token {access_token}",
|
|
73
|
+
"Accept": "application/json",
|
|
74
|
+
"Editor-Version": "vscode/1.104.1",
|
|
75
|
+
"Editor-Plugin-Version": "copilot-chat/0.26.7",
|
|
76
|
+
},
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if copilot_res.status_code == 200:
|
|
80
|
+
copilot_token = copilot_res.json().get("token")
|
|
81
|
+
print("🎉 Successfully acquired Copilot Token!") # noqa: T201 # noqa: T201
|
|
82
|
+
return copilot_token
|
|
83
|
+
else:
|
|
84
|
+
print(f"❌ Failed to acquire Copilot Token: {copilot_res.text}") # noqa: T201 # noqa: T201
|
|
85
|
+
return None
|
|
@@ -56,10 +56,23 @@ _ROLE_MAP = {
|
|
|
56
56
|
"tool": "tool",
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
_INFERENCE_PATH = "/inference/chat/completions"
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
_GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
|
|
60
|
+
_INFERENCE_PATH = "/chat/completions"
|
|
61
|
+
|
|
62
|
+
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
|
|
63
|
+
COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
|
|
64
|
+
COPILOT_INTEGRATION_ID = "vscode-chat"
|
|
65
|
+
COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
|
66
|
+
|
|
67
|
+
COPILOT_DEFAULT_HEADERS = {
|
|
68
|
+
"Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
|
|
69
|
+
"User-Agent": COPILOT_USER_AGENT,
|
|
70
|
+
"Editor-Version": COPILOT_EDITOR_VERSION,
|
|
71
|
+
"Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
|
|
72
|
+
"editor-version": COPILOT_EDITOR_VERSION,
|
|
73
|
+
"editor-plugin-version": COPILOT_PLUGIN_VERSION,
|
|
74
|
+
"copilot-vision-request": "true",
|
|
75
|
+
}
|
|
63
76
|
|
|
64
77
|
|
|
65
78
|
def _message_to_dict(message: BaseMessage) -> Dict[str, Any]:
|
|
@@ -403,19 +416,8 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
403
416
|
is used.
|
|
404
417
|
"""
|
|
405
418
|
|
|
406
|
-
base_url: str =
|
|
407
|
-
"""Base URL for the GitHub
|
|
408
|
-
|
|
409
|
-
org: Optional[str] = None
|
|
410
|
-
"""Organisation login for attributed inference requests.
|
|
411
|
-
|
|
412
|
-
When set, requests are sent to
|
|
413
|
-
``/orgs/{org}/inference/chat/completions`` instead of
|
|
414
|
-
``/inference/chat/completions``.
|
|
415
|
-
"""
|
|
416
|
-
|
|
417
|
-
api_version: str = _API_VERSION
|
|
418
|
-
"""GitHub Models API version sent as the ``X-GitHub-Api-Version`` header."""
|
|
419
|
+
base_url: str = _GITHUB_COPILOT_BASE_URL
|
|
420
|
+
"""Base URL for the GitHub Copilot API."""
|
|
419
421
|
|
|
420
422
|
temperature: Optional[float] = None
|
|
421
423
|
"""Sampling temperature in ``[0, 1]``."""
|
|
@@ -491,19 +493,43 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
491
493
|
@property
|
|
492
494
|
def _inference_url(self) -> str:
|
|
493
495
|
"""Return the full chat-completions endpoint URL."""
|
|
494
|
-
|
|
495
|
-
path = _ORG_INFERENCE_PATH.format(org=self.org)
|
|
496
|
-
else:
|
|
497
|
-
path = _INFERENCE_PATH
|
|
498
|
-
return self.base_url.rstrip("/") + path
|
|
496
|
+
return self.base_url.rstrip("/") + _INFERENCE_PATH
|
|
499
497
|
|
|
500
498
|
def _build_headers(self) -> Dict[str, str]:
|
|
501
|
-
|
|
499
|
+
headers = {
|
|
502
500
|
"Authorization": f"Bearer {self._token}",
|
|
503
|
-
"Accept": "application/
|
|
501
|
+
"Accept": "application/json",
|
|
502
|
+
"Content-Type": "application/json",
|
|
503
|
+
}
|
|
504
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
505
|
+
return headers
|
|
506
|
+
|
|
507
|
+
@classmethod
|
|
508
|
+
def get_available_models(
|
|
509
|
+
cls, github_token: Optional[str] = None
|
|
510
|
+
) -> List[Dict[str, Any]]:
|
|
511
|
+
"""Get the list of available models from the GitHub Copilot API."""
|
|
512
|
+
token = github_token or os.environ.get("GITHUB_TOKEN")
|
|
513
|
+
if not token:
|
|
514
|
+
raise ValueError(
|
|
515
|
+
"A GitHub token is required. Set the GITHUB_TOKEN environment "
|
|
516
|
+
"variable or pass ``github_token``."
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
headers = {
|
|
520
|
+
"Authorization": f"Bearer {token}",
|
|
521
|
+
"Accept": "application/json",
|
|
504
522
|
"Content-Type": "application/json",
|
|
505
|
-
"X-GitHub-Api-Version": self.api_version,
|
|
506
523
|
}
|
|
524
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
525
|
+
|
|
526
|
+
url = f"{_GITHUB_COPILOT_BASE_URL}/models"
|
|
527
|
+
|
|
528
|
+
with httpx.Client() as client:
|
|
529
|
+
response = client.get(url, headers=headers)
|
|
530
|
+
response.raise_for_status()
|
|
531
|
+
data = response.json()
|
|
532
|
+
return data.get("data", [])
|
|
507
533
|
|
|
508
534
|
def _build_payload(
|
|
509
535
|
self,
|
|
@@ -712,7 +738,7 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
712
738
|
|
|
713
739
|
@property
|
|
714
740
|
def _llm_type(self) -> str:
|
|
715
|
-
return "
|
|
741
|
+
return "github-copilot"
|
|
716
742
|
|
|
717
743
|
@property
|
|
718
744
|
def _identifying_params(self) -> Dict[str, Any]:
|
|
@@ -9,10 +9,23 @@ import httpx
|
|
|
9
9
|
from langchain_core.embeddings import Embeddings
|
|
10
10
|
from pydantic import BaseModel, Field, SecretStr, model_validator
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
_EMBEDDINGS_PATH = "/inference/embeddings"
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
_GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
|
|
13
|
+
_EMBEDDINGS_PATH = "/embeddings"
|
|
14
|
+
|
|
15
|
+
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
|
|
16
|
+
COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
|
|
17
|
+
COPILOT_INTEGRATION_ID = "vscode-chat"
|
|
18
|
+
COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
|
19
|
+
|
|
20
|
+
COPILOT_DEFAULT_HEADERS = {
|
|
21
|
+
"Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
|
|
22
|
+
"User-Agent": COPILOT_USER_AGENT,
|
|
23
|
+
"Editor-Version": COPILOT_EDITOR_VERSION,
|
|
24
|
+
"Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
|
|
25
|
+
"editor-version": COPILOT_EDITOR_VERSION,
|
|
26
|
+
"editor-plugin-version": COPILOT_PLUGIN_VERSION,
|
|
27
|
+
"copilot-vision-request": "true",
|
|
28
|
+
}
|
|
16
29
|
|
|
17
30
|
|
|
18
31
|
class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
|
|
@@ -109,18 +122,8 @@ class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
|
|
|
109
122
|
is used.
|
|
110
123
|
"""
|
|
111
124
|
|
|
112
|
-
base_url: str =
|
|
113
|
-
"""Base URL for the GitHub
|
|
114
|
-
|
|
115
|
-
org: Optional[str] = None
|
|
116
|
-
"""Organisation login for attributed inference requests.
|
|
117
|
-
|
|
118
|
-
When set, requests are sent to
|
|
119
|
-
``/orgs/{org}/inference/embeddings`` instead of ``/inference/embeddings``.
|
|
120
|
-
"""
|
|
121
|
-
|
|
122
|
-
api_version: str = _API_VERSION
|
|
123
|
-
"""GitHub Models API version sent as the ``X-GitHub-Api-Version`` header."""
|
|
125
|
+
base_url: str = _GITHUB_COPILOT_BASE_URL
|
|
126
|
+
"""Base URL for the GitHub Copilot API."""
|
|
124
127
|
|
|
125
128
|
dimensions: Optional[int] = None
|
|
126
129
|
"""Number of output embedding dimensions.
|
|
@@ -173,19 +176,16 @@ class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
|
|
|
173
176
|
@property
|
|
174
177
|
def _embeddings_url(self) -> str:
|
|
175
178
|
"""Return the full embeddings endpoint URL."""
|
|
176
|
-
|
|
177
|
-
path = _ORG_EMBEDDINGS_PATH.format(org=self.org)
|
|
178
|
-
else:
|
|
179
|
-
path = _EMBEDDINGS_PATH
|
|
180
|
-
return self.base_url.rstrip("/") + path
|
|
179
|
+
return self.base_url.rstrip("/") + _EMBEDDINGS_PATH
|
|
181
180
|
|
|
182
181
|
def _build_headers(self) -> Dict[str, str]:
|
|
183
|
-
|
|
182
|
+
headers = {
|
|
184
183
|
"Authorization": f"Bearer {self._token}",
|
|
185
|
-
"Accept": "application/
|
|
184
|
+
"Accept": "application/json",
|
|
186
185
|
"Content-Type": "application/json",
|
|
187
|
-
"X-GitHub-Api-Version": self.api_version,
|
|
188
186
|
}
|
|
187
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
188
|
+
return headers
|
|
189
189
|
|
|
190
190
|
def _build_payload(self, input: Union[str, List[str]]) -> Dict[str, Any]:
|
|
191
191
|
"""Assemble the JSON body for the embeddings API."""
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "langchain-githubcopilot-chat"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "An integration package connecting GithubcopilotChat and LangChain"
|
|
9
9
|
authors = ["YIhan Wu <iumm@ibat.ac.cn>"]
|
|
10
10
|
readme = "README.md"
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
"""GithubcopilotChat document loader."""
|
|
2
|
-
|
|
3
|
-
from typing import Iterator
|
|
4
|
-
|
|
5
|
-
from langchain_core.document_loaders.base import BaseLoader
|
|
6
|
-
from langchain_core.documents import Document
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class GithubcopilotChatLoader(BaseLoader):
|
|
10
|
-
# TODO: Replace all TODOs in docstring. See example docstring:
|
|
11
|
-
# https://github.com/langchain-ai/langchain/blob/869523ad728e6b76d77f170cce13925b4ebc3c1e/libs/community/langchain_community/document_loaders/recursive_url_loader.py#L54
|
|
12
|
-
"""
|
|
13
|
-
GithubcopilotChat document loader integration
|
|
14
|
-
|
|
15
|
-
# TODO: Replace with relevant packages, env vars.
|
|
16
|
-
Setup:
|
|
17
|
-
Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
|
|
18
|
-
|
|
19
|
-
.. code-block:: bash
|
|
20
|
-
|
|
21
|
-
pip install -U langchain-githubcopilot-chat
|
|
22
|
-
export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
|
|
23
|
-
|
|
24
|
-
# TODO: Replace with relevant init params.
|
|
25
|
-
Instantiate:
|
|
26
|
-
.. code-block:: python
|
|
27
|
-
|
|
28
|
-
from langchain_community.document_loaders import GithubcopilotChatLoader
|
|
29
|
-
|
|
30
|
-
loader = GithubcopilotChatLoader(
|
|
31
|
-
# required params = ...
|
|
32
|
-
# other params = ...
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
Lazy load:
|
|
36
|
-
.. code-block:: python
|
|
37
|
-
|
|
38
|
-
docs = []
|
|
39
|
-
docs_lazy = loader.lazy_load()
|
|
40
|
-
|
|
41
|
-
# async variant:
|
|
42
|
-
# docs_lazy = await loader.alazy_load()
|
|
43
|
-
|
|
44
|
-
for doc in docs_lazy:
|
|
45
|
-
docs.append(doc)
|
|
46
|
-
print(docs[0].page_content[:100])
|
|
47
|
-
print(docs[0].metadata)
|
|
48
|
-
|
|
49
|
-
.. code-block:: python
|
|
50
|
-
|
|
51
|
-
TODO: Example output
|
|
52
|
-
|
|
53
|
-
# TODO: Delete if async load is not implemented
|
|
54
|
-
Async load:
|
|
55
|
-
.. code-block:: python
|
|
56
|
-
|
|
57
|
-
docs = await loader.aload()
|
|
58
|
-
print(docs[0].page_content[:100])
|
|
59
|
-
print(docs[0].metadata)
|
|
60
|
-
|
|
61
|
-
.. code-block:: python
|
|
62
|
-
|
|
63
|
-
TODO: Example output
|
|
64
|
-
|
|
65
|
-
""" # noqa: E501
|
|
66
|
-
|
|
67
|
-
# TODO: This method must be implemented to load documents.
|
|
68
|
-
# Do not implement load(), a default implementation is already available.
|
|
69
|
-
def lazy_load(self) -> Iterator[Document]:
|
|
70
|
-
raise NotImplementedError()
|
|
71
|
-
|
|
72
|
-
# TODO: Implement if you would like to change default BaseLoader implementation
|
|
73
|
-
# async def alazy_load(self) -> AsyncIterator[Document]:
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
"""GithubcopilotChat retrievers."""
|
|
2
|
-
|
|
3
|
-
from typing import Any, List
|
|
4
|
-
|
|
5
|
-
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
|
6
|
-
from langchain_core.documents import Document
|
|
7
|
-
from langchain_core.retrievers import BaseRetriever
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class GithubcopilotChatRetriever(BaseRetriever):
|
|
11
|
-
# TODO: Replace all TODOs in docstring. See example docstring:
|
|
12
|
-
# https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/retrievers/tavily_search_api.py#L17
|
|
13
|
-
"""GithubcopilotChat retriever.
|
|
14
|
-
|
|
15
|
-
# TODO: Replace with relevant packages, env vars, etc.
|
|
16
|
-
Setup:
|
|
17
|
-
Install ``langchain-githubcopilot-chat`` and set environment variable
|
|
18
|
-
``GITHUBCOPILOTCHAT_API_KEY``.
|
|
19
|
-
|
|
20
|
-
.. code-block:: bash
|
|
21
|
-
|
|
22
|
-
pip install -U langchain-githubcopilot-chat
|
|
23
|
-
export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
|
|
24
|
-
|
|
25
|
-
# TODO: Populate with relevant params.
|
|
26
|
-
Key init args:
|
|
27
|
-
arg 1: type
|
|
28
|
-
description
|
|
29
|
-
arg 2: type
|
|
30
|
-
description
|
|
31
|
-
|
|
32
|
-
# TODO: Replace with relevant init params.
|
|
33
|
-
Instantiate:
|
|
34
|
-
.. code-block:: python
|
|
35
|
-
|
|
36
|
-
from langchain-githubcopilot-chat import GithubcopilotChatRetriever
|
|
37
|
-
|
|
38
|
-
retriever = GithubcopilotChatRetriever(
|
|
39
|
-
# ...
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
Usage:
|
|
43
|
-
.. code-block:: python
|
|
44
|
-
|
|
45
|
-
query = "..."
|
|
46
|
-
|
|
47
|
-
retriever.invoke(query)
|
|
48
|
-
|
|
49
|
-
.. code-block:: none
|
|
50
|
-
|
|
51
|
-
# TODO: Example output.
|
|
52
|
-
|
|
53
|
-
Use within a chain:
|
|
54
|
-
.. code-block:: python
|
|
55
|
-
|
|
56
|
-
from langchain_core.output_parsers import StrOutputParser
|
|
57
|
-
from langchain_core.prompts import ChatPromptTemplate
|
|
58
|
-
from langchain_core.runnables import RunnablePassthrough
|
|
59
|
-
from langchain_openai import ChatOpenAI
|
|
60
|
-
|
|
61
|
-
prompt = ChatPromptTemplate.from_template(
|
|
62
|
-
\"\"\"Answer the question based only on the context provided.
|
|
63
|
-
|
|
64
|
-
Context: {context}
|
|
65
|
-
|
|
66
|
-
Question: {question}\"\"\"
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
|
|
70
|
-
|
|
71
|
-
def format_docs(docs):
|
|
72
|
-
return "\\n\\n".join(doc.page_content for doc in docs)
|
|
73
|
-
|
|
74
|
-
chain = (
|
|
75
|
-
{"context": retriever | format_docs, "question": RunnablePassthrough()}
|
|
76
|
-
| prompt
|
|
77
|
-
| llm
|
|
78
|
-
| StrOutputParser()
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
chain.invoke("...")
|
|
82
|
-
|
|
83
|
-
.. code-block:: none
|
|
84
|
-
|
|
85
|
-
# TODO: Example output.
|
|
86
|
-
|
|
87
|
-
"""
|
|
88
|
-
|
|
89
|
-
k: int = 3
|
|
90
|
-
|
|
91
|
-
# TODO: This method must be implemented to retrieve documents.
|
|
92
|
-
def _get_relevant_documents(
|
|
93
|
-
self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any
|
|
94
|
-
) -> List[Document]:
|
|
95
|
-
k = kwargs.get("k", self.k)
|
|
96
|
-
return [
|
|
97
|
-
Document(page_content=f"Result {i} for query: {query}") for i in range(k)
|
|
98
|
-
]
|
|
99
|
-
|
|
100
|
-
# optional: add custom async implementations here
|
|
101
|
-
# async def _aget_relevant_documents(
|
|
102
|
-
# self,
|
|
103
|
-
# query: str,
|
|
104
|
-
# *,
|
|
105
|
-
# run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
106
|
-
# **kwargs: Any,
|
|
107
|
-
# ) -> List[Document]: ...
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
"""GithubcopilotChat toolkits."""
|
|
2
|
-
|
|
3
|
-
from typing import List
|
|
4
|
-
|
|
5
|
-
from langchain_core.tools import BaseTool, BaseToolkit
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class GithubcopilotChatToolkit(BaseToolkit):
|
|
9
|
-
# TODO: Replace all TODOs in docstring. See example docstring:
|
|
10
|
-
# https://github.com/langchain-ai/langchain/blob/c123cb2b304f52ab65db4714eeec46af69a861ec/libs/community/langchain_community/agent_toolkits/sql/toolkit.py#L19
|
|
11
|
-
"""GithubcopilotChat toolkit.
|
|
12
|
-
|
|
13
|
-
# TODO: Replace with relevant packages, env vars, etc.
|
|
14
|
-
Setup:
|
|
15
|
-
Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
|
|
16
|
-
|
|
17
|
-
.. code-block:: bash
|
|
18
|
-
|
|
19
|
-
pip install -U langchain-githubcopilot-chat
|
|
20
|
-
export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
|
|
21
|
-
|
|
22
|
-
# TODO: Populate with relevant params.
|
|
23
|
-
Key init args:
|
|
24
|
-
arg 1: type
|
|
25
|
-
description
|
|
26
|
-
arg 2: type
|
|
27
|
-
description
|
|
28
|
-
|
|
29
|
-
# TODO: Replace with relevant init params.
|
|
30
|
-
Instantiate:
|
|
31
|
-
.. code-block:: python
|
|
32
|
-
|
|
33
|
-
from langchain-githubcopilot-chat import GithubcopilotChatToolkit
|
|
34
|
-
|
|
35
|
-
toolkit = GithubcopilotChatToolkit(
|
|
36
|
-
# ...
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
Tools:
|
|
40
|
-
.. code-block:: python
|
|
41
|
-
|
|
42
|
-
toolkit.get_tools()
|
|
43
|
-
|
|
44
|
-
.. code-block:: none
|
|
45
|
-
|
|
46
|
-
# TODO: Example output.
|
|
47
|
-
|
|
48
|
-
Use within an agent:
|
|
49
|
-
.. code-block:: python
|
|
50
|
-
|
|
51
|
-
from langgraph.prebuilt import create_react_agent
|
|
52
|
-
|
|
53
|
-
agent_executor = create_react_agent(llm, tools)
|
|
54
|
-
|
|
55
|
-
example_query = "..."
|
|
56
|
-
|
|
57
|
-
events = agent_executor.stream(
|
|
58
|
-
{"messages": [("user", example_query)]},
|
|
59
|
-
stream_mode="values",
|
|
60
|
-
)
|
|
61
|
-
for event in events:
|
|
62
|
-
event["messages"][-1].pretty_print()
|
|
63
|
-
|
|
64
|
-
.. code-block:: none
|
|
65
|
-
|
|
66
|
-
# TODO: Example output.
|
|
67
|
-
|
|
68
|
-
""" # noqa: E501
|
|
69
|
-
|
|
70
|
-
# TODO: This method must be implemented to list tools.
|
|
71
|
-
def get_tools(self) -> List[BaseTool]:
|
|
72
|
-
raise NotImplementedError()
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
"""GithubcopilotChat tools."""
|
|
2
|
-
|
|
3
|
-
from typing import Optional, Type
|
|
4
|
-
|
|
5
|
-
from langchain_core.callbacks import (
|
|
6
|
-
CallbackManagerForToolRun,
|
|
7
|
-
)
|
|
8
|
-
from langchain_core.tools import BaseTool
|
|
9
|
-
from pydantic import BaseModel, Field
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class GithubcopilotChatToolInput(BaseModel):
|
|
13
|
-
"""Input schema for GithubcopilotChat tool.
|
|
14
|
-
|
|
15
|
-
This docstring is **not** part of what is sent to the model when performing tool
|
|
16
|
-
calling. The Field default values and descriptions **are** part of what is sent to
|
|
17
|
-
the model when performing tool calling.
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
# TODO: Add input args and descriptions.
|
|
21
|
-
a: int = Field(..., description="first number to add")
|
|
22
|
-
b: int = Field(..., description="second number to add")
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class GithubcopilotChatTool(BaseTool): # type: ignore[override]
|
|
26
|
-
"""GithubcopilotChat tool.
|
|
27
|
-
|
|
28
|
-
Setup:
|
|
29
|
-
# TODO: Replace with relevant packages, env vars.
|
|
30
|
-
Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
|
|
31
|
-
|
|
32
|
-
.. code-block:: bash
|
|
33
|
-
|
|
34
|
-
pip install -U langchain-githubcopilot-chat
|
|
35
|
-
export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
|
|
36
|
-
|
|
37
|
-
Instantiation:
|
|
38
|
-
.. code-block:: python
|
|
39
|
-
|
|
40
|
-
tool = GithubcopilotChatTool(
|
|
41
|
-
# TODO: init params
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
Invocation with args:
|
|
45
|
-
.. code-block:: python
|
|
46
|
-
|
|
47
|
-
# TODO: invoke args
|
|
48
|
-
tool.invoke({...})
|
|
49
|
-
|
|
50
|
-
.. code-block:: python
|
|
51
|
-
|
|
52
|
-
# TODO: output of invocation
|
|
53
|
-
|
|
54
|
-
Invocation with ToolCall:
|
|
55
|
-
|
|
56
|
-
.. code-block:: python
|
|
57
|
-
|
|
58
|
-
# TODO: invoke args
|
|
59
|
-
tool.invoke({"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"})
|
|
60
|
-
|
|
61
|
-
.. code-block:: python
|
|
62
|
-
|
|
63
|
-
# TODO: output of invocation
|
|
64
|
-
|
|
65
|
-
""" # noqa: E501
|
|
66
|
-
|
|
67
|
-
# TODO: Set tool name and description
|
|
68
|
-
name: str = "TODO: Tool name"
|
|
69
|
-
"""The name that is passed to the model when performing tool calling."""
|
|
70
|
-
description: str = "TODO: Tool description."
|
|
71
|
-
"""The description that is passed to the model when performing tool calling."""
|
|
72
|
-
args_schema: Type[BaseModel] = GithubcopilotChatToolInput
|
|
73
|
-
"""The schema that is passed to the model when performing tool calling."""
|
|
74
|
-
|
|
75
|
-
# TODO: Add any other init params for the tool.
|
|
76
|
-
# param1: Optional[str]
|
|
77
|
-
# """param1 determines foobar"""
|
|
78
|
-
|
|
79
|
-
# TODO: Replaced (a, b) with real tool arguments.
|
|
80
|
-
def _run(
|
|
81
|
-
self, a: int, b: int, *, run_manager: Optional[CallbackManagerForToolRun] = None
|
|
82
|
-
) -> str:
|
|
83
|
-
return str(a + b + 80)
|
|
84
|
-
|
|
85
|
-
# TODO: Implement if tool has native async functionality, otherwise delete.
|
|
86
|
-
|
|
87
|
-
# async def _arun(
|
|
88
|
-
# self,
|
|
89
|
-
# a: int,
|
|
90
|
-
# b: int,
|
|
91
|
-
# *,
|
|
92
|
-
# run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
|
93
|
-
# ) -> str:
|
|
94
|
-
# ...
|
|
@@ -1,439 +0,0 @@
|
|
|
1
|
-
"""GithubcopilotChat vector stores."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import (
|
|
7
|
-
Any,
|
|
8
|
-
Callable,
|
|
9
|
-
Iterator,
|
|
10
|
-
List,
|
|
11
|
-
Optional,
|
|
12
|
-
Sequence,
|
|
13
|
-
Tuple,
|
|
14
|
-
Type,
|
|
15
|
-
TypeVar,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
from langchain_core.documents import Document
|
|
19
|
-
from langchain_core.embeddings import Embeddings
|
|
20
|
-
from langchain_core.vectorstores import VectorStore
|
|
21
|
-
from langchain_core.vectorstores.utils import _cosine_similarity as cosine_similarity
|
|
22
|
-
|
|
23
|
-
VST = TypeVar("VST", bound=VectorStore)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class GithubcopilotChatVectorStore(VectorStore):
|
|
27
|
-
# TODO: Replace all TODOs in docstring.
|
|
28
|
-
"""GithubcopilotChat vector store integration.
|
|
29
|
-
|
|
30
|
-
# TODO: Replace with relevant packages, env vars.
|
|
31
|
-
Setup:
|
|
32
|
-
Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
|
|
33
|
-
|
|
34
|
-
.. code-block:: bash
|
|
35
|
-
|
|
36
|
-
pip install -U langchain-githubcopilot-chat
|
|
37
|
-
export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
|
|
38
|
-
|
|
39
|
-
# TODO: Populate with relevant params.
|
|
40
|
-
Key init args — indexing params:
|
|
41
|
-
collection_name: str
|
|
42
|
-
Name of the collection.
|
|
43
|
-
embedding_function: Embeddings
|
|
44
|
-
Embedding function to use.
|
|
45
|
-
|
|
46
|
-
# TODO: Populate with relevant params.
|
|
47
|
-
Key init args — client params:
|
|
48
|
-
client: Optional[Client]
|
|
49
|
-
Client to use.
|
|
50
|
-
connection_args: Optional[dict]
|
|
51
|
-
Connection arguments.
|
|
52
|
-
|
|
53
|
-
# TODO: Replace with relevant init params.
|
|
54
|
-
Instantiate:
|
|
55
|
-
.. code-block:: python
|
|
56
|
-
|
|
57
|
-
from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
|
|
58
|
-
from langchain_openai import OpenAIEmbeddings
|
|
59
|
-
|
|
60
|
-
vector_store = GithubcopilotChatVectorStore(
|
|
61
|
-
collection_name="foo",
|
|
62
|
-
embedding_function=OpenAIEmbeddings(),
|
|
63
|
-
connection_args={"uri": "./foo.db"},
|
|
64
|
-
# other params...
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
# TODO: Populate with relevant variables.
|
|
68
|
-
Add Documents:
|
|
69
|
-
.. code-block:: python
|
|
70
|
-
|
|
71
|
-
from langchain_core.documents import Document
|
|
72
|
-
|
|
73
|
-
document_1 = Document(page_content="foo", metadata={"baz": "bar"})
|
|
74
|
-
document_2 = Document(page_content="thud", metadata={"bar": "baz"})
|
|
75
|
-
document_3 = Document(page_content="i will be deleted :(")
|
|
76
|
-
|
|
77
|
-
documents = [document_1, document_2, document_3]
|
|
78
|
-
ids = ["1", "2", "3"]
|
|
79
|
-
vector_store.add_documents(documents=documents, ids=ids)
|
|
80
|
-
|
|
81
|
-
# TODO: Populate with relevant variables.
|
|
82
|
-
Delete Documents:
|
|
83
|
-
.. code-block:: python
|
|
84
|
-
|
|
85
|
-
vector_store.delete(ids=["3"])
|
|
86
|
-
|
|
87
|
-
# TODO: Fill out with relevant variables and example output.
|
|
88
|
-
Search:
|
|
89
|
-
.. code-block:: python
|
|
90
|
-
|
|
91
|
-
results = vector_store.similarity_search(query="thud",k=1)
|
|
92
|
-
for doc in results:
|
|
93
|
-
print(f"* {doc.page_content} [{doc.metadata}]")
|
|
94
|
-
|
|
95
|
-
.. code-block:: python
|
|
96
|
-
|
|
97
|
-
# TODO: Example output
|
|
98
|
-
|
|
99
|
-
# TODO: Fill out with relevant variables and example output.
|
|
100
|
-
Search with filter:
|
|
101
|
-
.. code-block:: python
|
|
102
|
-
|
|
103
|
-
results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
|
|
104
|
-
for doc in results:
|
|
105
|
-
print(f"* {doc.page_content} [{doc.metadata}]")
|
|
106
|
-
|
|
107
|
-
.. code-block:: python
|
|
108
|
-
|
|
109
|
-
# TODO: Example output
|
|
110
|
-
|
|
111
|
-
# TODO: Fill out with relevant variables and example output.
|
|
112
|
-
Search with score:
|
|
113
|
-
.. code-block:: python
|
|
114
|
-
|
|
115
|
-
results = vector_store.similarity_search_with_score(query="qux",k=1)
|
|
116
|
-
for doc, score in results:
|
|
117
|
-
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
|
118
|
-
|
|
119
|
-
.. code-block:: python
|
|
120
|
-
|
|
121
|
-
# TODO: Example output
|
|
122
|
-
|
|
123
|
-
# TODO: Fill out with relevant variables and example output.
|
|
124
|
-
Async:
|
|
125
|
-
.. code-block:: python
|
|
126
|
-
|
|
127
|
-
# add documents
|
|
128
|
-
# await vector_store.aadd_documents(documents=documents, ids=ids)
|
|
129
|
-
|
|
130
|
-
# delete documents
|
|
131
|
-
# await vector_store.adelete(ids=["3"])
|
|
132
|
-
|
|
133
|
-
# search
|
|
134
|
-
# results = vector_store.asimilarity_search(query="thud",k=1)
|
|
135
|
-
|
|
136
|
-
# search with score
|
|
137
|
-
results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
|
|
138
|
-
for doc,score in results:
|
|
139
|
-
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
|
140
|
-
|
|
141
|
-
.. code-block:: python
|
|
142
|
-
|
|
143
|
-
# TODO: Example output
|
|
144
|
-
|
|
145
|
-
# TODO: Fill out with relevant variables and example output.
|
|
146
|
-
Use as Retriever:
|
|
147
|
-
.. code-block:: python
|
|
148
|
-
|
|
149
|
-
retriever = vector_store.as_retriever(
|
|
150
|
-
search_type="mmr",
|
|
151
|
-
search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
|
|
152
|
-
)
|
|
153
|
-
retriever.invoke("thud")
|
|
154
|
-
|
|
155
|
-
.. code-block:: python
|
|
156
|
-
|
|
157
|
-
# TODO: Example output
|
|
158
|
-
|
|
159
|
-
""" # noqa: E501
|
|
160
|
-
|
|
161
|
-
def __init__(self, embedding: Embeddings) -> None:
    """Create an empty in-memory store that uses ``embedding``.

    Args:
        embedding: embedding function to use.
    """
    self.embedding = embedding
    # Stored records keyed by document id; the store starts out empty.
    self._database: dict[str, dict[str, Any]] = {}
|
|
169
|
-
|
|
170
|
-
@classmethod
def from_texts(
    cls: Type[GithubcopilotChatVectorStore],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    **kwargs: Any,
) -> GithubcopilotChatVectorStore:
    """Build a store from raw texts.

    A new instance is created with ``embedding`` and then ``texts``,
    ``metadatas`` and any extra keyword arguments are forwarded to
    ``add_texts`` on that instance.

    Args:
        texts: The texts to add.
        embedding: embedding function to use.
        metadatas: Optional metadata dicts forwarded alongside ``texts``.
        **kwargs: Extra keyword arguments forwarded to ``add_texts``.

    Returns:
        The newly created store.
    """
    instance = cls(embedding=embedding)
    instance.add_texts(texts=texts, metadatas=metadatas, **kwargs)
    return instance
|
|
183
|
-
|
|
184
|
-
# optional: add custom async implementations
|
|
185
|
-
# @classmethod
|
|
186
|
-
# async def afrom_texts(
|
|
187
|
-
# cls: Type[VST],
|
|
188
|
-
# texts: List[str],
|
|
189
|
-
# embedding: Embeddings,
|
|
190
|
-
# metadatas: Optional[List[dict]] = None,
|
|
191
|
-
# **kwargs: Any,
|
|
192
|
-
# ) -> VST:
|
|
193
|
-
# return await asyncio.get_running_loop().run_in_executor(
|
|
194
|
-
# None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
|
|
195
|
-
# )
|
|
196
|
-
|
|
197
|
-
@property
def embeddings(self) -> Embeddings:
    """Return the embedding object this store was created with."""
    embedder = self.embedding
    return embedder
|
|
200
|
-
|
|
201
|
-
def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed ``documents`` and store them, returning the ids used.

    Each document is stored under its id as a record holding the id, the
    embedding vector, the page content and the metadata. A record that
    already exists under the same id is overwritten.

    Args:
        documents: Documents to add.
        ids: Optional ids, one per document. When omitted (or empty) each
            document's own ``id`` is used; a falsy id is replaced by a
            freshly generated UUID4 string.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The ids under which the documents were stored, in input order.

    Raises:
        ValueError: If ``ids`` is given and its length differs from the
            number of documents.
    """
    texts = [doc.page_content for doc in documents]

    # Validate before embedding so that a bad call fails fast instead of
    # first paying for an embedding request whose result is thrown away.
    if ids and len(ids) != len(texts):
        msg = (
            "ids must be the same length as texts. "
            f"Got {len(ids)} ids and {len(texts)} texts."
        )
        raise ValueError(msg)

    # Nothing to store: skip the embedding call entirely.
    if not texts:
        return []

    vectors = self.embedding.embed_documents(texts)

    candidate_ids: Iterator[Optional[str]] = (
        iter(ids) if ids else iter(doc.id for doc in documents)
    )

    stored_ids: List[str] = []
    for doc, vector, candidate in zip(documents, vectors, candidate_ids):
        doc_id = candidate if candidate else str(uuid.uuid4())
        stored_ids.append(doc_id)
        self._database[doc_id] = {
            "id": doc_id,
            "vector": vector,
            "text": doc.page_content,
            "metadata": doc.metadata,
        }

    return stored_ids
|
|
236
|
-
|
|
237
|
-
# optional: add custom async implementations
|
|
238
|
-
# async def aadd_documents(
|
|
239
|
-
# self,
|
|
240
|
-
# documents: List[Document],
|
|
241
|
-
# ids: Optional[List[str]] = None,
|
|
242
|
-
# **kwargs: Any,
|
|
243
|
-
# ) -> List[str]:
|
|
244
|
-
# raise NotImplementedError
|
|
245
|
-
|
|
246
|
-
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
    """Remove the records stored under the given ids.

    Ids that are not present are ignored. Passing no ids (``None`` or an
    empty list) leaves the store untouched.

    Args:
        ids: Ids of the records to remove.
        **kwargs: Accepted for interface compatibility; unused here.
    """
    if not ids:
        return
    for doc_id in ids:
        self._database.pop(doc_id, None)
|
|
250
|
-
|
|
251
|
-
# optional: add custom async implementations
|
|
252
|
-
# async def adelete(
|
|
253
|
-
# self, ids: Optional[List[str]] = None, **kwargs: Any
|
|
254
|
-
# ) -> None:
|
|
255
|
-
# raise NotImplementedError
|
|
256
|
-
|
|
257
|
-
def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
    """Look up stored documents by id.

    Ids with no stored record are skipped, so the result may be shorter
    than ``ids``. Results follow the order of ``ids``.

    Args:
        ids: The ids of the documents to get.

    Returns:
        A list of Document objects rebuilt from the stored records.
    """
    records = (self._database.get(doc_id) for doc_id in ids)
    return [
        Document(
            id=record["id"],
            page_content=record["text"],
            metadata=record["metadata"],
        )
        for record in records
        if record
    ]
|
|
279
|
-
|
|
280
|
-
# optional: add custom async implementations
|
|
281
|
-
# async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
|
|
282
|
-
# raise NotImplementedError
|
|
283
|
-
|
|
284
|
-
# NOTE: the below helper method implements similarity search for in-memory
|
|
285
|
-
# storage. It is optional and not a part of the vector store interface.
|
|
286
|
-
def _similarity_search_with_score_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Callable[[Document], bool] | dict[str, Any]] = None,
    **kwargs: Any,
) -> List[tuple[Document, float, List[float]]]:
    """Rank stored records against ``embedding`` and return the best ``k``.

    Scores come from ``cosine_similarity`` between ``embedding`` and each
    stored vector; results are ordered from highest to lowest score.

    Args:
        embedding: Query vector to compare against the stored vectors.
        k: Maximum number of results to return.
        filter: Optional restriction on the candidates. Either a callable
            that receives a Document (id, page content and metadata) and
            returns whether to keep it, or a dict of metadata key/value
            pairs that must all be present and equal, e.g.
            ``filter={"bar": "baz"}``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        A list of ``(document, score, stored_vector)`` tuples, empty when
        no record survives the filter.
    """
    # get all docs with fixed order in list
    docs = list(self._database.values())

    if isinstance(filter, dict):
        # A plain metadata dict used to be called like a function and
        # raised TypeError; treat it as an exact-match metadata filter.
        docs = [
            doc
            for doc in docs
            if all(
                key in doc["metadata"] and doc["metadata"][key] == value
                for key, value in filter.items()
            )
        ]
    elif filter is not None:
        docs = [
            doc
            for doc in docs
            if filter(
                Document(
                    # Pass the id too so predicates can filter on it.
                    id=doc["id"],
                    page_content=doc["text"],
                    metadata=doc["metadata"],
                )
            )
        ]

    if not docs:
        return []

    similarity = cosine_similarity([embedding], [doc["vector"] for doc in docs])[0]

    # get the indices ordered by similarity score
    top_k_idx = similarity.argsort()[::-1][:k]

    return [
        (
            # Document
            Document(
                id=doc_dict["id"],
                page_content=doc_dict["text"],
                metadata=doc_dict["metadata"],
            ),
            # Score
            float(similarity[idx].item()),
            # Embedding vector
            doc_dict["vector"],
        )
        for idx in top_k_idx
        # Assign using walrus operator to avoid multiple lookups
        if (doc_dict := docs[idx])
    ]
|
|
328
|
-
|
|
329
|
-
def similarity_search(
    self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
    """Return the documents most similar to ``query``.

    The query is embedded with ``self.embedding.embed_query`` and the
    ranking is delegated to ``_similarity_search_with_score_by_vector``;
    only the documents are kept from its results.

    Args:
        query: Text to search for.
        k: Maximum number of documents to return.
        **kwargs: Forwarded to ``_similarity_search_with_score_by_vector``.

    Returns:
        The matching documents, in the order the helper produced them.
    """
    query_vector = self.embedding.embed_query(query)
    hits = self._similarity_search_with_score_by_vector(
        embedding=query_vector, k=k, **kwargs
    )
    found: List[Document] = []
    for document, _score, _vector in hits:
        found.append(document)
    return found
|
|
339
|
-
|
|
340
|
-
# optional: add custom async implementations
|
|
341
|
-
# async def asimilarity_search(
|
|
342
|
-
# self, query: str, k: int = 4, **kwargs: Any
|
|
343
|
-
# ) -> List[Document]:
|
|
344
|
-
# # This is a temporary workaround to make the similarity search
|
|
345
|
-
# # asynchronous. The proper solution is to make the similarity search
|
|
346
|
-
# # asynchronous in the vector store implementations.
|
|
347
|
-
# func = partial(self.similarity_search, query, k=k, **kwargs)
|
|
348
|
-
# return await asyncio.get_event_loop().run_in_executor(None, func)
|
|
349
|
-
|
|
350
|
-
def similarity_search_with_score(
    self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
    """Return the documents most similar to ``query`` with their scores.

    The query is embedded with ``self.embedding.embed_query`` and the
    ranking is delegated to ``_similarity_search_with_score_by_vector``;
    the stored vector is dropped from each result.

    Args:
        query: Text to search for.
        k: Maximum number of results to return.
        **kwargs: Forwarded to ``_similarity_search_with_score_by_vector``.

    Returns:
        ``(document, score)`` pairs, in the order the helper produced them.
    """
    query_vector = self.embedding.embed_query(query)
    hits = self._similarity_search_with_score_by_vector(
        embedding=query_vector, k=k, **kwargs
    )
    scored: List[Tuple[Document, float]] = []
    for document, score, _vector in hits:
        scored.append((document, score))
    return scored
|
|
360
|
-
|
|
361
|
-
# optional: add custom async implementations
|
|
362
|
-
# async def asimilarity_search_with_score(
|
|
363
|
-
# self, *args: Any, **kwargs: Any
|
|
364
|
-
# ) -> List[Tuple[Document, float]]:
|
|
365
|
-
# # This is a temporary workaround to make the similarity search
|
|
366
|
-
# # asynchronous. The proper solution is to make the similarity search
|
|
367
|
-
# # asynchronous in the vector store implementations.
|
|
368
|
-
# func = partial(self.similarity_search_with_score, *args, **kwargs)
|
|
369
|
-
# return await asyncio.get_event_loop().run_in_executor(None, func)
|
|
370
|
-
|
|
371
|
-
### ADDITIONAL OPTIONAL SEARCH METHODS BELOW ###
|
|
372
|
-
|
|
373
|
-
# def similarity_search_by_vector(
|
|
374
|
-
# self, embedding: List[float], k: int = 4, **kwargs: Any
|
|
375
|
-
# ) -> List[Document]:
|
|
376
|
-
# raise NotImplementedError
|
|
377
|
-
|
|
378
|
-
# optional: add custom async implementations
|
|
379
|
-
# async def asimilarity_search_by_vector(
|
|
380
|
-
# self, embedding: List[float], k: int = 4, **kwargs: Any
|
|
381
|
-
# ) -> List[Document]:
|
|
382
|
-
# # This is a temporary workaround to make the similarity search
|
|
383
|
-
# # asynchronous. The proper solution is to make the similarity search
|
|
384
|
-
# # asynchronous in the vector store implementations.
|
|
385
|
-
# func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
|
|
386
|
-
# return await asyncio.get_event_loop().run_in_executor(None, func)
|
|
387
|
-
|
|
388
|
-
# def max_marginal_relevance_search(
|
|
389
|
-
# self,
|
|
390
|
-
# query: str,
|
|
391
|
-
# k: int = 4,
|
|
392
|
-
# fetch_k: int = 20,
|
|
393
|
-
# lambda_mult: float = 0.5,
|
|
394
|
-
# **kwargs: Any,
|
|
395
|
-
# ) -> List[Document]:
|
|
396
|
-
# raise NotImplementedError
|
|
397
|
-
|
|
398
|
-
# optional: add custom async implementations
|
|
399
|
-
# async def amax_marginal_relevance_search(
|
|
400
|
-
# self,
|
|
401
|
-
# query: str,
|
|
402
|
-
# k: int = 4,
|
|
403
|
-
# fetch_k: int = 20,
|
|
404
|
-
# lambda_mult: float = 0.5,
|
|
405
|
-
# **kwargs: Any,
|
|
406
|
-
# ) -> List[Document]:
|
|
407
|
-
# # This is a temporary workaround to make the similarity search
|
|
408
|
-
# # asynchronous. The proper solution is to make the similarity search
|
|
409
|
-
# # asynchronous in the vector store implementations.
|
|
410
|
-
# func = partial(
|
|
411
|
-
# self.max_marginal_relevance_search,
|
|
412
|
-
# query,
|
|
413
|
-
# k=k,
|
|
414
|
-
# fetch_k=fetch_k,
|
|
415
|
-
# lambda_mult=lambda_mult,
|
|
416
|
-
# **kwargs,
|
|
417
|
-
# )
|
|
418
|
-
# return await asyncio.get_event_loop().run_in_executor(None, func)
|
|
419
|
-
|
|
420
|
-
# def max_marginal_relevance_search_by_vector(
|
|
421
|
-
# self,
|
|
422
|
-
# embedding: List[float],
|
|
423
|
-
# k: int = 4,
|
|
424
|
-
# fetch_k: int = 20,
|
|
425
|
-
# lambda_mult: float = 0.5,
|
|
426
|
-
# **kwargs: Any,
|
|
427
|
-
# ) -> List[Document]:
|
|
428
|
-
# raise NotImplementedError
|
|
429
|
-
|
|
430
|
-
# optional: add custom async implementations
|
|
431
|
-
# async def amax_marginal_relevance_search_by_vector(
|
|
432
|
-
# self,
|
|
433
|
-
# embedding: List[float],
|
|
434
|
-
# k: int = 4,
|
|
435
|
-
# fetch_k: int = 20,
|
|
436
|
-
# lambda_mult: float = 0.5,
|
|
437
|
-
# **kwargs: Any,
|
|
438
|
-
# ) -> List[Document]:
|
|
439
|
-
# raise NotImplementedError
|
|
File without changes
|
|
File without changes
|
|
File without changes
|