langchain-githubcopilot-chat 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langchain-githubcopilot-chat
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: An integration package connecting GithubcopilotChat and LangChain
5
5
  Home-page: https://github.com/langchain-ai/langchain
6
6
  License: MIT
@@ -1,15 +1,11 @@
1
1
  from importlib import metadata
2
2
 
3
+ from langchain_githubcopilot_chat.auth import get_copilot_token
3
4
  from langchain_githubcopilot_chat.chat_models import (
4
5
  ChatGithubCopilot,
5
6
  ChatGithubcopilotChat,
6
7
  )
7
- from langchain_githubcopilot_chat.document_loaders import GithubcopilotChatLoader
8
8
  from langchain_githubcopilot_chat.embeddings import GithubcopilotChatEmbeddings
9
- from langchain_githubcopilot_chat.retrievers import GithubcopilotChatRetriever
10
- from langchain_githubcopilot_chat.toolkits import GithubcopilotChatToolkit
11
- from langchain_githubcopilot_chat.tools import GithubcopilotChatTool
12
- from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
13
9
 
14
10
  try:
15
11
  __version__ = metadata.version(__package__)
@@ -18,14 +14,15 @@ except metadata.PackageNotFoundError:
18
14
  __version__ = ""
19
15
  del metadata # optional, avoids polluting the results of dir(__package__)
20
16
 
17
+ get_available_models = ChatGithubCopilot.get_available_models
18
+ get_vscode_token = get_copilot_token
19
+
21
20
  __all__ = [
22
21
  "ChatGithubCopilot",
23
22
  "ChatGithubcopilotChat", # backwards-compatible alias
24
- "GithubcopilotChatVectorStore",
25
23
  "GithubcopilotChatEmbeddings",
26
- "GithubcopilotChatLoader",
27
- "GithubcopilotChatRetriever",
28
- "GithubcopilotChatToolkit",
29
- "GithubcopilotChatTool",
24
+ "get_copilot_token",
25
+ "get_vscode_token",
26
+ "get_available_models",
30
27
  "__version__",
31
28
  ]
@@ -0,0 +1,85 @@
1
+ import time
2
+ from typing import Optional
3
+
4
+ import httpx
5
+
6
+ CLIENT_ID = "Iv1.b507a08c87ecfe98"
7
+
8
+
9
+ def get_copilot_token(client_id: str = CLIENT_ID) -> Optional[str]:
10
+ """
11
+ Authenticate via GitHub Device Flow to get a Copilot Token.
12
+ This function will block and wait for the user to complete the
13
+ authorization in their browser.
14
+
15
+ Args:
16
+ client_id: The GitHub OAuth App Client ID to use. Defaults
17
+ to the VS Code Copilot Chat client ID.
18
+
19
+ Returns:
20
+ The fetched Copilot Token string, or None if authentication failed.
21
+ """
22
+ print("1. Requesting device code from GitHub...") # noqa: T201
23
+ with httpx.Client() as client:
24
+ res = client.post(
25
+ "https://github.com/login/device/code",
26
+ headers={"Accept": "application/json"},
27
+ data={"client_id": client_id, "scope": "read:user"},
28
+ )
29
+ res.raise_for_status()
30
+ data = res.json()
31
+
32
+ device_code = data.get("device_code")
33
+ user_code = data.get("user_code")
34
+ verification_uri = data.get("verification_uri")
35
+ interval = data.get("interval", 5)
36
+
37
+ print("\n==========================================") # noqa: T201
38
+ print(f"Please open your browser to: {verification_uri}") # noqa: T201
39
+ print(f"And enter the authorization code: {user_code}") # noqa: T201
40
+ print("==========================================\n") # noqa: T201
41
+ print(f"Waiting for authorization (checking every {interval} seconds)...") # noqa: T201
42
+
43
+ access_token = None
44
+ with httpx.Client() as client:
45
+ while True:
46
+ token_res = client.post(
47
+ "https://github.com/login/oauth/access_token",
48
+ headers={"Accept": "application/json"},
49
+ data={
50
+ "client_id": client_id,
51
+ "device_code": device_code,
52
+ "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
53
+ },
54
+ ).json()
55
+
56
+ if "access_token" in token_res:
57
+ access_token = token_res["access_token"]
58
+ print( # noqa: T201
59
+ "\n✅ Authorization successful! Exchanging for Copilot Token..."
60
+ )
61
+ break
62
+ elif token_res.get("error") == "authorization_pending":
63
+ time.sleep(interval)
64
+ else:
65
+ print(f"\n❌ Authorization failed: {token_res}") # noqa: T201
66
+ return None
67
+
68
+ # Exchange the standard access token for a Copilot internal token
69
+ copilot_res = client.get(
70
+ "https://api.github.com/copilot_internal/v2/token",
71
+ headers={
72
+ "Authorization": f"token {access_token}",
73
+ "Accept": "application/json",
74
+ "Editor-Version": "vscode/1.104.1",
75
+ "Editor-Plugin-Version": "copilot-chat/0.26.7",
76
+ },
77
+ )
78
+
79
+ if copilot_res.status_code == 200:
80
+ copilot_token = copilot_res.json().get("token")
81
+ print("🎉 Successfully acquired Copilot Token!") # noqa: T201
82
+ return copilot_token
83
+ else:
84
+ print(f"❌ Failed to acquire Copilot Token: {copilot_res.text}") # noqa: T201
85
+ return None
@@ -56,10 +56,23 @@ _ROLE_MAP = {
56
56
  "tool": "tool",
57
57
  }
58
58
 
59
- _GITHUB_MODELS_BASE_URL = "https://models.github.ai"
60
- _INFERENCE_PATH = "/inference/chat/completions"
61
- _ORG_INFERENCE_PATH = "/orgs/{org}/inference/chat/completions"
62
- _API_VERSION = "2026-03-10"
59
+ _GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
60
+ _INFERENCE_PATH = "/chat/completions"
61
+
62
+ COPILOT_EDITOR_VERSION = "vscode/1.104.1"
63
+ COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
64
+ COPILOT_INTEGRATION_ID = "vscode-chat"
65
+ COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"
66
+
67
+ COPILOT_DEFAULT_HEADERS = {
68
+ "Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
69
+ "User-Agent": COPILOT_USER_AGENT,
70
+ "Editor-Version": COPILOT_EDITOR_VERSION,
71
+ "Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
72
+ "editor-version": COPILOT_EDITOR_VERSION,
73
+ "editor-plugin-version": COPILOT_PLUGIN_VERSION,
74
+ "copilot-vision-request": "true",
75
+ }
63
76
 
64
77
 
65
78
  def _message_to_dict(message: BaseMessage) -> Dict[str, Any]:
@@ -403,19 +416,8 @@ class ChatGithubCopilot(BaseChatModel):
403
416
  is used.
404
417
  """
405
418
 
406
- base_url: str = _GITHUB_MODELS_BASE_URL
407
- """Base URL for the GitHub Models REST API."""
408
-
409
- org: Optional[str] = None
410
- """Organisation login for attributed inference requests.
411
-
412
- When set, requests are sent to
413
- ``/orgs/{org}/inference/chat/completions`` instead of
414
- ``/inference/chat/completions``.
415
- """
416
-
417
- api_version: str = _API_VERSION
418
- """GitHub Models API version sent as the ``X-GitHub-Api-Version`` header."""
419
+ base_url: str = _GITHUB_COPILOT_BASE_URL
420
+ """Base URL for the GitHub Copilot API."""
419
421
 
420
422
  temperature: Optional[float] = None
421
423
  """Sampling temperature in ``[0, 1]``."""
@@ -491,19 +493,43 @@ class ChatGithubCopilot(BaseChatModel):
491
493
  @property
492
494
  def _inference_url(self) -> str:
493
495
  """Return the full chat-completions endpoint URL."""
494
- if self.org:
495
- path = _ORG_INFERENCE_PATH.format(org=self.org)
496
- else:
497
- path = _INFERENCE_PATH
498
- return self.base_url.rstrip("/") + path
496
+ return self.base_url.rstrip("/") + _INFERENCE_PATH
499
497
 
500
498
  def _build_headers(self) -> Dict[str, str]:
501
- return {
499
+ headers = {
502
500
  "Authorization": f"Bearer {self._token}",
503
- "Accept": "application/vnd.github+json",
501
+ "Accept": "application/json",
502
+ "Content-Type": "application/json",
503
+ }
504
+ headers.update(COPILOT_DEFAULT_HEADERS)
505
+ return headers
506
+
507
+ @classmethod
508
+ def get_available_models(
509
+ cls, github_token: Optional[str] = None
510
+ ) -> List[Dict[str, Any]]:
511
+ """Get the list of available models from the GitHub Copilot API."""
512
+ token = github_token or os.environ.get("GITHUB_TOKEN")
513
+ if not token:
514
+ raise ValueError(
515
+ "A GitHub token is required. Set the GITHUB_TOKEN environment "
516
+ "variable or pass ``github_token``."
517
+ )
518
+
519
+ headers = {
520
+ "Authorization": f"Bearer {token}",
521
+ "Accept": "application/json",
504
522
  "Content-Type": "application/json",
505
- "X-GitHub-Api-Version": self.api_version,
506
523
  }
524
+ headers.update(COPILOT_DEFAULT_HEADERS)
525
+
526
+ url = f"{_GITHUB_COPILOT_BASE_URL}/models"
527
+
528
+ with httpx.Client() as client:
529
+ response = client.get(url, headers=headers)
530
+ response.raise_for_status()
531
+ data = response.json()
532
+ return data.get("data", [])
507
533
 
508
534
  def _build_payload(
509
535
  self,
@@ -712,7 +738,7 @@ class ChatGithubCopilot(BaseChatModel):
712
738
 
713
739
  @property
714
740
  def _llm_type(self) -> str:
715
- return "chat-github-copilot"
741
+ return "github-copilot"
716
742
 
717
743
  @property
718
744
  def _identifying_params(self) -> Dict[str, Any]:
@@ -9,10 +9,23 @@ import httpx
9
9
  from langchain_core.embeddings import Embeddings
10
10
  from pydantic import BaseModel, Field, SecretStr, model_validator
11
11
 
12
- _GITHUB_MODELS_BASE_URL = "https://models.github.ai"
13
- _EMBEDDINGS_PATH = "/inference/embeddings"
14
- _ORG_EMBEDDINGS_PATH = "/orgs/{org}/inference/embeddings"
15
- _API_VERSION = "2026-03-10"
12
+ _GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
13
+ _EMBEDDINGS_PATH = "/embeddings"
14
+
15
+ COPILOT_EDITOR_VERSION = "vscode/1.104.1"
16
+ COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
17
+ COPILOT_INTEGRATION_ID = "vscode-chat"
18
+ COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"
19
+
20
+ COPILOT_DEFAULT_HEADERS = {
21
+ "Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
22
+ "User-Agent": COPILOT_USER_AGENT,
23
+ "Editor-Version": COPILOT_EDITOR_VERSION,
24
+ "Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
25
+ "editor-version": COPILOT_EDITOR_VERSION,
26
+ "editor-plugin-version": COPILOT_PLUGIN_VERSION,
27
+ "copilot-vision-request": "true",
28
+ }
16
29
 
17
30
 
18
31
  class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
@@ -109,18 +122,8 @@ class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
109
122
  is used.
110
123
  """
111
124
 
112
- base_url: str = _GITHUB_MODELS_BASE_URL
113
- """Base URL for the GitHub Models REST API."""
114
-
115
- org: Optional[str] = None
116
- """Organisation login for attributed inference requests.
117
-
118
- When set, requests are sent to
119
- ``/orgs/{org}/inference/embeddings`` instead of ``/inference/embeddings``.
120
- """
121
-
122
- api_version: str = _API_VERSION
123
- """GitHub Models API version sent as the ``X-GitHub-Api-Version`` header."""
125
+ base_url: str = _GITHUB_COPILOT_BASE_URL
126
+ """Base URL for the GitHub Copilot API."""
124
127
 
125
128
  dimensions: Optional[int] = None
126
129
  """Number of output embedding dimensions.
@@ -173,19 +176,16 @@ class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
173
176
  @property
174
177
  def _embeddings_url(self) -> str:
175
178
  """Return the full embeddings endpoint URL."""
176
- if self.org:
177
- path = _ORG_EMBEDDINGS_PATH.format(org=self.org)
178
- else:
179
- path = _EMBEDDINGS_PATH
180
- return self.base_url.rstrip("/") + path
179
+ return self.base_url.rstrip("/") + _EMBEDDINGS_PATH
181
180
 
182
181
  def _build_headers(self) -> Dict[str, str]:
183
- return {
182
+ headers = {
184
183
  "Authorization": f"Bearer {self._token}",
185
- "Accept": "application/vnd.github+json",
184
+ "Accept": "application/json",
186
185
  "Content-Type": "application/json",
187
- "X-GitHub-Api-Version": self.api_version,
188
186
  }
187
+ headers.update(COPILOT_DEFAULT_HEADERS)
188
+ return headers
189
189
 
190
190
  def _build_payload(self, input: Union[str, List[str]]) -> Dict[str, Any]:
191
191
  """Assemble the JSON body for the embeddings API."""
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "langchain-githubcopilot-chat"
7
- version = "0.1.2"
7
+ version = "0.2.0"
8
8
  description = "An integration package connecting GithubcopilotChat and LangChain"
9
9
  authors = ["YIhan Wu <iumm@ibat.ac.cn>"]
10
10
  readme = "README.md"
@@ -1,73 +0,0 @@
1
- """GithubcopilotChat document loader."""
2
-
3
- from typing import Iterator
4
-
5
- from langchain_core.document_loaders.base import BaseLoader
6
- from langchain_core.documents import Document
7
-
8
-
9
- class GithubcopilotChatLoader(BaseLoader):
10
- # TODO: Replace all TODOs in docstring. See example docstring:
11
- # https://github.com/langchain-ai/langchain/blob/869523ad728e6b76d77f170cce13925b4ebc3c1e/libs/community/langchain_community/document_loaders/recursive_url_loader.py#L54
12
- """
13
- GithubcopilotChat document loader integration
14
-
15
- # TODO: Replace with relevant packages, env vars.
16
- Setup:
17
- Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
18
-
19
- .. code-block:: bash
20
-
21
- pip install -U langchain-githubcopilot-chat
22
- export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
23
-
24
- # TODO: Replace with relevant init params.
25
- Instantiate:
26
- .. code-block:: python
27
-
28
- from langchain_community.document_loaders import GithubcopilotChatLoader
29
-
30
- loader = GithubcopilotChatLoader(
31
- # required params = ...
32
- # other params = ...
33
- )
34
-
35
- Lazy load:
36
- .. code-block:: python
37
-
38
- docs = []
39
- docs_lazy = loader.lazy_load()
40
-
41
- # async variant:
42
- # docs_lazy = await loader.alazy_load()
43
-
44
- for doc in docs_lazy:
45
- docs.append(doc)
46
- print(docs[0].page_content[:100])
47
- print(docs[0].metadata)
48
-
49
- .. code-block:: python
50
-
51
- TODO: Example output
52
-
53
- # TODO: Delete if async load is not implemented
54
- Async load:
55
- .. code-block:: python
56
-
57
- docs = await loader.aload()
58
- print(docs[0].page_content[:100])
59
- print(docs[0].metadata)
60
-
61
- .. code-block:: python
62
-
63
- TODO: Example output
64
-
65
- """ # noqa: E501
66
-
67
- # TODO: This method must be implemented to load documents.
68
- # Do not implement load(), a default implementation is already available.
69
- def lazy_load(self) -> Iterator[Document]:
70
- raise NotImplementedError()
71
-
72
- # TODO: Implement if you would like to change default BaseLoader implementation
73
- # async def alazy_load(self) -> AsyncIterator[Document]:
@@ -1,107 +0,0 @@
1
- """GithubcopilotChat retrievers."""
2
-
3
- from typing import Any, List
4
-
5
- from langchain_core.callbacks import CallbackManagerForRetrieverRun
6
- from langchain_core.documents import Document
7
- from langchain_core.retrievers import BaseRetriever
8
-
9
-
10
- class GithubcopilotChatRetriever(BaseRetriever):
11
- # TODO: Replace all TODOs in docstring. See example docstring:
12
- # https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/retrievers/tavily_search_api.py#L17
13
- """GithubcopilotChat retriever.
14
-
15
- # TODO: Replace with relevant packages, env vars, etc.
16
- Setup:
17
- Install ``langchain-githubcopilot-chat`` and set environment variable
18
- ``GITHUBCOPILOTCHAT_API_KEY``.
19
-
20
- .. code-block:: bash
21
-
22
- pip install -U langchain-githubcopilot-chat
23
- export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
24
-
25
- # TODO: Populate with relevant params.
26
- Key init args:
27
- arg 1: type
28
- description
29
- arg 2: type
30
- description
31
-
32
- # TODO: Replace with relevant init params.
33
- Instantiate:
34
- .. code-block:: python
35
-
36
- from langchain-githubcopilot-chat import GithubcopilotChatRetriever
37
-
38
- retriever = GithubcopilotChatRetriever(
39
- # ...
40
- )
41
-
42
- Usage:
43
- .. code-block:: python
44
-
45
- query = "..."
46
-
47
- retriever.invoke(query)
48
-
49
- .. code-block:: none
50
-
51
- # TODO: Example output.
52
-
53
- Use within a chain:
54
- .. code-block:: python
55
-
56
- from langchain_core.output_parsers import StrOutputParser
57
- from langchain_core.prompts import ChatPromptTemplate
58
- from langchain_core.runnables import RunnablePassthrough
59
- from langchain_openai import ChatOpenAI
60
-
61
- prompt = ChatPromptTemplate.from_template(
62
- \"\"\"Answer the question based only on the context provided.
63
-
64
- Context: {context}
65
-
66
- Question: {question}\"\"\"
67
- )
68
-
69
- llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
70
-
71
- def format_docs(docs):
72
- return "\\n\\n".join(doc.page_content for doc in docs)
73
-
74
- chain = (
75
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
76
- | prompt
77
- | llm
78
- | StrOutputParser()
79
- )
80
-
81
- chain.invoke("...")
82
-
83
- .. code-block:: none
84
-
85
- # TODO: Example output.
86
-
87
- """
88
-
89
- k: int = 3
90
-
91
- # TODO: This method must be implemented to retrieve documents.
92
- def _get_relevant_documents(
93
- self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any
94
- ) -> List[Document]:
95
- k = kwargs.get("k", self.k)
96
- return [
97
- Document(page_content=f"Result {i} for query: {query}") for i in range(k)
98
- ]
99
-
100
- # optional: add custom async implementations here
101
- # async def _aget_relevant_documents(
102
- # self,
103
- # query: str,
104
- # *,
105
- # run_manager: AsyncCallbackManagerForRetrieverRun,
106
- # **kwargs: Any,
107
- # ) -> List[Document]: ...
@@ -1,72 +0,0 @@
1
- """GithubcopilotChat toolkits."""
2
-
3
- from typing import List
4
-
5
- from langchain_core.tools import BaseTool, BaseToolkit
6
-
7
-
8
- class GithubcopilotChatToolkit(BaseToolkit):
9
- # TODO: Replace all TODOs in docstring. See example docstring:
10
- # https://github.com/langchain-ai/langchain/blob/c123cb2b304f52ab65db4714eeec46af69a861ec/libs/community/langchain_community/agent_toolkits/sql/toolkit.py#L19
11
- """GithubcopilotChat toolkit.
12
-
13
- # TODO: Replace with relevant packages, env vars, etc.
14
- Setup:
15
- Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
16
-
17
- .. code-block:: bash
18
-
19
- pip install -U langchain-githubcopilot-chat
20
- export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
21
-
22
- # TODO: Populate with relevant params.
23
- Key init args:
24
- arg 1: type
25
- description
26
- arg 2: type
27
- description
28
-
29
- # TODO: Replace with relevant init params.
30
- Instantiate:
31
- .. code-block:: python
32
-
33
- from langchain-githubcopilot-chat import GithubcopilotChatToolkit
34
-
35
- toolkit = GithubcopilotChatToolkit(
36
- # ...
37
- )
38
-
39
- Tools:
40
- .. code-block:: python
41
-
42
- toolkit.get_tools()
43
-
44
- .. code-block:: none
45
-
46
- # TODO: Example output.
47
-
48
- Use within an agent:
49
- .. code-block:: python
50
-
51
- from langgraph.prebuilt import create_react_agent
52
-
53
- agent_executor = create_react_agent(llm, tools)
54
-
55
- example_query = "..."
56
-
57
- events = agent_executor.stream(
58
- {"messages": [("user", example_query)]},
59
- stream_mode="values",
60
- )
61
- for event in events:
62
- event["messages"][-1].pretty_print()
63
-
64
- .. code-block:: none
65
-
66
- # TODO: Example output.
67
-
68
- """ # noqa: E501
69
-
70
- # TODO: This method must be implemented to list tools.
71
- def get_tools(self) -> List[BaseTool]:
72
- raise NotImplementedError()
@@ -1,94 +0,0 @@
1
- """GithubcopilotChat tools."""
2
-
3
- from typing import Optional, Type
4
-
5
- from langchain_core.callbacks import (
6
- CallbackManagerForToolRun,
7
- )
8
- from langchain_core.tools import BaseTool
9
- from pydantic import BaseModel, Field
10
-
11
-
12
- class GithubcopilotChatToolInput(BaseModel):
13
- """Input schema for GithubcopilotChat tool.
14
-
15
- This docstring is **not** part of what is sent to the model when performing tool
16
- calling. The Field default values and descriptions **are** part of what is sent to
17
- the model when performing tool calling.
18
- """
19
-
20
- # TODO: Add input args and descriptions.
21
- a: int = Field(..., description="first number to add")
22
- b: int = Field(..., description="second number to add")
23
-
24
-
25
- class GithubcopilotChatTool(BaseTool): # type: ignore[override]
26
- """GithubcopilotChat tool.
27
-
28
- Setup:
29
- # TODO: Replace with relevant packages, env vars.
30
- Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
31
-
32
- .. code-block:: bash
33
-
34
- pip install -U langchain-githubcopilot-chat
35
- export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
36
-
37
- Instantiation:
38
- .. code-block:: python
39
-
40
- tool = GithubcopilotChatTool(
41
- # TODO: init params
42
- )
43
-
44
- Invocation with args:
45
- .. code-block:: python
46
-
47
- # TODO: invoke args
48
- tool.invoke({...})
49
-
50
- .. code-block:: python
51
-
52
- # TODO: output of invocation
53
-
54
- Invocation with ToolCall:
55
-
56
- .. code-block:: python
57
-
58
- # TODO: invoke args
59
- tool.invoke({"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"})
60
-
61
- .. code-block:: python
62
-
63
- # TODO: output of invocation
64
-
65
- """ # noqa: E501
66
-
67
- # TODO: Set tool name and description
68
- name: str = "TODO: Tool name"
69
- """The name that is passed to the model when performing tool calling."""
70
- description: str = "TODO: Tool description."
71
- """The description that is passed to the model when performing tool calling."""
72
- args_schema: Type[BaseModel] = GithubcopilotChatToolInput
73
- """The schema that is passed to the model when performing tool calling."""
74
-
75
- # TODO: Add any other init params for the tool.
76
- # param1: Optional[str]
77
- # """param1 determines foobar"""
78
-
79
- # TODO: Replaced (a, b) with real tool arguments.
80
- def _run(
81
- self, a: int, b: int, *, run_manager: Optional[CallbackManagerForToolRun] = None
82
- ) -> str:
83
- return str(a + b + 80)
84
-
85
- # TODO: Implement if tool has native async functionality, otherwise delete.
86
-
87
- # async def _arun(
88
- # self,
89
- # a: int,
90
- # b: int,
91
- # *,
92
- # run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
93
- # ) -> str:
94
- # ...
@@ -1,439 +0,0 @@
1
- """GithubcopilotChat vector stores."""
2
-
3
- from __future__ import annotations
4
-
5
- import uuid
6
- from typing import (
7
- Any,
8
- Callable,
9
- Iterator,
10
- List,
11
- Optional,
12
- Sequence,
13
- Tuple,
14
- Type,
15
- TypeVar,
16
- )
17
-
18
- from langchain_core.documents import Document
19
- from langchain_core.embeddings import Embeddings
20
- from langchain_core.vectorstores import VectorStore
21
- from langchain_core.vectorstores.utils import _cosine_similarity as cosine_similarity
22
-
23
- VST = TypeVar("VST", bound=VectorStore)
24
-
25
-
26
- class GithubcopilotChatVectorStore(VectorStore):
27
- # TODO: Replace all TODOs in docstring.
28
- """GithubcopilotChat vector store integration.
29
-
30
- # TODO: Replace with relevant packages, env vars.
31
- Setup:
32
- Install ``langchain-githubcopilot-chat`` and set environment variable ``GITHUBCOPILOTCHAT_API_KEY``.
33
-
34
- .. code-block:: bash
35
-
36
- pip install -U langchain-githubcopilot-chat
37
- export GITHUBCOPILOTCHAT_API_KEY="your-api-key"
38
-
39
- # TODO: Populate with relevant params.
40
- Key init args — indexing params:
41
- collection_name: str
42
- Name of the collection.
43
- embedding_function: Embeddings
44
- Embedding function to use.
45
-
46
- # TODO: Populate with relevant params.
47
- Key init args — client params:
48
- client: Optional[Client]
49
- Client to use.
50
- connection_args: Optional[dict]
51
- Connection arguments.
52
-
53
- # TODO: Replace with relevant init params.
54
- Instantiate:
55
- .. code-block:: python
56
-
57
- from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
58
- from langchain_openai import OpenAIEmbeddings
59
-
60
- vector_store = GithubcopilotChatVectorStore(
61
- collection_name="foo",
62
- embedding_function=OpenAIEmbeddings(),
63
- connection_args={"uri": "./foo.db"},
64
- # other params...
65
- )
66
-
67
- # TODO: Populate with relevant variables.
68
- Add Documents:
69
- .. code-block:: python
70
-
71
- from langchain_core.documents import Document
72
-
73
- document_1 = Document(page_content="foo", metadata={"baz": "bar"})
74
- document_2 = Document(page_content="thud", metadata={"bar": "baz"})
75
- document_3 = Document(page_content="i will be deleted :(")
76
-
77
- documents = [document_1, document_2, document_3]
78
- ids = ["1", "2", "3"]
79
- vector_store.add_documents(documents=documents, ids=ids)
80
-
81
- # TODO: Populate with relevant variables.
82
- Delete Documents:
83
- .. code-block:: python
84
-
85
- vector_store.delete(ids=["3"])
86
-
87
- # TODO: Fill out with relevant variables and example output.
88
- Search:
89
- .. code-block:: python
90
-
91
- results = vector_store.similarity_search(query="thud",k=1)
92
- for doc in results:
93
- print(f"* {doc.page_content} [{doc.metadata}]")
94
-
95
- .. code-block:: python
96
-
97
- # TODO: Example output
98
-
99
- # TODO: Fill out with relevant variables and example output.
100
- Search with filter:
101
- .. code-block:: python
102
-
103
- results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
104
- for doc in results:
105
- print(f"* {doc.page_content} [{doc.metadata}]")
106
-
107
- .. code-block:: python
108
-
109
- # TODO: Example output
110
-
111
- # TODO: Fill out with relevant variables and example output.
112
- Search with score:
113
- .. code-block:: python
114
-
115
- results = vector_store.similarity_search_with_score(query="qux",k=1)
116
- for doc, score in results:
117
- print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
118
-
119
- .. code-block:: python
120
-
121
- # TODO: Example output
122
-
123
- # TODO: Fill out with relevant variables and example output.
124
- Async:
125
- .. code-block:: python
126
-
127
- # add documents
128
- # await vector_store.aadd_documents(documents=documents, ids=ids)
129
-
130
- # delete documents
131
- # await vector_store.adelete(ids=["3"])
132
-
133
- # search
134
- # results = vector_store.asimilarity_search(query="thud",k=1)
135
-
136
- # search with score
137
- results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
138
- for doc,score in results:
139
- print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
140
-
141
- .. code-block:: python
142
-
143
- # TODO: Example output
144
-
145
- # TODO: Fill out with relevant variables and example output.
146
- Use as Retriever:
147
- .. code-block:: python
148
-
149
- retriever = vector_store.as_retriever(
150
- search_type="mmr",
151
- search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
152
- )
153
- retriever.invoke("thud")
154
-
155
- .. code-block:: python
156
-
157
- # TODO: Example output
158
-
159
- """ # noqa: E501
160
-
161
- def __init__(self, embedding: Embeddings) -> None:
162
- """Initialize with the given embedding function.
163
-
164
- Args:
165
- embedding: embedding function to use.
166
- """
167
- self._database: dict[str, dict[str, Any]] = {}
168
- self.embedding = embedding
169
-
170
- @classmethod
171
- def from_texts(
172
- cls: Type[GithubcopilotChatVectorStore],
173
- texts: List[str],
174
- embedding: Embeddings,
175
- metadatas: Optional[List[dict]] = None,
176
- **kwargs: Any,
177
- ) -> GithubcopilotChatVectorStore:
178
- store = cls(
179
- embedding=embedding,
180
- )
181
- store.add_texts(texts=texts, metadatas=metadatas, **kwargs)
182
- return store
183
-
184
- # optional: add custom async implementations
185
- # @classmethod
186
- # async def afrom_texts(
187
- # cls: Type[VST],
188
- # texts: List[str],
189
- # embedding: Embeddings,
190
- # metadatas: Optional[List[dict]] = None,
191
- # **kwargs: Any,
192
- # ) -> VST:
193
- # return await asyncio.get_running_loop().run_in_executor(
194
- # None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
195
- # )
196
-
197
- @property
198
- def embeddings(self) -> Embeddings:
199
- return self.embedding
200
-
201
- def add_documents(
202
- self,
203
- documents: List[Document],
204
- ids: Optional[List[str]] = None,
205
- **kwargs: Any,
206
- ) -> List[str]:
207
- """Add documents to the store."""
208
- texts = [doc.page_content for doc in documents]
209
- vectors = self.embedding.embed_documents(texts)
210
-
211
- if ids and len(ids) != len(texts):
212
- msg = (
213
- f"ids must be the same length as texts. "
214
- f"Got {len(ids)} ids and {len(texts)} texts."
215
- )
216
- raise ValueError(msg)
217
-
218
- id_iterator: Iterator[Optional[str]] = (
219
- iter(ids) if ids else iter(doc.id for doc in documents)
220
- )
221
-
222
- ids_ = []
223
-
224
- for doc, vector in zip(documents, vectors):
225
- doc_id = next(id_iterator)
226
- doc_id_ = doc_id if doc_id else str(uuid.uuid4())
227
- ids_.append(doc_id_)
228
- self._database[doc_id_] = {
229
- "id": doc_id_,
230
- "vector": vector,
231
- "text": doc.page_content,
232
- "metadata": doc.metadata,
233
- }
234
-
235
- return ids_
236
-
237
- # optional: add custom async implementations
238
- # async def aadd_documents(
239
- # self,
240
- # documents: List[Document],
241
- # ids: Optional[List[str]] = None,
242
- # **kwargs: Any,
243
- # ) -> List[str]:
244
- # raise NotImplementedError
245
-
246
- def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
247
- if ids:
248
- for _id in ids:
249
- self._database.pop(_id, None)
250
-
251
- # optional: add custom async implementations
252
- # async def adelete(
253
- # self, ids: Optional[List[str]] = None, **kwargs: Any
254
- # ) -> None:
255
- # raise NotImplementedError
256
-
257
- def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
258
- """Get documents by their ids.
259
-
260
- Args:
261
- ids: The ids of the documents to get.
262
-
263
- Returns:
264
- A list of Document objects.
265
- """
266
- documents = []
267
-
268
- for doc_id in ids:
269
- doc = self._database.get(doc_id)
270
- if doc:
271
- documents.append(
272
- Document(
273
- id=doc["id"],
274
- page_content=doc["text"],
275
- metadata=doc["metadata"],
276
- )
277
- )
278
- return documents
279
-
280
- # optional: add custom async implementations
281
- # async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
282
- # raise NotImplementedError
283
-
284
- # NOTE: the below helper method implements similarity search for in-memory
285
- # storage. It is optional and not a part of the vector store interface.
286
- def _similarity_search_with_score_by_vector(
287
- self,
288
- embedding: List[float],
289
- k: int = 4,
290
- filter: Optional[Callable[[Document], bool]] = None,
291
- **kwargs: Any,
292
- ) -> List[tuple[Document, float, List[float]]]:
293
- # get all docs with fixed order in list
294
- docs = list(self._database.values())
295
-
296
- if filter is not None:
297
- docs = [
298
- doc
299
- for doc in docs
300
- if filter(Document(page_content=doc["text"], metadata=doc["metadata"]))
301
- ]
302
-
303
- if not docs:
304
- return []
305
-
306
- similarity = cosine_similarity([embedding], [doc["vector"] for doc in docs])[0]
307
-
308
- # get the indices ordered by similarity score
309
- top_k_idx = similarity.argsort()[::-1][:k]
310
-
311
- return [
312
- (
313
- # Document
314
- Document(
315
- id=doc_dict["id"],
316
- page_content=doc_dict["text"],
317
- metadata=doc_dict["metadata"],
318
- ),
319
- # Score
320
- float(similarity[idx].item()),
321
- # Embedding vector
322
- doc_dict["vector"],
323
- )
324
- for idx in top_k_idx
325
- # Assign using walrus operator to avoid multiple lookups
326
- if (doc_dict := docs[idx])
327
- ]
328
-
329
- def similarity_search(
330
- self, query: str, k: int = 4, **kwargs: Any
331
- ) -> List[Document]:
332
- embedding = self.embedding.embed_query(query)
333
- return [
334
- doc
335
- for doc, _, _ in self._similarity_search_with_score_by_vector(
336
- embedding=embedding, k=k, **kwargs
337
- )
338
- ]
339
-
340
- # optional: add custom async implementations
341
- # async def asimilarity_search(
342
- # self, query: str, k: int = 4, **kwargs: Any
343
- # ) -> List[Document]:
344
- # # This is a temporary workaround to make the similarity search
345
- # # asynchronous. The proper solution is to make the similarity search
346
- # # asynchronous in the vector store implementations.
347
- # func = partial(self.similarity_search, query, k=k, **kwargs)
348
- # return await asyncio.get_event_loop().run_in_executor(None, func)
349
-
350
- def similarity_search_with_score(
351
- self, query: str, k: int = 4, **kwargs: Any
352
- ) -> List[Tuple[Document, float]]:
353
- embedding = self.embedding.embed_query(query)
354
- return [
355
- (doc, similarity)
356
- for doc, similarity, _ in self._similarity_search_with_score_by_vector(
357
- embedding=embedding, k=k, **kwargs
358
- )
359
- ]
360
-
361
- # optional: add custom async implementations
362
- # async def asimilarity_search_with_score(
363
- # self, *args: Any, **kwargs: Any
364
- # ) -> List[Tuple[Document, float]]:
365
- # # This is a temporary workaround to make the similarity search
366
- # # asynchronous. The proper solution is to make the similarity search
367
- # # asynchronous in the vector store implementations.
368
- # func = partial(self.similarity_search_with_score, *args, **kwargs)
369
- # return await asyncio.get_event_loop().run_in_executor(None, func)
370
-
371
- ### ADDITIONAL OPTIONAL SEARCH METHODS BELOW ###
372
-
373
- # def similarity_search_by_vector(
374
- # self, embedding: List[float], k: int = 4, **kwargs: Any
375
- # ) -> List[Document]:
376
- # raise NotImplementedError
377
-
378
- # optional: add custom async implementations
379
- # async def asimilarity_search_by_vector(
380
- # self, embedding: List[float], k: int = 4, **kwargs: Any
381
- # ) -> List[Document]:
382
- # # This is a temporary workaround to make the similarity search
383
- # # asynchronous. The proper solution is to make the similarity search
384
- # # asynchronous in the vector store implementations.
385
- # func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
386
- # return await asyncio.get_event_loop().run_in_executor(None, func)
387
-
388
- # def max_marginal_relevance_search(
389
- # self,
390
- # query: str,
391
- # k: int = 4,
392
- # fetch_k: int = 20,
393
- # lambda_mult: float = 0.5,
394
- # **kwargs: Any,
395
- # ) -> List[Document]:
396
- # raise NotImplementedError
397
-
398
- # optional: add custom async implementations
399
- # async def amax_marginal_relevance_search(
400
- # self,
401
- # query: str,
402
- # k: int = 4,
403
- # fetch_k: int = 20,
404
- # lambda_mult: float = 0.5,
405
- # **kwargs: Any,
406
- # ) -> List[Document]:
407
- # # This is a temporary workaround to make the similarity search
408
- # # asynchronous. The proper solution is to make the similarity search
409
- # # asynchronous in the vector store implementations.
410
- # func = partial(
411
- # self.max_marginal_relevance_search,
412
- # query,
413
- # k=k,
414
- # fetch_k=fetch_k,
415
- # lambda_mult=lambda_mult,
416
- # **kwargs,
417
- # )
418
- # return await asyncio.get_event_loop().run_in_executor(None, func)
419
-
420
- # def max_marginal_relevance_search_by_vector(
421
- # self,
422
- # embedding: List[float],
423
- # k: int = 4,
424
- # fetch_k: int = 20,
425
- # lambda_mult: float = 0.5,
426
- # **kwargs: Any,
427
- # ) -> List[Document]:
428
- # raise NotImplementedError
429
-
430
- # optional: add custom async implementations
431
- # async def amax_marginal_relevance_search_by_vector(
432
- # self,
433
- # embedding: List[float],
434
- # k: int = 4,
435
- # fetch_k: int = 20,
436
- # lambda_mult: float = 0.5,
437
- # **kwargs: Any,
438
- # ) -> List[Document]:
439
- # raise NotImplementedError