langchain-githubcopilot-chat 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/PKG-INFO +4 -5
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/__init__.py +7 -10
- langchain_githubcopilot_chat-0.2.0/langchain_githubcopilot_chat/auth.py +85 -0
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/chat_models.py +57 -26
- langchain_githubcopilot_chat-0.2.0/langchain_githubcopilot_chat/embeddings.py +321 -0
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/pyproject.toml +5 -5
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/document_loaders.py +0 -73
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/embeddings.py +0 -96
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/retrievers.py +0 -107
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/toolkits.py +0 -72
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/tools.py +0 -94
- langchain_githubcopilot_chat-0.1.0/langchain_githubcopilot_chat/vectorstores.py +0 -439
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/LICENSE +0 -0
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/README.md +0 -0
- {langchain_githubcopilot_chat-0.1.0 → langchain_githubcopilot_chat-0.2.0}/langchain_githubcopilot_chat/py.typed +0 -0
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: langchain-githubcopilot-chat
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: An integration package connecting GithubcopilotChat and LangChain
|
|
5
5
|
Home-page: https://github.com/langchain-ai/langchain
|
|
6
6
|
License: MIT
|
|
7
7
|
Author: YIhan Wu
|
|
8
8
|
Author-email: iumm@ibat.ac.cn
|
|
9
|
-
Requires-Python: >=3.
|
|
9
|
+
Requires-Python: >=3.10,<4.0
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
-
Requires-Dist: httpx (>=0.
|
|
18
|
-
Requires-Dist: langchain-core (>=
|
|
16
|
+
Requires-Dist: httpx (>=0.28.1)
|
|
17
|
+
Requires-Dist: langchain-core (>=1.1.0,<2.0.0)
|
|
19
18
|
Project-URL: Repository, https://github.com/langchain-ai/langchain
|
|
20
19
|
Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22githubcopilot-chat%3D%3D0%22&expanded=true
|
|
21
20
|
Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/githubcopilot-chat
|
|
@@ -1,15 +1,11 @@
|
|
|
1
1
|
from importlib import metadata
|
|
2
2
|
|
|
3
|
+
from langchain_githubcopilot_chat.auth import get_copilot_token
|
|
3
4
|
from langchain_githubcopilot_chat.chat_models import (
|
|
4
5
|
ChatGithubCopilot,
|
|
5
6
|
ChatGithubcopilotChat,
|
|
6
7
|
)
|
|
7
|
-
from langchain_githubcopilot_chat.document_loaders import GithubcopilotChatLoader
|
|
8
8
|
from langchain_githubcopilot_chat.embeddings import GithubcopilotChatEmbeddings
|
|
9
|
-
from langchain_githubcopilot_chat.retrievers import GithubcopilotChatRetriever
|
|
10
|
-
from langchain_githubcopilot_chat.toolkits import GithubcopilotChatToolkit
|
|
11
|
-
from langchain_githubcopilot_chat.tools import GithubcopilotChatTool
|
|
12
|
-
from langchain_githubcopilot_chat.vectorstores import GithubcopilotChatVectorStore
|
|
13
9
|
|
|
14
10
|
try:
|
|
15
11
|
__version__ = metadata.version(__package__)
|
|
@@ -18,14 +14,15 @@ except metadata.PackageNotFoundError:
|
|
|
18
14
|
__version__ = ""
|
|
19
15
|
del metadata # optional, avoids polluting the results of dir(__package__)
|
|
20
16
|
|
|
17
|
+
get_available_models = ChatGithubCopilot.get_available_models
|
|
18
|
+
get_vscode_token = get_copilot_token
|
|
19
|
+
|
|
21
20
|
__all__ = [
|
|
22
21
|
"ChatGithubCopilot",
|
|
23
22
|
"ChatGithubcopilotChat", # backwards-compatible alias
|
|
24
|
-
"GithubcopilotChatVectorStore",
|
|
25
23
|
"GithubcopilotChatEmbeddings",
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"GithubcopilotChatTool",
|
|
24
|
+
"get_copilot_token",
|
|
25
|
+
"get_vscode_token",
|
|
26
|
+
"get_available_models",
|
|
30
27
|
"__version__",
|
|
31
28
|
]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import time
from typing import Optional

import httpx

# OAuth App Client ID of the VS Code GitHub Copilot Chat extension.
CLIENT_ID = "Iv1.b507a08c87ecfe98"

# RFC 8628 §3.5: on a ``slow_down`` error the client MUST increase its
# polling interval by 5 seconds before polling again.
_SLOW_DOWN_INCREMENT = 5


def get_copilot_token(client_id: str = CLIENT_ID) -> Optional[str]:
    """
    Authenticate via GitHub Device Flow to get a Copilot Token.

    This function will block and wait for the user to complete the
    authorization in their browser.

    Args:
        client_id: The GitHub OAuth App Client ID to use. Defaults
            to the VS Code Copilot Chat client ID.

    Returns:
        The fetched Copilot Token string, or None if authentication failed.
    """
    print("1. Requesting device code from GitHub...")  # noqa: T201
    with httpx.Client() as client:
        res = client.post(
            "https://github.com/login/device/code",
            headers={"Accept": "application/json"},
            data={"client_id": client_id, "scope": "read:user"},
        )
        res.raise_for_status()
        data = res.json()

    device_code = data.get("device_code")
    user_code = data.get("user_code")
    verification_uri = data.get("verification_uri")
    interval = data.get("interval", 5)

    print("\n==========================================")  # noqa: T201
    print(f"Please open your browser to: {verification_uri}")  # noqa: T201
    print(f"And enter the authorization code: {user_code}")  # noqa: T201
    print("==========================================\n")  # noqa: T201
    print(f"Waiting for authorization (checking every {interval} seconds)...")  # noqa: T201

    access_token = None
    with httpx.Client() as client:
        while True:
            token_res = client.post(
                "https://github.com/login/oauth/access_token",
                headers={"Accept": "application/json"},
                data={
                    "client_id": client_id,
                    "device_code": device_code,
                    "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
                },
            ).json()

            if "access_token" in token_res:
                access_token = token_res["access_token"]
                print(  # noqa: T201
                    "\n✅ Authorization successful! Exchanging for Copilot Token..."
                )
                break
            elif token_res.get("error") == "authorization_pending":
                time.sleep(interval)
            elif token_res.get("error") == "slow_down":
                # Bug fix: previously a ``slow_down`` response fell into the
                # generic failure branch and aborted authentication. Per
                # RFC 8628 we must back off and keep polling instead.
                interval += _SLOW_DOWN_INCREMENT
                time.sleep(interval)
            else:
                # expired_token, access_denied, etc. — genuinely terminal.
                print(f"\n❌ Authorization failed: {token_res}")  # noqa: T201
                return None

        # Exchange the standard access token for a Copilot internal token
        copilot_res = client.get(
            "https://api.github.com/copilot_internal/v2/token",
            headers={
                "Authorization": f"token {access_token}",
                "Accept": "application/json",
                "Editor-Version": "vscode/1.104.1",
                "Editor-Plugin-Version": "copilot-chat/0.26.7",
            },
        )

        if copilot_res.status_code == 200:
            copilot_token = copilot_res.json().get("token")
            print("🎉 Successfully acquired Copilot Token!")  # noqa: T201
            return copilot_token
        else:
            print(f"❌ Failed to acquire Copilot Token: {copilot_res.text}")  # noqa: T201
            return None
|
|
@@ -56,10 +56,23 @@ _ROLE_MAP = {
|
|
|
56
56
|
"tool": "tool",
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
_INFERENCE_PATH = "/
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
# Base URL of the GitHub Copilot inference API.
_GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
# OpenAI-compatible chat-completions path appended to the base URL.
_INFERENCE_PATH = "/chat/completions"

# Client-identification values matching the VS Code Copilot Chat extension.
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
COPILOT_INTEGRATION_ID = "vscode-chat"
COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"

# Default headers attached to every Copilot API request. Both the
# canonical-case and lower-case spellings of the editor headers are sent —
# presumably for compatibility across API frontends (TODO confirm whether
# both are actually required).
COPILOT_DEFAULT_HEADERS = {
    "Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
    "User-Agent": COPILOT_USER_AGENT,
    "Editor-Version": COPILOT_EDITOR_VERSION,
    "Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
    "editor-version": COPILOT_EDITOR_VERSION,
    "editor-plugin-version": COPILOT_PLUGIN_VERSION,
    "copilot-vision-request": "true",
}
|
|
63
76
|
|
|
64
77
|
|
|
65
78
|
def _message_to_dict(message: BaseMessage) -> Dict[str, Any]:
|
|
@@ -403,19 +416,8 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
403
416
|
is used.
|
|
404
417
|
"""
|
|
405
418
|
|
|
406
|
-
base_url: str =
|
|
407
|
-
"""Base URL for the GitHub
|
|
408
|
-
|
|
409
|
-
org: Optional[str] = None
|
|
410
|
-
"""Organisation login for attributed inference requests.
|
|
411
|
-
|
|
412
|
-
When set, requests are sent to
|
|
413
|
-
``/orgs/{org}/inference/chat/completions`` instead of
|
|
414
|
-
``/inference/chat/completions``.
|
|
415
|
-
"""
|
|
416
|
-
|
|
417
|
-
api_version: str = _API_VERSION
|
|
418
|
-
"""GitHub Models API version sent as the ``X-GitHub-Api-Version`` header."""
|
|
419
|
+
base_url: str = _GITHUB_COPILOT_BASE_URL
|
|
420
|
+
"""Base URL for the GitHub Copilot API."""
|
|
419
421
|
|
|
420
422
|
temperature: Optional[float] = None
|
|
421
423
|
"""Sampling temperature in ``[0, 1]``."""
|
|
@@ -444,6 +446,11 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
444
446
|
max_retries: int = 2
|
|
445
447
|
"""Number of automatic retries on transient errors."""
|
|
446
448
|
|
|
449
|
+
# ------------------------------------------------------------------
|
|
450
|
+
# Pydantic v2 config — allow the ``model`` alias on construction
|
|
451
|
+
# ------------------------------------------------------------------
|
|
452
|
+
model_config = {"populate_by_name": True}
|
|
453
|
+
|
|
447
454
|
# ------------------------------------------------------------------
|
|
448
455
|
# Validators / setup
|
|
449
456
|
# ------------------------------------------------------------------
|
|
@@ -486,19 +493,43 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
486
493
|
@property
|
|
487
494
|
def _inference_url(self) -> str:
|
|
488
495
|
"""Return the full chat-completions endpoint URL."""
|
|
489
|
-
|
|
490
|
-
path = _ORG_INFERENCE_PATH.format(org=self.org)
|
|
491
|
-
else:
|
|
492
|
-
path = _INFERENCE_PATH
|
|
493
|
-
return self.base_url.rstrip("/") + path
|
|
496
|
+
return self.base_url.rstrip("/") + _INFERENCE_PATH
|
|
494
497
|
|
|
495
498
|
def _build_headers(self) -> Dict[str, str]:
|
|
496
|
-
|
|
499
|
+
headers = {
|
|
497
500
|
"Authorization": f"Bearer {self._token}",
|
|
498
|
-
"Accept": "application/
|
|
501
|
+
"Accept": "application/json",
|
|
499
502
|
"Content-Type": "application/json",
|
|
500
|
-
"X-GitHub-Api-Version": self.api_version,
|
|
501
503
|
}
|
|
504
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
505
|
+
return headers
|
|
506
|
+
|
|
507
|
+
@classmethod
|
|
508
|
+
def get_available_models(
|
|
509
|
+
cls, github_token: Optional[str] = None
|
|
510
|
+
) -> List[Dict[str, Any]]:
|
|
511
|
+
"""Get the list of available models from the GitHub Copilot API."""
|
|
512
|
+
token = github_token or os.environ.get("GITHUB_TOKEN")
|
|
513
|
+
if not token:
|
|
514
|
+
raise ValueError(
|
|
515
|
+
"A GitHub token is required. Set the GITHUB_TOKEN environment "
|
|
516
|
+
"variable or pass ``github_token``."
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
headers = {
|
|
520
|
+
"Authorization": f"Bearer {token}",
|
|
521
|
+
"Accept": "application/json",
|
|
522
|
+
"Content-Type": "application/json",
|
|
523
|
+
}
|
|
524
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
525
|
+
|
|
526
|
+
url = f"{_GITHUB_COPILOT_BASE_URL}/models"
|
|
527
|
+
|
|
528
|
+
with httpx.Client() as client:
|
|
529
|
+
response = client.get(url, headers=headers)
|
|
530
|
+
response.raise_for_status()
|
|
531
|
+
data = response.json()
|
|
532
|
+
return data.get("data", [])
|
|
502
533
|
|
|
503
534
|
def _build_payload(
|
|
504
535
|
self,
|
|
@@ -707,7 +738,7 @@ class ChatGithubCopilot(BaseChatModel):
|
|
|
707
738
|
|
|
708
739
|
    @property
    def _llm_type(self) -> str:
        # Type tag LangChain uses to identify this chat-model implementation.
        return "github-copilot"
|
|
711
742
|
|
|
712
743
|
@property
|
|
713
744
|
def _identifying_params(self) -> Dict[str, Any]:
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""GitHub Copilot Chat embeddings integration via GitHub Models Embeddings API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any, Dict, List, Optional, Union
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from langchain_core.embeddings import Embeddings
|
|
10
|
+
from pydantic import BaseModel, Field, SecretStr, model_validator
|
|
11
|
+
|
|
12
|
+
_GITHUB_COPILOT_BASE_URL = "https://api.githubcopilot.com"
|
|
13
|
+
_EMBEDDINGS_PATH = "/embeddings"
|
|
14
|
+
|
|
15
|
+
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
|
|
16
|
+
COPILOT_PLUGIN_VERSION = "copilot-chat/0.26.7"
|
|
17
|
+
COPILOT_INTEGRATION_ID = "vscode-chat"
|
|
18
|
+
COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
|
19
|
+
|
|
20
|
+
COPILOT_DEFAULT_HEADERS = {
|
|
21
|
+
"Copilot-Integration-Id": COPILOT_INTEGRATION_ID,
|
|
22
|
+
"User-Agent": COPILOT_USER_AGENT,
|
|
23
|
+
"Editor-Version": COPILOT_EDITOR_VERSION,
|
|
24
|
+
"Editor-Plugin-Version": COPILOT_PLUGIN_VERSION,
|
|
25
|
+
"editor-version": COPILOT_EDITOR_VERSION,
|
|
26
|
+
"editor-plugin-version": COPILOT_PLUGIN_VERSION,
|
|
27
|
+
"copilot-vision-request": "true",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GithubcopilotChatEmbeddings(BaseModel, Embeddings):
|
|
32
|
+
"""GitHub Copilot Chat embedding model integration via the GitHub Models API.
|
|
33
|
+
|
|
34
|
+
GitHub Models provides access to embedding models (e.g. OpenAI
|
|
35
|
+
``text-embedding-3-small``, ``text-embedding-3-large``) through a unified
|
|
36
|
+
OpenAI-compatible REST API. This class wraps the ``/inference/embeddings``
|
|
37
|
+
endpoint so that any embedding model available in the GitHub Models catalog
|
|
38
|
+
can be used as a drop-in LangChain ``Embeddings`` implementation.
|
|
39
|
+
|
|
40
|
+
Setup:
|
|
41
|
+
Install ``langchain-githubcopilot-chat`` and set the
|
|
42
|
+
``GITHUB_TOKEN`` environment variable (a classic or fine-grained PAT
|
|
43
|
+
with the ``models: read`` scope, or a GitHub Copilot subscription token).
|
|
44
|
+
|
|
45
|
+
.. code-block:: bash
|
|
46
|
+
|
|
47
|
+
pip install -U langchain-githubcopilot-chat
|
|
48
|
+
export GITHUB_TOKEN="github_pat_..."
|
|
49
|
+
|
|
50
|
+
Key init args:
|
|
51
|
+
model: str
|
|
52
|
+
Model ID in the ``{publisher}/{model_name}`` format, e.g.
|
|
53
|
+
``"openai/text-embedding-3-small"``.
|
|
54
|
+
github_token: Optional[SecretStr]
|
|
55
|
+
GitHub token. Falls back to ``GITHUB_TOKEN`` env var.
|
|
56
|
+
base_url: str
|
|
57
|
+
Base URL of the GitHub Models API.
|
|
58
|
+
Defaults to ``"https://models.github.ai"``.
|
|
59
|
+
org: Optional[str]
|
|
60
|
+
Organisation login. When set, requests are attributed to that org.
|
|
61
|
+
api_version: str
|
|
62
|
+
GitHub Models REST API version header value.
|
|
63
|
+
Defaults to ``"2026-03-10"``.
|
|
64
|
+
dimensions: Optional[int]
|
|
65
|
+
The number of dimensions for the output embeddings. Only supported
|
|
66
|
+
by ``text-embedding-3`` and later models.
|
|
67
|
+
encoding_format: str
|
|
68
|
+
The format to return embeddings in. Either ``"float"`` (default)
|
|
69
|
+
or ``"base64"``.
|
|
70
|
+
timeout: Optional[float]
|
|
71
|
+
HTTP request timeout in seconds.
|
|
72
|
+
max_retries: int
|
|
73
|
+
Number of automatic retries on transient errors (default ``2``).
|
|
74
|
+
|
|
75
|
+
Instantiate:
|
|
76
|
+
.. code-block:: python
|
|
77
|
+
|
|
78
|
+
from langchain_githubcopilot_chat import GithubcopilotChatEmbeddings
|
|
79
|
+
|
|
80
|
+
embed = GithubcopilotChatEmbeddings(
|
|
81
|
+
model="openai/text-embedding-3-small",
|
|
82
|
+
# github_token="github_pat_...", # or set GITHUB_TOKEN env var
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
Embed single text:
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
vector = embed.embed_query("What is the meaning of life?")
|
|
89
|
+
print(len(vector)) # e.g. 1536
|
|
90
|
+
|
|
91
|
+
Embed multiple texts:
|
|
92
|
+
.. code-block:: python
|
|
93
|
+
|
|
94
|
+
vectors = embed.embed_documents(
|
|
95
|
+
["Document one.", "Document two."]
|
|
96
|
+
)
|
|
97
|
+
print(len(vectors), len(vectors[0]))
|
|
98
|
+
|
|
99
|
+
Async:
|
|
100
|
+
.. code-block:: python
|
|
101
|
+
|
|
102
|
+
vector = await embed.aembed_query("What is the meaning of life?")
|
|
103
|
+
|
|
104
|
+
vectors = await embed.aembed_documents(
|
|
105
|
+
["Document one.", "Document two."]
|
|
106
|
+
)
|
|
107
|
+
"""
|
|
108
|
+
|
|
109
|
+
model_config = {"populate_by_name": True}
|
|
110
|
+
|
|
111
|
+
model_name: str = Field(alias="model")
|
|
112
|
+
"""Embedding model ID in the ``{publisher}/{model_name}`` format.
|
|
113
|
+
|
|
114
|
+
Examples: ``"openai/text-embedding-3-small"``,
|
|
115
|
+
``"openai/text-embedding-3-large"``.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
github_token: Optional[SecretStr] = Field(default=None)
|
|
119
|
+
"""GitHub token with ``models: read`` scope.
|
|
120
|
+
|
|
121
|
+
If not provided, the value of the ``GITHUB_TOKEN`` environment variable
|
|
122
|
+
is used.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
base_url: str = _GITHUB_COPILOT_BASE_URL
|
|
126
|
+
"""Base URL for the GitHub Copilot API."""
|
|
127
|
+
|
|
128
|
+
dimensions: Optional[int] = None
|
|
129
|
+
"""Number of output embedding dimensions.
|
|
130
|
+
|
|
131
|
+
Only supported by ``text-embedding-3`` and later models.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
encoding_format: str = "float"
|
|
135
|
+
"""Format of the returned embeddings. Either ``"float"`` or ``"base64"``."""
|
|
136
|
+
|
|
137
|
+
timeout: Optional[float] = None
|
|
138
|
+
"""HTTP request timeout in seconds."""
|
|
139
|
+
|
|
140
|
+
max_retries: int = 2
|
|
141
|
+
"""Number of automatic retries on transient errors."""
|
|
142
|
+
|
|
143
|
+
# ------------------------------------------------------------------
|
|
144
|
+
# Validators / setup
|
|
145
|
+
# ------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
@model_validator(mode="before")
|
|
148
|
+
@classmethod
|
|
149
|
+
def _resolve_token(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
150
|
+
"""Resolve the GitHub token from the environment if not supplied."""
|
|
151
|
+
token = values.get("github_token") or values.get("api_key")
|
|
152
|
+
if not token:
|
|
153
|
+
token = os.environ.get("GITHUB_TOKEN")
|
|
154
|
+
if token:
|
|
155
|
+
values["github_token"] = token
|
|
156
|
+
return values
|
|
157
|
+
|
|
158
|
+
# ------------------------------------------------------------------
|
|
159
|
+
# Internal helpers
|
|
160
|
+
# ------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def _token(self) -> str:
|
|
164
|
+
"""Return the raw GitHub token string."""
|
|
165
|
+
if self.github_token:
|
|
166
|
+
return self.github_token.get_secret_value()
|
|
167
|
+
env_token = os.environ.get("GITHUB_TOKEN", "")
|
|
168
|
+
if not env_token:
|
|
169
|
+
raise ValueError(
|
|
170
|
+
"A GitHub token is required. Set the GITHUB_TOKEN environment "
|
|
171
|
+
"variable or pass ``github_token`` when instantiating "
|
|
172
|
+
"GithubcopilotChatEmbeddings."
|
|
173
|
+
)
|
|
174
|
+
return env_token
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
def _embeddings_url(self) -> str:
|
|
178
|
+
"""Return the full embeddings endpoint URL."""
|
|
179
|
+
return self.base_url.rstrip("/") + _EMBEDDINGS_PATH
|
|
180
|
+
|
|
181
|
+
def _build_headers(self) -> Dict[str, str]:
|
|
182
|
+
headers = {
|
|
183
|
+
"Authorization": f"Bearer {self._token}",
|
|
184
|
+
"Accept": "application/json",
|
|
185
|
+
"Content-Type": "application/json",
|
|
186
|
+
}
|
|
187
|
+
headers.update(COPILOT_DEFAULT_HEADERS)
|
|
188
|
+
return headers
|
|
189
|
+
|
|
190
|
+
def _build_payload(self, input: Union[str, List[str]]) -> Dict[str, Any]:
|
|
191
|
+
"""Assemble the JSON body for the embeddings API."""
|
|
192
|
+
payload: Dict[str, Any] = {
|
|
193
|
+
"model": self.model_name,
|
|
194
|
+
"input": input,
|
|
195
|
+
"encoding_format": self.encoding_format,
|
|
196
|
+
}
|
|
197
|
+
if self.dimensions is not None:
|
|
198
|
+
payload["dimensions"] = self.dimensions
|
|
199
|
+
return payload
|
|
200
|
+
|
|
201
|
+
def _do_request(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
202
|
+
"""Perform a synchronous HTTP POST with retries."""
|
|
203
|
+
headers = self._build_headers()
|
|
204
|
+
last_exc: Optional[Exception] = None
|
|
205
|
+
for attempt in range(self.max_retries + 1):
|
|
206
|
+
try:
|
|
207
|
+
response = httpx.post(
|
|
208
|
+
self._embeddings_url,
|
|
209
|
+
headers=headers,
|
|
210
|
+
json=payload,
|
|
211
|
+
timeout=self.timeout,
|
|
212
|
+
)
|
|
213
|
+
response.raise_for_status()
|
|
214
|
+
return response.json()
|
|
215
|
+
except (httpx.TimeoutException, httpx.TransportError) as exc:
|
|
216
|
+
last_exc = exc
|
|
217
|
+
if attempt == self.max_retries:
|
|
218
|
+
raise
|
|
219
|
+
except httpx.HTTPStatusError as exc:
|
|
220
|
+
if exc.response.status_code < 500:
|
|
221
|
+
raise
|
|
222
|
+
last_exc = exc
|
|
223
|
+
if attempt == self.max_retries:
|
|
224
|
+
raise
|
|
225
|
+
raise RuntimeError("Unexpected retry loop exit") from last_exc
|
|
226
|
+
|
|
227
|
+
async def _do_request_async(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
228
|
+
"""Perform an asynchronous HTTP POST with retries."""
|
|
229
|
+
headers = self._build_headers()
|
|
230
|
+
last_exc: Optional[Exception] = None
|
|
231
|
+
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
232
|
+
for attempt in range(self.max_retries + 1):
|
|
233
|
+
try:
|
|
234
|
+
response = await client.post(
|
|
235
|
+
self._embeddings_url,
|
|
236
|
+
headers=headers,
|
|
237
|
+
json=payload,
|
|
238
|
+
)
|
|
239
|
+
response.raise_for_status()
|
|
240
|
+
return response.json()
|
|
241
|
+
except (httpx.TimeoutException, httpx.TransportError) as exc:
|
|
242
|
+
last_exc = exc
|
|
243
|
+
if attempt == self.max_retries:
|
|
244
|
+
raise
|
|
245
|
+
except httpx.HTTPStatusError as exc:
|
|
246
|
+
if exc.response.status_code < 500:
|
|
247
|
+
raise
|
|
248
|
+
last_exc = exc
|
|
249
|
+
if attempt == self.max_retries:
|
|
250
|
+
raise
|
|
251
|
+
raise RuntimeError("Unexpected retry loop exit") from last_exc
|
|
252
|
+
|
|
253
|
+
@staticmethod
|
|
254
|
+
def _extract_embeddings(response_data: Dict[str, Any]) -> List[List[float]]:
|
|
255
|
+
"""Extract the list of embedding vectors from an API response."""
|
|
256
|
+
data = response_data.get("data", [])
|
|
257
|
+
if not data:
|
|
258
|
+
raise ValueError(
|
|
259
|
+
f"GitHub Models Embeddings API returned no data. "
|
|
260
|
+
f"Response: {response_data}"
|
|
261
|
+
)
|
|
262
|
+
# Sort by index to preserve input order (the API may reorder items)
|
|
263
|
+
sorted_data = sorted(data, key=lambda x: x.get("index", 0))
|
|
264
|
+
return [item["embedding"] for item in sorted_data]
|
|
265
|
+
|
|
266
|
+
# ------------------------------------------------------------------
|
|
267
|
+
# LangChain Embeddings interface
|
|
268
|
+
# ------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
|
271
|
+
"""Embed a list of documents using the GitHub Models Embeddings API.
|
|
272
|
+
|
|
273
|
+
Args:
|
|
274
|
+
texts: The list of texts to embed.
|
|
275
|
+
|
|
276
|
+
Returns:
|
|
277
|
+
A list of embedding vectors, one per input text.
|
|
278
|
+
"""
|
|
279
|
+
if not texts:
|
|
280
|
+
return []
|
|
281
|
+
payload = self._build_payload(texts)
|
|
282
|
+
response_data = self._do_request(payload)
|
|
283
|
+
return self._extract_embeddings(response_data)
|
|
284
|
+
|
|
285
|
+
def embed_query(self, text: str) -> List[float]:
|
|
286
|
+
"""Embed a single query text using the GitHub Models Embeddings API.
|
|
287
|
+
|
|
288
|
+
Args:
|
|
289
|
+
text: The text to embed.
|
|
290
|
+
|
|
291
|
+
Returns:
|
|
292
|
+
An embedding vector.
|
|
293
|
+
"""
|
|
294
|
+
return self.embed_documents([text])[0]
|
|
295
|
+
|
|
296
|
+
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
|
|
297
|
+
"""Asynchronously embed a list of documents.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
texts: The list of texts to embed.
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
A list of embedding vectors, one per input text.
|
|
304
|
+
"""
|
|
305
|
+
if not texts:
|
|
306
|
+
return []
|
|
307
|
+
payload = self._build_payload(texts)
|
|
308
|
+
response_data = await self._do_request_async(payload)
|
|
309
|
+
return self._extract_embeddings(response_data)
|
|
310
|
+
|
|
311
|
+
async def aembed_query(self, text: str) -> List[float]:
|
|
312
|
+
"""Asynchronously embed a single query text.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
text: The text to embed.
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
An embedding vector.
|
|
319
|
+
"""
|
|
320
|
+
results = await self.aembed_documents([text])
|
|
321
|
+
return results[0]
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "langchain-githubcopilot-chat"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "An integration package connecting GithubcopilotChat and LangChain"
|
|
9
9
|
authors = ["YIhan Wu <iumm@ibat.ac.cn>"]
|
|
10
10
|
readme = "README.md"
|
|
@@ -19,9 +19,9 @@ disallow_untyped_defs = "True"
|
|
|
19
19
|
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22githubcopilot-chat%3D%3D0%22&expanded=true"
|
|
20
20
|
|
|
21
21
|
[tool.poetry.dependencies]
|
|
22
|
-
python = ">=3.
|
|
23
|
-
langchain-core = "
|
|
24
|
-
httpx = ">=0.
|
|
22
|
+
python = ">=3.10,<4.0"
|
|
23
|
+
langchain-core = ">=1.1.0,<2.0.0"
|
|
24
|
+
httpx = ">=0.28.1"
|
|
25
25
|
|
|
26
26
|
[tool.ruff.lint]
|
|
27
27
|
select = ["E", "F", "I", "T201"]
|
|
@@ -59,7 +59,7 @@ pytest = "^7.4.3"
|
|
|
59
59
|
pytest-asyncio = "^0.23.2"
|
|
60
60
|
pytest-socket = "^0.7.0"
|
|
61
61
|
pytest-watcher = "^0.3.4"
|
|
62
|
-
langchain-tests = "^
|
|
62
|
+
langchain-tests = "^1.1.5"
|
|
63
63
|
|
|
64
64
|
[tool.poetry.group.codespell.dependencies]
|
|
65
65
|
codespell = "^2.2.6"
|