kodit-0.2.6-py3-none-any.whl → kodit-0.2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/infrastructure/cloning/git/factory.py +12 -6
- kodit/infrastructure/cloning/git/working_copy.py +15 -4
- kodit/infrastructure/git/git_utils.py +63 -0
- {kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/METADATA +1 -1
- {kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/RECORD +9 -9
- {kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/WHEEL +0 -0
- {kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/licenses/LICENSE +0 -0
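kodit/_version.py
CHANGED
(hunk not captured in this extract; the summary above lists +2 -2 for this file)
kodit/infrastructure/cloning/git/factory.py
CHANGED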
|
@@ -17,6 +17,7 @@ from kodit.infrastructure.cloning.metadata import (
|
|
|
17
17
|
GitAuthorExtractor,
|
|
18
18
|
GitFileMetadataExtractor,
|
|
19
19
|
)
|
|
20
|
+
from kodit.infrastructure.git.git_utils import sanitize_git_url
|
|
20
21
|
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
21
22
|
|
|
22
23
|
|
|
@@ -46,28 +47,33 @@ class GitSourceFactory:
|
|
|
46
47
|
progress_callback = NullProgressCallback()
|
|
47
48
|
|
|
48
49
|
# Normalize the URI
|
|
49
|
-
|
|
50
|
+
# Never log the raw URI in production
|
|
51
|
+
self.log.debug("Normalising git uri", uri="[REDACTED]" + uri[-4:])
|
|
50
52
|
with tempfile.TemporaryDirectory() as temp_dir:
|
|
51
53
|
git.Repo.clone_from(uri, temp_dir)
|
|
52
54
|
remote = git.Repo(temp_dir).remote()
|
|
53
55
|
uri = remote.url
|
|
54
56
|
|
|
57
|
+
# Sanitize the URI to remove any credentials
|
|
58
|
+
sanitized_uri = sanitize_git_url(uri)
|
|
59
|
+
self.log.debug("Sanitized git uri", sanitized_uri=sanitized_uri)
|
|
60
|
+
|
|
55
61
|
# Check if source already exists
|
|
56
|
-
self.log.debug("Checking if source already exists", uri=uri)
|
|
57
|
-
source = await self.repository.get_by_uri(uri)
|
|
62
|
+
self.log.debug("Checking if source already exists", uri=sanitized_uri)
|
|
63
|
+
source = await self.repository.get_by_uri(sanitized_uri)
|
|
58
64
|
|
|
59
65
|
if source:
|
|
60
66
|
self.log.info("Source already exists, reusing...", source_id=source.id)
|
|
61
67
|
return source
|
|
62
68
|
|
|
63
|
-
# Prepare working copy
|
|
69
|
+
# Prepare working copy (use original URI for cloning, sanitized for storage)
|
|
64
70
|
clone_path = await self.working_copy.prepare(uri)
|
|
65
71
|
|
|
66
72
|
# Create source record
|
|
67
|
-
self.log.debug("Creating source", uri=
|
|
73
|
+
self.log.debug("Creating source", uri=sanitized_uri, clone_path=str(clone_path))
|
|
68
74
|
source = await self.repository.save(
|
|
69
75
|
Source(
|
|
70
|
-
uri=uri,
|
|
76
|
+
uri=sanitized_uri,
|
|
71
77
|
cloned_path=str(clone_path),
|
|
72
78
|
source_type=SourceType.GIT,
|
|
73
79
|
)
|
|
kodit/infrastructure/cloning/git/working_copy.py
CHANGED
@@ -1,10 +1,13 @@
|
|
|
1
1
|
"""Working copy provider for git-based sources."""
|
|
2
2
|
|
|
3
|
+
import hashlib
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import git
|
|
6
7
|
import structlog
|
|
7
8
|
|
|
9
|
+
from kodit.infrastructure.git.git_utils import sanitize_git_url
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class GitWorkingCopyProvider:
|
|
10
13
|
"""Working copy provider for git-based sources."""
|
|
@@ -16,17 +19,25 @@ class GitWorkingCopyProvider:
|
|
|
16
19
|
|
|
17
20
|
async def prepare(self, uri: str) -> Path:
|
|
18
21
|
"""Prepare a Git working copy."""
|
|
19
|
-
#
|
|
20
|
-
|
|
22
|
+
# Sanitize the URI for directory name to prevent credential leaks
|
|
23
|
+
sanitized_uri = sanitize_git_url(uri)
|
|
24
|
+
|
|
25
|
+
# Use a repeatable, short sha256 hash of the sanitized URI for the directory
|
|
26
|
+
dir_hash = hashlib.sha256(sanitized_uri.encode("utf-8")).hexdigest()[:16]
|
|
27
|
+
dir_name = f"repo-{dir_hash}"
|
|
28
|
+
clone_path = self.clone_dir / dir_name
|
|
21
29
|
clone_path.mkdir(parents=True, exist_ok=True)
|
|
22
30
|
|
|
23
31
|
try:
|
|
24
|
-
self.log.info(
|
|
32
|
+
self.log.info(
|
|
33
|
+
"Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
|
|
34
|
+
)
|
|
35
|
+
# Use the original URI for cloning (with credentials if present)
|
|
25
36
|
git.Repo.clone_from(uri, clone_path)
|
|
26
37
|
except git.GitCommandError as e:
|
|
27
38
|
if "already exists and is not an empty directory" not in str(e):
|
|
28
39
|
msg = f"Failed to clone repository: {e}"
|
|
29
40
|
raise ValueError(msg) from e
|
|
30
|
-
self.log.info("Repository already exists, reusing...", uri=uri)
|
|
41
|
+
self.log.info("Repository already exists, reusing...", uri=sanitized_uri)
|
|
31
42
|
|
|
32
43
|
return clone_path
|
|
kodit/infrastructure/git/git_utils.py
CHANGED
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Git utilities for infrastructure operations."""
|
|
2
2
|
|
|
3
3
|
import tempfile
|
|
4
|
+
from urllib.parse import urlparse, urlunparse
|
|
4
5
|
|
|
5
6
|
import git
|
|
6
7
|
|
|
@@ -22,3 +23,65 @@ def is_valid_clone_target(target: str) -> bool:
|
|
|
22
23
|
return False
|
|
23
24
|
else:
|
|
24
25
|
return True
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def sanitize_git_url(url: str) -> str:
|
|
29
|
+
"""Remove credentials from a git URL while preserving the rest of the URL structure.
|
|
30
|
+
|
|
31
|
+
This function handles various git URL formats:
|
|
32
|
+
- HTTPS URLs with username:password@host
|
|
33
|
+
- HTTPS URLs with username@host (no password)
|
|
34
|
+
- SSH URLs (left unchanged)
|
|
35
|
+
- File URLs (left unchanged)
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
url: The git URL that may contain credentials.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
The sanitized URL with credentials removed.
|
|
42
|
+
|
|
43
|
+
Examples:
|
|
44
|
+
>>> sanitize_git_url("https://phil:token@dev.azure.com/org/project/_git/repo")
|
|
45
|
+
"https://dev.azure.com/org/project/_git/repo"
|
|
46
|
+
>>> sanitize_git_url("https://username@github.com/user/repo.git")
|
|
47
|
+
"https://github.com/user/repo.git"
|
|
48
|
+
>>> sanitize_git_url("git@github.com:user/repo.git")
|
|
49
|
+
"git@github.com:user/repo.git"
|
|
50
|
+
|
|
51
|
+
"""
|
|
52
|
+
# Handle SSH URLs (they don't have credentials in the URL format)
|
|
53
|
+
if url.startswith(("git@", "ssh://")):
|
|
54
|
+
return url
|
|
55
|
+
|
|
56
|
+
# Handle file URLs
|
|
57
|
+
if url.startswith("file://"):
|
|
58
|
+
return url
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
# Parse the URL
|
|
62
|
+
parsed = urlparse(url)
|
|
63
|
+
|
|
64
|
+
# If there are no credentials, return the URL as-is
|
|
65
|
+
if not parsed.username:
|
|
66
|
+
return url
|
|
67
|
+
|
|
68
|
+
# Reconstruct the URL without credentials
|
|
69
|
+
# Keep scheme, netloc (without username/password), path, params, query, fragment
|
|
70
|
+
sanitized_netloc = parsed.hostname
|
|
71
|
+
if parsed.port:
|
|
72
|
+
sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
|
|
73
|
+
|
|
74
|
+
return urlunparse(
|
|
75
|
+
(
|
|
76
|
+
parsed.scheme,
|
|
77
|
+
sanitized_netloc,
|
|
78
|
+
parsed.path,
|
|
79
|
+
parsed.params,
|
|
80
|
+
parsed.query,
|
|
81
|
+
parsed.fragment,
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
except Exception: # noqa: BLE001
|
|
86
|
+
# If URL parsing fails, return the original URL
|
|
87
|
+
return url
|
|
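{kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/METADATA
CHANGED
(hunk not captured in this extract; the summary above lists +1 -1 for this file)
{kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@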
|
|
|
1
1
|
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
2
|
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=
|
|
3
|
+
kodit/_version.py,sha256=zkhRarrvPoGA1yWjS9_zVM80dWqpDesNn9DiHcF4JWM,511
|
|
4
4
|
kodit/app.py,sha256=qKBWJ0VNSY_M6G3VFfAQ0133q5bnS99cUFD0p396taw,1032
|
|
5
5
|
kodit/cli.py,sha256=JnhTlG1s04O0m8AzsBdrwP8T_BqSZMPXnRLvI7T_Gxc,12004
|
|
6
6
|
kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
|
|
@@ -41,8 +41,8 @@ kodit/infrastructure/cloning/folder/__init__.py,sha256=w6ykrVtbYJlUDEXAjqgf6w2rM
|
|
|
41
41
|
kodit/infrastructure/cloning/folder/factory.py,sha256=vl1hwnYA7lczjotn2fahJQAt7IK96CSArx8cSaRFKeY,4242
|
|
42
42
|
kodit/infrastructure/cloning/folder/working_copy.py,sha256=FPhwzuPj40yGoYvwcm9VG8mv8MbJxwfby_N5JS-_daA,1154
|
|
43
43
|
kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
|
|
44
|
-
kodit/infrastructure/cloning/git/factory.py,sha256=
|
|
45
|
-
kodit/infrastructure/cloning/git/working_copy.py,sha256=
|
|
44
|
+
kodit/infrastructure/cloning/git/factory.py,sha256=cY0cxapp0NCvjMRpzesW_qRzbWbh-tMKIeAj0Eodyhw,5409
|
|
45
|
+
kodit/infrastructure/cloning/git/working_copy.py,sha256=IwXQ0Ta59ykVkrxAyhJk0ijOO6aaub7UI-bXFDyNT0k,1562
|
|
46
46
|
kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
|
|
47
47
|
kodit/infrastructure/embedding/embedding_factory.py,sha256=1AypjhWJGxvLnZt1SEH_FHPk9P0Vkt9fXdSGzFPp2ow,3432
|
|
48
48
|
kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=UO8A3Eb_djFVrWKKSukAo4u7k8djDD1SlOPHk2pP9ps,3921
|
|
@@ -59,7 +59,7 @@ kodit/infrastructure/enrichment/local_enrichment_provider.py,sha256=8CATNtgMHgBR
|
|
|
59
59
|
kodit/infrastructure/enrichment/null_enrichment_provider.py,sha256=5Ksyxl3qDLxUjmOeIdHZ0UAIULy7RcbLXJoT7_CNXoQ,775
|
|
60
60
|
kodit/infrastructure/enrichment/openai_enrichment_provider.py,sha256=fenq4HiJ2UkrzsE2D0A0qpmro38z9mKaIzKKU5v7hnY,3189
|
|
61
61
|
kodit/infrastructure/git/__init__.py,sha256=0iMosFzudj4_xNIMe2SRbV6l5bWqkjnUsZoFsoZFuM8,33
|
|
62
|
-
kodit/infrastructure/git/git_utils.py,sha256=
|
|
62
|
+
kodit/infrastructure/git/git_utils.py,sha256=2DH6cyTjDRwFfL5Bzt1y2w0DwHZNypbC6R0Gm_A3hhg,2476
|
|
63
63
|
kodit/infrastructure/ignore/__init__.py,sha256=VzFv8XOzHmsu0MEGnWVSF6KsgqLBmvHlRqAkT1Xb1MY,36
|
|
64
64
|
kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=9m2XCsgW87UBTfzHr6Z0Ns6WpzwkLir3zyBY3PwsgXk,2225
|
|
65
65
|
kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
|
|
@@ -93,8 +93,8 @@ kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7h
|
|
|
93
93
|
kodit/migrations/versions/9e53ea8bb3b0_add_authors.py,sha256=a32Zm8KUQyiiLkjKNPYdaJDgjW6VsV-GhaLnPnK_fpI,3884
|
|
94
94
|
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
95
95
|
kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=rI8LmjF-I2OMxZ2nOIF_NRmqOLXe45hL_iz_nx97DTQ,1680
|
|
96
|
-
kodit-0.2.
|
|
97
|
-
kodit-0.2.
|
|
98
|
-
kodit-0.2.
|
|
99
|
-
kodit-0.2.
|
|
100
|
-
kodit-0.2.
|
|
96
|
+
kodit-0.2.8.dist-info/METADATA,sha256=uxuFXAnAellsx3-Isah750l4jBAme9DT2imIzjjBmHg,5867
|
|
97
|
+
kodit-0.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
98
|
+
kodit-0.2.8.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
99
|
+
kodit-0.2.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
100
|
+
kodit-0.2.8.dist-info/RECORD,,
|
|
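{kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/WHEEL
File without changes
|
|
{kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/entry_points.txt
File without changes
|
|
{kodit-0.2.6.dist-info → kodit-0.2.8.dist-info}/licenses/LICENSE
File without changes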
|