kodit-0.2.6-py3-none-any.whl → kodit-0.2.7-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.6'
21
- __version_tuple__ = version_tuple = (0, 2, 6)
20
+ __version__ = version = '0.2.7'
21
+ __version_tuple__ = version_tuple = (0, 2, 7)
kodit/infrastructure/cloning/git/factory.py CHANGED
@@ -17,6 +17,7 @@ from kodit.infrastructure.cloning.metadata import (
17
17
  GitAuthorExtractor,
18
18
  GitFileMetadataExtractor,
19
19
  )
20
+ from kodit.infrastructure.git.git_utils import sanitize_git_url
20
21
  from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
21
22
 
22
23
 
@@ -46,28 +47,33 @@ class GitSourceFactory:
46
47
  progress_callback = NullProgressCallback()
47
48
 
48
49
  # Normalize the URI
49
- self.log.debug("Normalising git uri", uri=uri)
50
+ # Never log the raw URI in production
51
+ self.log.debug("Normalising git uri", uri="[REDACTED]" + uri[-4:])
50
52
  with tempfile.TemporaryDirectory() as temp_dir:
51
53
  git.Repo.clone_from(uri, temp_dir)
52
54
  remote = git.Repo(temp_dir).remote()
53
55
  uri = remote.url
54
56
 
57
+ # Sanitize the URI to remove any credentials
58
+ sanitized_uri = sanitize_git_url(uri)
59
+ self.log.debug("Sanitized git uri", sanitized_uri=sanitized_uri)
60
+
55
61
  # Check if source already exists
56
- self.log.debug("Checking if source already exists", uri=uri)
57
- source = await self.repository.get_by_uri(uri)
62
+ self.log.debug("Checking if source already exists", uri=sanitized_uri)
63
+ source = await self.repository.get_by_uri(sanitized_uri)
58
64
 
59
65
  if source:
60
66
  self.log.info("Source already exists, reusing...", source_id=source.id)
61
67
  return source
62
68
 
63
- # Prepare working copy
69
+ # Prepare working copy (use original URI for cloning, sanitized for storage)
64
70
  clone_path = await self.working_copy.prepare(uri)
65
71
 
66
72
  # Create source record
67
- self.log.debug("Creating source", uri=uri, clone_path=str(clone_path))
73
+ self.log.debug("Creating source", uri=sanitized_uri, clone_path=str(clone_path))
68
74
  source = await self.repository.save(
69
75
  Source(
70
- uri=uri,
76
+ uri=sanitized_uri,
71
77
  cloned_path=str(clone_path),
72
78
  source_type=SourceType.GIT,
73
79
  )
kodit/infrastructure/cloning/git/working_copy.py CHANGED
@@ -5,6 +5,8 @@ from pathlib import Path
5
5
  import git
6
6
  import structlog
7
7
 
8
+ from kodit.infrastructure.git.git_utils import sanitize_git_url
9
+
8
10
 
9
11
  class GitWorkingCopyProvider:
10
12
  """Working copy provider for git-based sources."""
@@ -16,17 +18,23 @@ class GitWorkingCopyProvider:
16
18
 
17
19
  async def prepare(self, uri: str) -> Path:
18
20
  """Prepare a Git working copy."""
19
- # Create a unique directory name for the clone
20
- clone_path = self.clone_dir / uri.replace("/", "_").replace(":", "_")
21
+ # Sanitize the URI for directory name to prevent credential leaks
22
+ sanitized_uri = sanitize_git_url(uri)
23
+
24
+ # Create a unique directory name for the clone using the sanitized URI
25
+ clone_path = self.clone_dir / sanitized_uri.replace("/", "_").replace(":", "_")
21
26
  clone_path.mkdir(parents=True, exist_ok=True)
22
27
 
23
28
  try:
24
- self.log.info("Cloning repository", uri=uri, clone_path=str(clone_path))
29
+ self.log.info(
30
+ "Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
31
+ )
32
+ # Use the original URI for cloning (with credentials if present)
25
33
  git.Repo.clone_from(uri, clone_path)
26
34
  except git.GitCommandError as e:
27
35
  if "already exists and is not an empty directory" not in str(e):
28
36
  msg = f"Failed to clone repository: {e}"
29
37
  raise ValueError(msg) from e
30
- self.log.info("Repository already exists, reusing...", uri=uri)
38
+ self.log.info("Repository already exists, reusing...", uri=sanitized_uri)
31
39
 
32
40
  return clone_path
kodit/infrastructure/git/git_utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Git utilities for infrastructure operations."""
2
2
 
3
3
  import tempfile
4
+ from urllib.parse import urlparse, urlunparse
4
5
 
5
6
  import git
6
7
 
@@ -22,3 +23,65 @@ def is_valid_clone_target(target: str) -> bool:
22
23
  return False
23
24
  else:
24
25
  return True
26
+
27
+
28
+ def sanitize_git_url(url: str) -> str:
29
+ """Remove credentials from a git URL while preserving the rest of the URL structure.
30
+
31
+ This function handles various git URL formats:
32
+ - HTTPS URLs with username:password@host
33
+ - HTTPS URLs with username@host (no password)
34
+ - SSH URLs (left unchanged)
35
+ - File URLs (left unchanged)
36
+
37
+ Args:
38
+ url: The git URL that may contain credentials.
39
+
40
+ Returns:
41
+ The sanitized URL with credentials removed.
42
+
43
+ Examples:
44
+ >>> sanitize_git_url("https://phil:token@dev.azure.com/org/project/_git/repo")
45
+ "https://dev.azure.com/org/project/_git/repo"
46
+ >>> sanitize_git_url("https://username@github.com/user/repo.git")
47
+ "https://github.com/user/repo.git"
48
+ >>> sanitize_git_url("git@github.com:user/repo.git")
49
+ "git@github.com:user/repo.git"
50
+
51
+ """
52
+ # Handle SSH URLs (they don't have credentials in the URL format)
53
+ if url.startswith(("git@", "ssh://")):
54
+ return url
55
+
56
+ # Handle file URLs
57
+ if url.startswith("file://"):
58
+ return url
59
+
60
+ try:
61
+ # Parse the URL
62
+ parsed = urlparse(url)
63
+
64
+ # If there are no credentials, return the URL as-is
65
+ if not parsed.username:
66
+ return url
67
+
68
+ # Reconstruct the URL without credentials
69
+ # Keep scheme, netloc (without username/password), path, params, query, fragment
70
+ sanitized_netloc = parsed.hostname
71
+ if parsed.port:
72
+ sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
73
+
74
+ return urlunparse(
75
+ (
76
+ parsed.scheme,
77
+ sanitized_netloc,
78
+ parsed.path,
79
+ parsed.params,
80
+ parsed.query,
81
+ parsed.fragment,
82
+ )
83
+ )
84
+
85
+ except Exception: # noqa: BLE001
86
+ # If URL parsing fails, return the original URL
87
+ return url
kodit-0.2.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
kodit-0.2.7.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=nObnONsicQ3YX6SG5MVBxmIp5dmRacXDauSqZijWQbY,511
3
+ kodit/_version.py,sha256=Xk20v7uvkFqkpy9aLJzVngs1eKQn0FYUP2oyA1MEQUU,511
4
4
  kodit/app.py,sha256=qKBWJ0VNSY_M6G3VFfAQ0133q5bnS99cUFD0p396taw,1032
5
5
  kodit/cli.py,sha256=JnhTlG1s04O0m8AzsBdrwP8T_BqSZMPXnRLvI7T_Gxc,12004
6
6
  kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
@@ -41,8 +41,8 @@ kodit/infrastructure/cloning/folder/__init__.py,sha256=w6ykrVtbYJlUDEXAjqgf6w2rM
41
41
  kodit/infrastructure/cloning/folder/factory.py,sha256=vl1hwnYA7lczjotn2fahJQAt7IK96CSArx8cSaRFKeY,4242
42
42
  kodit/infrastructure/cloning/folder/working_copy.py,sha256=FPhwzuPj40yGoYvwcm9VG8mv8MbJxwfby_N5JS-_daA,1154
43
43
  kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
44
- kodit/infrastructure/cloning/git/factory.py,sha256=1f0TKM9R_65WUMEhoJwBKTeo9xNYhm6VtscY7SqD6yU,5012
45
- kodit/infrastructure/cloning/git/working_copy.py,sha256=DMW_p7WWGoSeyDI9g55ItwsRomZSotXWRrlopqwszaQ,1115
44
+ kodit/infrastructure/cloning/git/factory.py,sha256=cY0cxapp0NCvjMRpzesW_qRzbWbh-tMKIeAj0Eodyhw,5409
45
+ kodit/infrastructure/cloning/git/working_copy.py,sha256=r_Uu6NYhRQLAQce6k4KThXLtGPqkzr6CgWx7AJ89gN4,1462
46
46
  kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
47
47
  kodit/infrastructure/embedding/embedding_factory.py,sha256=1AypjhWJGxvLnZt1SEH_FHPk9P0Vkt9fXdSGzFPp2ow,3432
48
48
  kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=UO8A3Eb_djFVrWKKSukAo4u7k8djDD1SlOPHk2pP9ps,3921
@@ -59,7 +59,7 @@ kodit/infrastructure/enrichment/local_enrichment_provider.py,sha256=8CATNtgMHgBR
59
59
  kodit/infrastructure/enrichment/null_enrichment_provider.py,sha256=5Ksyxl3qDLxUjmOeIdHZ0UAIULy7RcbLXJoT7_CNXoQ,775
60
60
  kodit/infrastructure/enrichment/openai_enrichment_provider.py,sha256=fenq4HiJ2UkrzsE2D0A0qpmro38z9mKaIzKKU5v7hnY,3189
61
61
  kodit/infrastructure/git/__init__.py,sha256=0iMosFzudj4_xNIMe2SRbV6l5bWqkjnUsZoFsoZFuM8,33
62
- kodit/infrastructure/git/git_utils.py,sha256=lOujEx41UuWfYSnFWbY4HC2tK5utytyzNkW1e5IPCr0,543
62
+ kodit/infrastructure/git/git_utils.py,sha256=2DH6cyTjDRwFfL5Bzt1y2w0DwHZNypbC6R0Gm_A3hhg,2476
63
63
  kodit/infrastructure/ignore/__init__.py,sha256=VzFv8XOzHmsu0MEGnWVSF6KsgqLBmvHlRqAkT1Xb1MY,36
64
64
  kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=9m2XCsgW87UBTfzHr6Z0Ns6WpzwkLir3zyBY3PwsgXk,2225
65
65
  kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
@@ -93,8 +93,8 @@ kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7h
93
93
  kodit/migrations/versions/9e53ea8bb3b0_add_authors.py,sha256=a32Zm8KUQyiiLkjKNPYdaJDgjW6VsV-GhaLnPnK_fpI,3884
94
94
  kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
95
95
  kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=rI8LmjF-I2OMxZ2nOIF_NRmqOLXe45hL_iz_nx97DTQ,1680
96
- kodit-0.2.6.dist-info/METADATA,sha256=q9yeLcPnH31fKZe5i5Bzw30NDuJge6NsCJDVToc0-OE,5867
97
- kodit-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
98
- kodit-0.2.6.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
99
- kodit-0.2.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
100
- kodit-0.2.6.dist-info/RECORD,,
96
+ kodit-0.2.7.dist-info/METADATA,sha256=G5rdRgHtm6V-p1tt0h7g7pZJKnQ5mAnCgDXVFRJG7Fg,5867
97
+ kodit-0.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
98
+ kodit-0.2.7.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
99
+ kodit-0.2.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
100
+ kodit-0.2.7.dist-info/RECORD,,
File without changes