gitnexus 1.6.4-rc.58 → 1.6.4-rc.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -744,16 +744,28 @@ export const createServer = async (port, host = '127.0.0.1') => {
744
744
  // 1. Delete the .gitnexus index/storage directory
745
745
  const storagePath = getStoragePath(entry.path);
746
746
  await fs.rm(storagePath, { recursive: true, force: true }).catch(() => { });
747
- // 2. Delete the cloned repo dir if it lives under ~/.gitnexus/repos/
748
- const cloneDir = getCloneDir(entry.name);
747
+ // 2. Delete the cloned repo dir if it lives under ~/.gitnexus/repos/.
748
+ // getCloneDir now throws on names that are not filesystem-safe (e.g.
749
+ // local repos registered with names like "my project" or "org/repo").
750
+ // Such repos legitimately have no clone dir, so treat the rejection as
751
+ // "nothing to clean up" rather than letting it fail the delete handler.
752
+ let cloneDir = null;
749
753
  try {
750
- const stat = await fs.stat(cloneDir);
751
- if (stat.isDirectory()) {
752
- await fs.rm(cloneDir, { recursive: true, force: true });
753
- }
754
+ cloneDir = getCloneDir(entry.name);
754
755
  }
755
756
  catch {
756
- /* clone dir may not exist (local repos) */
757
+ /* repo name not eligible for a clone dir (local repo) */
758
+ }
759
+ if (cloneDir) {
760
+ try {
761
+ const stat = await fs.stat(cloneDir);
762
+ if (stat.isDirectory()) {
763
+ await fs.rm(cloneDir, { recursive: true, force: true });
764
+ }
765
+ }
766
+ catch {
767
+ /* clone dir may not exist */
768
+ }
757
769
  }
758
770
  // 3. Unregister from the global registry
759
771
  const { unregisterRepo } = await import('../storage/repo-manager.js');
@@ -4,7 +4,13 @@
4
4
  * Shallow-clones repositories into ~/.gitnexus/repos/{name}/.
5
5
  * If already cloned, does git pull instead.
6
6
  */
7
- /** Extract the repository name from a git URL (HTTPS or SSH). */
7
+ /**
8
+ * Extract the repository name from a git URL (HTTPS or SSH).
9
+ *
10
+ * Throws if the URL does not yield a filesystem-safe last segment. A name
11
+ * like `..` or `foo/bar` would otherwise let `getCloneDir(name)` escape the
12
+ * clone root via path traversal.
13
+ */
8
14
  export declare function extractRepoName(url: string): string;
9
15
  /** Get the clone target directory for a repo name. */
10
16
  export declare function getCloneDir(repoName: string): string;
@@ -18,9 +24,76 @@ export interface CloneProgress {
18
24
  phase: 'cloning' | 'pulling';
19
25
  message: string;
20
26
  }
27
+ /**
28
+ * Build the `git clone` argument list for a given URL and target directory.
29
+ *
30
+ * The `--` separator is non-negotiable: it stops git from parsing a URL that
31
+ * starts with `--` (e.g. `--upload-pack=evil`) as an option flag, which would
32
+ * otherwise execute an attacker-chosen subprocess (CodeQL
33
+ * js/second-order-command-line-injection, alerts #166/#167).
34
+ *
35
+ * Exported so the separator placement is testable without mocking spawn.
36
+ */
37
+ export declare function buildCloneArgs(url: string, targetDir: string): string[];
38
+ /**
39
+ * Normalize a git URL into a comparable form.
40
+ *
41
+ * Two URLs are considered the same repository when their normalized forms
42
+ * are identical: lowercased hostname, no trailing `.git`, no trailing
43
+ * slashes on the path, default port stripped. Path comparison stays
44
+ * case-sensitive because that's how Git hosts treat the path component on
45
+ * the wire (case-folding GitHub's web UI is a separate convenience).
46
+ *
47
+ * Returns the trimmed input, lowercased in full, if URL parsing fails — the
48
+ * caller still gets a literal compare for non-URL forms (e.g. SSH `git@host:`).
49
+ */
50
+ export declare function normalizeGitUrlForCompare(url: string): string;
51
+ /**
52
+ * Read `remote.origin.url` from an existing clone using `git config --get`.
53
+ *
54
+ * Returns `null` if the config key is absent, the spawn fails, or the
55
+ * directory isn't a git repository. The caller decides what a missing
56
+ * remote means for its threat model — for cloneOrPull, a missing remote
57
+ * on an existing clone is treated as a refuse-to-pull condition.
58
+ */
59
+ export declare function getRemoteOriginUrl(cwd: string): Promise<string | null>;
60
+ /**
61
+ * Verify that an existing clone's `remote.origin.url` matches the requested
62
+ * URL (after normalization). Throws on mismatch or missing remote.
63
+ *
64
+ * Closes the wrong-repo silent-analysis vector that Codex's adversarial
65
+ * review on PR #1325 surfaced: clone dirs are keyed by URL basename, so a
66
+ * request for `https://gitlab.example/attacker/repo.git` would otherwise
67
+ * collide with an existing `~/.gitnexus/repos/repo` cloned from a different
68
+ * origin and `git pull --ff-only` would silently succeed against the wrong
69
+ * remote.
70
+ *
71
+ * Exported so the comparison logic is testable in isolation against any
72
+ * tmpdir-based fixture, without needing to populate CLONE_ROOT.
73
+ */
74
+ export declare function assertRemoteMatchesRequestedUrl(targetDir: string, requestedUrl: string): Promise<void>;
21
75
  /**
22
76
  * Clone or pull a git repository.
23
77
  * If targetDir doesn't exist: git clone --depth 1
24
- * If targetDir exists with .git: git pull --ff-only
78
+ * If targetDir exists with .git: git pull --ff-only (after verifying the
79
+ * existing clone's remote.origin matches the requested URL).
80
+ *
81
+ * Security:
82
+ * - targetDir must resolve inside CLONE_ROOT (~/.gitnexus/repos/). The
83
+ * path.relative containment barrier below is the inline canonical idiom
84
+ * CodeQL's js/path-injection sanitizer recognizes.
85
+ * - validateGitUrl runs unconditionally on the requested URL — both the
86
+ * clone path and the pull path. An earlier shape only validated on the
87
+ * clone branch; an existing clone with the same basename let an
88
+ * attacker's URL skip the SSRF / scheme / private-IP checks (Codex
89
+ * adversarial review on PR #1325).
90
+ * - When the target already has `.git`, the existing clone's
91
+ * remote.origin.url is fetched and compared (normalized) to the
92
+ * requested URL. Refuses to pull if they differ — this closes the
93
+ * wrong-repo silent-analysis vector where two URLs sharing a basename
94
+ * would collide on the same on-disk clone dir.
95
+ * - The git URL is passed after a `--` separator so a value beginning with
96
+ * `--` (e.g. `--upload-pack=evil`) cannot be interpreted as a git option
97
+ * (CodeQL js/second-order-command-line-injection).
25
98
  */
26
99
  export declare function cloneOrPull(url: string, targetDir: string, onProgress?: (progress: CloneProgress) => void): Promise<string>;
@@ -9,15 +9,43 @@ import path from 'path';
9
9
  import os from 'os';
10
10
  import fs from 'fs/promises';
11
11
  import { isIP } from 'net';
12
- /** Extract the repository name from a git URL (HTTPS or SSH). */
12
+ /** Root directory for all cloned repositories. Targets must resolve inside this. */
13
+ const CLONE_ROOT = path.resolve(path.join(os.homedir(), '.gitnexus', 'repos'));
14
+ // A valid git repository name is filesystem-safe: alphanumerics plus `. _ -`.
15
+ // Rejecting anything else (including `..`, `/`, `\`, shell metacharacters)
16
+ // guarantees getCloneDir(repoName) cannot escape CLONE_ROOT regardless of
17
+ // how the caller derived repoName.
18
+ const REPO_NAME_PATTERN = /^[a-zA-Z0-9._-]+$/;
19
+ /**
20
+ * Extract the repository name from a git URL (HTTPS or SSH).
21
+ *
22
+ * Throws if the URL does not yield a filesystem-safe last segment. A name
23
+ * like `..` or `foo/bar` would otherwise let `getCloneDir(name)` escape the
24
+ * clone root via path traversal.
25
+ */
13
26
  export function extractRepoName(url) {
14
- const cleaned = url.replace(/\/+$/, '');
15
- const lastSegment = cleaned.split(/[/:]/).pop() || 'unknown';
16
- return lastSegment.replace(/\.git$/, '');
27
+ // Strip trailing slashes without a regex to avoid polynomial-ReDoS on
28
+ // pathological inputs like '/'.repeat(1e6) + 'x' (a long slash run that is not at the end). CodeQL's
29
+ // js/polynomial-redos flagged `/\/+$/` here.
30
+ let end = url.length;
31
+ while (end > 0 && url.charCodeAt(end - 1) === 47 /* '/' */)
32
+ end--;
33
+ const cleaned = url.slice(0, end);
34
+ const lastSegment = cleaned.split(/[/:]/).pop() || '';
35
+ const stripped = lastSegment.endsWith('.git') ? lastSegment.slice(0, -4) : lastSegment;
36
+ if (!stripped || stripped === '.' || stripped === '..' || !REPO_NAME_PATTERN.test(stripped)) {
37
+ throw new Error('Could not extract a valid repository name from URL');
38
+ }
39
+ return stripped;
17
40
  }
18
41
  /** Get the clone target directory for a repo name. */
19
42
  export function getCloneDir(repoName) {
20
- return path.join(os.homedir(), '.gitnexus', 'repos', repoName);
43
+ // Re-validate at the boundary even though extractRepoName already checked —
44
+ // callers may pass a repoName from another source (test fixtures, scripts).
45
+ if (!repoName || repoName === '.' || repoName === '..' || !REPO_NAME_PATTERN.test(repoName)) {
46
+ throw new Error('Invalid repository name');
47
+ }
48
+ return path.join(CLONE_ROOT, repoName);
21
49
  }
22
50
  // Cloud metadata hostnames that must never be reachable via user-supplied URLs
23
51
  const BLOCKED_HOSTNAMES = new Set([
@@ -161,24 +189,181 @@ function assertNotPrivateIPv4(ip) {
161
189
  throw new Error('Cloning from private/internal addresses is not allowed');
162
190
  }
163
191
  }
192
+ /**
193
+ * Build the `git clone` argument list for a given URL and target directory.
194
+ *
195
+ * The `--` separator is non-negotiable: it stops git from parsing a URL that
196
+ * starts with `--` (e.g. `--upload-pack=evil`) as an option flag, which would
197
+ * otherwise execute an attacker-chosen subprocess (CodeQL
198
+ * js/second-order-command-line-injection, alerts #166/#167).
199
+ *
200
+ * Exported so the separator placement is testable without mocking spawn.
201
+ */
202
+ export function buildCloneArgs(url, targetDir) {
203
+ return ['clone', '--depth', '1', '--', url, targetDir];
204
+ }
205
+ /**
206
+ * Normalize a git URL into a comparable form.
207
+ *
208
+ * Two URLs are considered the same repository when their normalized forms
209
+ * are identical: lowercased hostname, no trailing `.git`, no trailing
210
+ * slashes on the path, default port stripped. Path comparison stays
211
+ * case-sensitive because that's how Git hosts treat the path component on
212
+ * the wire (case-folding GitHub's web UI is a separate convenience).
213
+ *
214
+ * Returns the trimmed input, lowercased in full, if URL parsing fails — the
215
+ * caller still gets a literal compare for non-URL forms (e.g. SSH `git@host:`).
216
+ */
217
+ export function normalizeGitUrlForCompare(url) {
218
+ // Strip trailing slashes and a trailing `.git` for both URL and SSH forms.
219
+ let trimmed = url;
220
+ while (trimmed.length > 0 && trimmed[trimmed.length - 1] === '/') {
221
+ trimmed = trimmed.slice(0, -1);
222
+ }
223
+ if (trimmed.endsWith('.git'))
224
+ trimmed = trimmed.slice(0, -4);
225
+ try {
226
+ const parsed = new URL(trimmed);
227
+ parsed.hostname = parsed.hostname.toLowerCase();
228
+ // strip default ports
229
+ if ((parsed.protocol === 'https:' && parsed.port === '443') ||
230
+ (parsed.protocol === 'http:' && parsed.port === '80')) {
231
+ parsed.port = '';
232
+ }
233
+ // Strip credentials — never material to repo identity, and including
234
+ // them would let two equivalent URLs (with/without basic auth) compare
235
+ // unequal.
236
+ parsed.username = '';
237
+ parsed.password = '';
238
+ // Recompose without trailing slash on the path.
239
+ let pathname = parsed.pathname;
240
+ while (pathname.length > 1 && pathname[pathname.length - 1] === '/') {
241
+ pathname = pathname.slice(0, -1);
242
+ }
243
+ parsed.pathname = pathname;
244
+ return `${parsed.protocol}//${parsed.hostname}${parsed.port ? ':' + parsed.port : ''}${parsed.pathname}`;
245
+ }
246
+ catch {
247
+ // Non-URL forms (e.g. `git@github.com:owner/repo`) — return the trimmed
248
+ // form lowercased in full (host and path alike). SSH-form normalization
249
+ // is best-effort; exact-string compare is sufficient for the threat
250
+ // model (mismatched origins still differ at the literal level).
251
+ return trimmed.toLowerCase();
252
+ }
253
+ }
254
+ /**
255
+ * Read `remote.origin.url` from an existing clone using `git config --get`.
256
+ *
257
+ * Returns `null` if the config key is absent, the spawn fails, or the
258
+ * directory isn't a git repository. The caller decides what a missing
259
+ * remote means for its threat model — for cloneOrPull, a missing remote
260
+ * on an existing clone is treated as a refuse-to-pull condition.
261
+ */
262
+ export function getRemoteOriginUrl(cwd) {
263
+ return new Promise((resolve) => {
264
+ const proc = spawn('git', ['config', '--get', 'remote.origin.url'], {
265
+ cwd,
266
+ stdio: ['ignore', 'pipe', 'pipe'],
267
+ env: { ...process.env, GIT_TERMINAL_PROMPT: '0' },
268
+ });
269
+ let stdout = '';
270
+ proc.stdout.on('data', (chunk) => {
271
+ stdout += chunk;
272
+ });
273
+ proc.on('close', (code) => {
274
+ if (code === 0 && stdout.trim()) {
275
+ resolve(stdout.trim());
276
+ }
277
+ else {
278
+ resolve(null);
279
+ }
280
+ });
281
+ proc.on('error', () => resolve(null));
282
+ });
283
+ }
284
+ /**
285
+ * Verify that an existing clone's `remote.origin.url` matches the requested
286
+ * URL (after normalization). Throws on mismatch or missing remote.
287
+ *
288
+ * Closes the wrong-repo silent-analysis vector that Codex's adversarial
289
+ * review on PR #1325 surfaced: clone dirs are keyed by URL basename, so a
290
+ * request for `https://gitlab.example/attacker/repo.git` would otherwise
291
+ * collide with an existing `~/.gitnexus/repos/repo` cloned from a different
292
+ * origin and `git pull --ff-only` would silently succeed against the wrong
293
+ * remote.
294
+ *
295
+ * Exported so the comparison logic is testable in isolation against any
296
+ * tmpdir-based fixture, without needing to populate CLONE_ROOT.
297
+ */
298
+ export async function assertRemoteMatchesRequestedUrl(targetDir, requestedUrl) {
299
+ const remoteUrl = await getRemoteOriginUrl(targetDir);
300
+ if (remoteUrl === null) {
301
+ throw new Error(`Existing clone at ${targetDir} has no remote.origin — refusing to pull`);
302
+ }
303
+ if (normalizeGitUrlForCompare(remoteUrl) !== normalizeGitUrlForCompare(requestedUrl)) {
304
+ throw new Error(`Existing clone at ${targetDir} has remote ${remoteUrl}, not the requested URL ${requestedUrl}`);
305
+ }
306
+ }
164
307
  /**
165
308
  * Clone or pull a git repository.
166
309
  * If targetDir doesn't exist: git clone --depth 1
167
- * If targetDir exists with .git: git pull --ff-only
310
+ * If targetDir exists with .git: git pull --ff-only (after verifying the
311
+ * existing clone's remote.origin matches the requested URL).
312
+ *
313
+ * Security:
314
+ * - targetDir must resolve inside CLONE_ROOT (~/.gitnexus/repos/). The
315
+ * path.relative containment barrier below is the inline canonical idiom
316
+ * CodeQL's js/path-injection sanitizer recognizes.
317
+ * - validateGitUrl runs unconditionally on the requested URL — both the
318
+ * clone path and the pull path. An earlier shape only validated on the
319
+ * clone branch; an existing clone with the same basename let an
320
+ * attacker's URL skip the SSRF / scheme / private-IP checks (Codex
321
+ * adversarial review on PR #1325).
322
+ * - When the target already has `.git`, the existing clone's
323
+ * remote.origin.url is fetched and compared (normalized) to the
324
+ * requested URL. Refuses to pull if they differ — this closes the
325
+ * wrong-repo silent-analysis vector where two URLs sharing a basename
326
+ * would collide on the same on-disk clone dir.
327
+ * - The git URL is passed after a `--` separator so a value beginning with
328
+ * `--` (e.g. `--upload-pack=evil`) cannot be interpreted as a git option
329
+ * (CodeQL js/second-order-command-line-injection).
168
330
  */
169
331
  export async function cloneOrPull(url, targetDir, onProgress) {
170
- const exists = await fs.access(path.join(targetDir, '.git')).then(() => true, () => false);
332
+ // Containment barrier inline with the canonical path.relative idiom so
333
+ // CodeQL recognizes the sanitizer at every following filesystem and
334
+ // subprocess sink. The same `safeTarget` is used for every downstream
335
+ // path operation — no reassignment that the analyzer could lose track of.
336
+ //
337
+ // Limitation: this is a lexical containment check, not a realpath check.
338
+ // If an attacker can place a symlink under CLONE_ROOT pointing outside it,
339
+ // the lexical check passes but the clone lands at the symlink target. That
340
+ // requires pre-existing local write access to CLONE_ROOT, so the threat
341
+ // model considers it out of scope; CodeQL js/path-injection accepts the
342
+ // lexical form. Tracked as a follow-up if defense-in-depth is needed.
343
+ const safeTarget = path.resolve(targetDir);
344
+ const rel = path.relative(CLONE_ROOT, safeTarget);
345
+ if (rel === '' || rel.startsWith('..') || path.isAbsolute(rel)) {
346
+ throw new Error(`Clone target must be a subdirectory of ${CLONE_ROOT}`);
347
+ }
348
+ // Always validate the requested URL — the prior shape only ran this in
349
+ // the clone branch, leaving the pull branch as an SSRF / blocked-host
350
+ // bypass when an existing clone shared the basename of an attacker URL.
351
+ validateGitUrl(url);
352
+ const exists = await fs.access(path.join(safeTarget, '.git')).then(() => true, () => false);
171
353
  if (exists) {
354
+ // Confirm the existing clone is actually the same repository the caller
355
+ // requested. Without this check, a pull would silently succeed against
356
+ // whatever remote the dir was originally cloned from.
357
+ await assertRemoteMatchesRequestedUrl(safeTarget, url);
172
358
  onProgress?.({ phase: 'pulling', message: 'Pulling latest changes...' });
173
- await runGit(['pull', '--ff-only'], targetDir);
359
+ await runGit(['pull', '--ff-only'], safeTarget);
174
360
  }
175
361
  else {
176
- validateGitUrl(url);
177
- await fs.mkdir(path.dirname(targetDir), { recursive: true });
362
+ await fs.mkdir(path.dirname(safeTarget), { recursive: true });
178
363
  onProgress?.({ phase: 'cloning', message: `Cloning ${url}...` });
179
- await runGit(['clone', '--depth', '1', url, targetDir]);
364
+ await runGit(buildCloneArgs(url, safeTarget));
180
365
  }
181
- return targetDir;
366
+ return safeTarget;
182
367
  }
183
368
  function runGit(args, cwd) {
184
369
  return new Promise((resolve, reject) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.58",
3
+ "version": "1.6.4-rc.59",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",