@ictechgy/context-guard 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,9 +15,12 @@ import hashlib
15
15
  import ipaddress
16
16
  import json
17
17
  import math
18
+ import os
18
19
  import re
20
+ import secrets
19
21
  import shlex
20
22
  from pathlib import Path
23
+ import stat
21
24
  import sys
22
25
  from typing import Any, NoReturn
23
26
  import unicodedata
@@ -26,6 +29,7 @@ from urllib.parse import urlparse
26
29
  TOOL_NAME = "context-guard-experiments"
27
30
  CONFIG_SCHEMA_VERSION = "contextguard.experiments.v1"
28
31
  DEFAULT_CONFIG = Path(".context-guard") / "experiments.json"
32
+ MAX_CONFIG_BYTES = 64_000
29
33
  MAX_CONTEXT_DIFF_INPUT_BYTES = 256_000
30
34
  MAX_VISUAL_OCR_TEXT_BYTES = 64_000
31
35
  MAX_LEARNED_COMPRESSION_INPUT_BYTES = 128_000
@@ -49,6 +53,17 @@ LOCAL_PROXY_DEFAULT_BIND_PORT = 0
49
53
  LOCAL_PROXY_DEFAULT_TARGET_HOST = "127.0.0.1"
50
54
  LOCAL_PROXY_DEFAULT_TARGET_PORT = 0
51
55
  LOCAL_PROXY_LOCALHOST_NAMES = {"localhost"}
56
+ ALLOWED_FIRST_COMPONENT_SYMLINKS = {
57
+ "tmp": Path("/private/tmp"),
58
+ "var": Path("/private/var"),
59
+ }
60
+ DIR_FD_OPEN_SUPPORTED = os.open in getattr(os, "supports_dir_fd", set())
61
+ DIR_FD_MKDIR_SUPPORTED = os.mkdir in getattr(os, "supports_dir_fd", set())
62
+ DIR_FD_STAT_NOFOLLOW_SUPPORTED = (
63
+ os.stat in getattr(os, "supports_dir_fd", set())
64
+ and os.stat in getattr(os, "supports_follow_symlinks", set())
65
+ )
66
+ NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
52
67
 
53
68
 
54
69
  @dataclass(frozen=True)
@@ -276,6 +291,306 @@ def fail(message: str, code: int = 2) -> NoReturn:
276
291
  raise SystemExit(code)
277
292
 
278
293
 
294
def os_error_detail(exc: OSError) -> str:
    """Render an ``OSError`` as a short description string.

    The text is ``exc.strerror`` when it is set, otherwise the exception's
    class name.  ``" (errno N)"`` is appended whenever ``exc.errno`` is not
    ``None``.  Only ``strerror``, ``errno`` and the class name are used.
    """
    summary = exc.strerror if exc.strerror else type(exc).__name__
    return summary if exc.errno is None else f"{summary} (errno {exc.errno})"
299
+
300
+
301
def _no_follow_flag(*, label: str) -> int:
    """Return ``os.O_NOFOLLOW``, or fail when the platform does not define it.

    Args:
        label: Human-readable name of the thing being opened; used only in
            the error message.

    Raises:
        RegistryError: If ``NO_FOLLOW_SUPPORTED`` is false.
    """
    if NO_FOLLOW_SUPPORTED:
        return os.O_NOFOLLOW
    raise RegistryError(f"{label} requires O_NOFOLLOW support")
305
+
306
+
307
def _directory_open_flags(*, follow_final: bool = False, label: str) -> int:
    """Build the ``os.open`` flags used to open a directory component.

    Starts from ``O_RDONLY`` and adds ``O_CLOEXEC`` and ``O_DIRECTORY`` when
    the platform defines them.  Unless ``follow_final`` is true, the
    no-follow flag is added as well.

    Args:
        follow_final: When true, do not add ``O_NOFOLLOW``.
        label: Name used in the error raised if ``O_NOFOLLOW`` is required
            but unavailable.

    Raises:
        RegistryError: Propagated from ``_no_follow_flag``.
    """
    flags = os.O_RDONLY
    # Optional flags contribute 0 on platforms that do not define them.
    for optional in ("O_CLOEXEC", "O_DIRECTORY"):
        flags |= getattr(os, optional, 0)
    if follow_final:
        return flags
    return flags | _no_follow_flag(label=label)
316
+
317
+
318
def _file_open_flags(*, label: str, write: bool = False) -> int:
    """Build the ``os.open`` flags for opening a leaf file without following symlinks.

    Args:
        label: Name used in the error raised if ``O_NOFOLLOW`` is unavailable.
        write: When true, open write-only with create and truncate; otherwise
            open read-only.

    Returns:
        The access flags combined with ``O_NOFOLLOW`` and, where the platform
        defines them, ``O_CLOEXEC``, ``O_NONBLOCK`` and ``O_NOCTTY``.

    Raises:
        RegistryError: Propagated from ``_no_follow_flag``.
    """
    if write:
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    else:
        flags = os.O_RDONLY
    flags |= _no_follow_flag(label=label)
    # NOTE(review): O_NONBLOCK/O_NOCTTY presumably keep the open from blocking
    # on, or attaching to, a FIFO or terminal device before the caller's
    # regular-file check runs -- confirm intent.
    for optional in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, optional, 0)
    return flags
328
+
329
+
330
def _temp_file_open_flags(*, label: str) -> int:
    """Build the ``os.open`` flags for creating a brand-new temporary file.

    The result is write-only with ``O_CREAT | O_EXCL`` (the open fails if the
    name already exists) plus ``O_NOFOLLOW`` and, where the platform defines
    them, ``O_CLOEXEC`` and ``O_NOCTTY``.

    Args:
        label: Name used in the error raised if ``O_NOFOLLOW`` is unavailable.

    Raises:
        RegistryError: Propagated from ``_no_follow_flag``.
    """
    base = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    flags = base | _no_follow_flag(label=label)
    for optional in ("O_CLOEXEC", "O_NOCTTY"):
        flags |= getattr(os, optional, 0)
    return flags
338
+
339
+
340
def _leaf_name(path: Path, *, label: str) -> str:
    """Return the final component of ``path``, rejecting empty or dot names.

    Args:
        path: Path whose last component is wanted.
        label: Name used in the error message.

    Raises:
        RegistryError: If the final component is ``""``, ``"."`` or ``".."``.
    """
    leaf = path.name
    if leaf not in ("", ".", ".."):
        return leaf
    raise RegistryError(f"{label} must name a regular file")
345
+
346
+
347
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Turn a raw symlink target into a lexically normalized path.

    Absolute targets are normalized as-is; relative targets are first joined
    onto ``anchor``.  Normalization is purely lexical (``os.path.normpath``);
    the filesystem is not consulted.
    """
    target = Path(raw_target)
    joined = target if target.is_absolute() else anchor / target
    return Path(os.path.normpath(str(joined)))
352
+
353
+
354
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite an allow-listed top-level symlink to its expected target.

    If ``path`` is absolute, its first component is a key of
    ``ALLOWED_FIRST_COMPONENT_SYMLINKS``, and that component is currently a
    symlink whose normalized target equals the mapped path, the rest of
    ``path`` is re-rooted under the mapped path.  In every other case,
    including any ``OSError`` while inspecting the link, ``path`` is returned
    unchanged.
    """
    parts = path.parts
    if not path.is_absolute() or len(parts) < 2:
        return path
    head = parts[1]
    canonical = ALLOWED_FIRST_COMPONENT_SYMLINKS.get(head)
    if canonical is None:
        return path
    root_dir = Path(path.anchor)
    link = root_dir / head
    try:
        if not link.is_symlink():
            return path
        # Only rewrite when the link points exactly where the table expects.
        if _normalized_link_target(root_dir, os.readlink(link)) != canonical:
            return path
    except OSError:
        return path
    return canonical.joinpath(*parts[2:])
371
+
372
+
373
def normalize_local_path(path: Path) -> Path:
    """Return an absolute, lexically normalized form of ``path``.

    ``~`` is expanded, relative paths are anchored at the current working
    directory, ``.``/``..`` segments are collapsed with ``os.path.normpath``
    (no symlink resolution), and finally an allow-listed first-component
    symlink is rewritten via ``normalize_allowed_first_absolute_symlink``.
    """
    expanded = path.expanduser()
    absolute = expanded if expanded.is_absolute() else Path.cwd() / expanded
    collapsed = Path(os.path.normpath(str(absolute)))
    return normalize_allowed_first_absolute_symlink(collapsed)
378
+
379
+
380
def normalize_project_path(root: Path, candidate: Path, *, label: str) -> Path:
    """Normalize ``candidate`` and require that it stays inside ``root``.

    ``candidate`` has ``~`` expanded, is joined onto ``root`` when relative,
    is collapsed lexically with ``os.path.normpath`` and then has an
    allow-listed first-component symlink rewritten.  The containment check
    is lexical; symlinks below the first component are not resolved here.

    Args:
        root: Project root directory.
        candidate: Path supplied by the caller, absolute or root-relative.
        label: Name used in the error message.

    Returns:
        The normalized candidate path.

    Raises:
        RegistryError: If the normalized candidate is not under ``root``.
    """
    candidate = candidate.expanduser()
    if not candidate.is_absolute():
        candidate = root / candidate
    normalized = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(candidate))))
    # Put the root through the same rewrite as the candidate.  Otherwise a
    # root spelled through an allow-listed symlink (e.g. "/tmp/proj") never
    # matches a candidate that was rewritten to the link's target, and every
    # path is rejected as outside the project.
    comparable_root = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(root))))
    try:
        normalized.relative_to(comparable_root)
    except ValueError as exc:
        raise RegistryError(f"{label} must stay inside project root: {normalized}") from exc
    return normalized
390
+
391
+
392
def open_directory_no_follow(path: Path, *, label: str, create: bool = False, missing_ok: bool = False) -> int | None:
    """Open ``path`` one component at a time without following symlinks.

    Each component is opened relative to the previous component's file
    descriptor using ``O_NOFOLLOW``.  For an absolute path the walk starts at
    the path's anchor, otherwise at ``"."``.

    Args:
        path: Directory to open.  An allow-listed first-component symlink is
            rewritten first.
        label: Name used in error messages.
        create: Create missing components with mode ``0o755``.
        missing_ok: Return ``None`` instead of failing when a component does
            not exist.  Takes precedence over ``create``.

    Returns:
        A directory file descriptor owned by the caller, or ``None`` when
        ``missing_ok`` is set and a component is missing.

    Raises:
        RegistryError: If the platform lacks the required ``dir_fd`` or
            ``O_NOFOLLOW`` support, the path contains ``..``, a component is
            not a directory, or any underlying OS call fails.
    """

    def _close_quietly(fd: int) -> None:
        try:
            os.close(fd)
        except OSError:
            pass

    path = normalize_allowed_first_absolute_symlink(path)
    if not DIR_FD_OPEN_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd mkdir support")
    flags = _directory_open_flags(label=label)
    if path.is_absolute():
        start = path.anchor or os.sep
        parts = path.parts[1:]
        # The anchor itself is opened without O_NOFOLLOW.
        start_flags = _directory_open_flags(follow_final=True, label=label)
    else:
        start = "."
        parts = path.parts
        start_flags = flags
    try:
        current_fd = os.open(start, start_flags)
    except OSError as exc:
        raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RegistryError(f"{label} must not contain parent traversal")
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if missing_ok:
                    # The ``finally`` clause below closes ``current_fd``.
                    return None
                if not create:
                    raise RegistryError(f"could not inspect {label}: missing directory component") from None
                try:
                    os.mkdir(part, mode=0o755, dir_fd=current_fd)
                except FileExistsError:
                    # Another writer created it first; the re-open below
                    # still verifies what is there.
                    pass
                except OSError as exc:
                    raise RegistryError(f"could not create {label}: {os_error_detail(exc)}") from exc
                try:
                    next_fd = os.open(part, flags, dir_fd=current_fd)
                except OSError as exc:
                    raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            except OSError as exc:
                raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            try:
                is_directory = stat.S_ISDIR(os.fstat(next_fd).st_mode)
            except OSError as exc:
                # Report fstat failures as RegistryError like every other
                # failure in this function, rather than leaking a raw OSError.
                _close_quietly(next_fd)
                raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            if not is_directory:
                _close_quietly(next_fd)
                raise RegistryError(f"{label} must not traverse non-directory components")
            _close_quietly(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller so ``finally`` does not close it.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    finally:
        if current_fd >= 0:
            _close_quietly(current_fd)
464
+
465
+
466
def _precheck_regular_leaf(parent_fd: int, leaf_name: str, *, label: str, missing_ok: bool = False) -> bool:
    """Check, without following symlinks, that a leaf is a regular file.

    Args:
        parent_fd: Directory file descriptor the leaf is looked up in.
        leaf_name: Name of the entry inside that directory.
        label: Name used in error messages.
        missing_ok: Return ``False`` instead of failing when the entry does
            not exist.

    Returns:
        ``True`` when the entry exists and is a regular file; ``False`` when
        it is missing and ``missing_ok`` is set.

    Raises:
        RegistryError: If ``dir_fd``/no-follow ``stat`` is unsupported, the
            entry is missing (and ``missing_ok`` is false), the ``stat`` call
            fails, or the entry is not a regular file.
    """
    if not DIR_FD_STAT_NOFOLLOW_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd stat support")
    try:
        leaf_stat = os.stat(leaf_name, dir_fd=parent_fd, follow_symlinks=False)
    except FileNotFoundError:
        if not missing_ok:
            raise RegistryError(f"could not inspect {label}: missing file") from None
        return False
    except OSError as exc:
        raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    if stat.S_ISREG(leaf_stat.st_mode):
        return True
    raise RegistryError(f"{label} must be a regular file")
480
+
481
+
482
def read_bounded_regular_file(path: Path, *, max_bytes: int, label: str, missing_ok: bool = False) -> tuple[bytes, bool] | None:
    """Read at most ``max_bytes`` from a regular file without following symlinks.

    The parent directory is opened component by component with no-follow
    semantics.  The leaf is checked to be a regular file both before and
    after it is opened.

    Args:
        path: File to read; normalized with ``normalize_local_path``.
        max_bytes: Maximum number of bytes returned.
        label: Name used in error messages.
        missing_ok: Return ``None`` instead of failing when the file or one
            of its parent directories does not exist.

    Returns:
        ``(data, truncated)`` where ``data`` holds at most ``max_bytes`` bytes
        and ``truncated`` is true when the file contained more; or ``None``
        when the file is missing and ``missing_ok`` is set.

    Raises:
        RegistryError: If the path cannot be opened safely, is not a regular
            file, or reading fails.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", missing_ok=missing_ok)
    if parent_fd is None:
        return None
    fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        if not _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=missing_ok):
            return None
        try:
            fd = os.open(leaf, _file_open_flags(label=label), dir_fd=parent_fd)
        except FileNotFoundError:
            # The file can vanish between the precheck and the open; honor
            # ``missing_ok`` here too instead of reporting a read failure.
            if missing_ok:
                return None
            raise
        # Re-check on the opened descriptor: the name may have been swapped
        # for something else after the precheck.
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        chunks: list[bytes] = []
        # Read one byte past the limit so truncation can be detected.
        remaining = max_bytes + 1
        while remaining > 0:
            chunk = os.read(fd, min(64 * 1024, remaining))
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        raw = b"".join(chunks)
        truncated = len(raw) > max_bytes
        return raw[:max_bytes], truncated
    except OSError as exc:
        raise RegistryError(f"could not read {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
519
+
520
+
521
def write_all_fd(fd: int, data: bytes) -> None:
    """Write all of ``data`` to ``fd``, retrying after partial writes.

    Nothing is written when ``data`` is empty.

    Raises:
        OSError: With message ``"short write"`` if ``os.write`` reports that
            zero or fewer bytes were written; any error raised by
            ``os.write`` itself also propagates.
    """
    pending = memoryview(data)
    while len(pending) > 0:
        count = os.write(fd, pending)
        if count <= 0:
            raise OSError("short write")
        # Slicing a memoryview does not copy the remaining bytes.
        pending = pending[count:]
529
+
530
+
531
def write_regular_file_no_follow(path: Path, data: bytes, *, label: str) -> None:
    """Replace ``path`` with ``data`` via a temp file, never following symlinks.

    The parent directory is opened (and created if needed) component by
    component with no-follow semantics.  The data is written to an
    exclusively created temporary file in that directory, which is then
    renamed over the destination with ``os.replace``.

    Args:
        path: Destination file; normalized with ``normalize_local_path``.
        data: Bytes to write.
        label: Name used in error messages.

    Raises:
        RegistryError: If the parent cannot be opened or created safely, the
            destination exists but is not a regular file, no temporary file
            could be created, or writing or renaming fails.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    temp_leaf: str | None = None
    try:
        leaf = _leaf_name(path, label=label)
        exists = _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True)
        mode = 0o644
        # Set only when the existing file's (non-zero) mode was actually read.
        existing_mode: int | None = None
        if exists:
            try:
                existing_mode = stat.S_IMODE(os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False).st_mode) or None
            except OSError:
                existing_mode = None
            if existing_mode is not None:
                mode = existing_mode
        for _attempt in range(20):
            candidate = _leaf_name(Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"), label=f"{label} temp")
            try:
                fd = os.open(candidate, _temp_file_open_flags(label=f"{label} temp"), mode, dir_fd=parent_fd)
            except FileExistsError:
                continue
            temp_leaf = candidate
            break
        if fd < 0 or temp_leaf is None:
            raise RegistryError(f"could not create temporary {label}")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} temp must be a regular file")
        if existing_mode is not None and hasattr(os, "fchmod"):
            # The creation mode passed to os.open is masked by the umask, so
            # restore the existing file's permission bits explicitly.
            # Best-effort, like the fsync calls below.
            try:
                os.fchmod(fd, existing_mode)
            except OSError:
                pass
        write_all_fd(fd, data)
        try:
            os.fsync(fd)
        except OSError:
            pass
        try:
            os.close(fd)
        except OSError:
            pass
        fd = -1
        os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # The temp name no longer exists after a successful rename.
        temp_leaf = None
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        if temp_leaf is not None:
            # Remove a leftover temp file after a failed write or rename.
            try:
                os.unlink(temp_leaf, dir_fd=parent_fd)
            except OSError:
                pass
        try:
            os.fsync(parent_fd)
        except OSError:
            pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
592
+
593
+
279
594
  def resolve_root(raw_root: str | None) -> Path:
280
595
  root = Path(raw_root) if raw_root else Path.cwd()
281
596
  try:
@@ -286,27 +601,25 @@ def resolve_root(raw_root: str | None) -> Path:
286
601
 
287
602
  def resolve_config_path(root: Path, raw_config: str | None) -> Path:
288
603
  if raw_config:
289
- candidate = Path(raw_config).expanduser()
290
- if not candidate.is_absolute():
291
- candidate = root / candidate
604
+ candidate = Path(raw_config)
292
605
  else:
293
- candidate = root / DEFAULT_CONFIG
294
- try:
295
- resolved = candidate.resolve(strict=False)
296
- except OSError as exc:
297
- raise RegistryError(f"could not resolve config path: {candidate}: {exc}") from exc
298
- try:
299
- resolved.relative_to(root)
300
- except ValueError as exc:
301
- raise RegistryError(f"config path must stay inside project root: {resolved}") from exc
302
- return resolved
606
+ candidate = DEFAULT_CONFIG
607
+ return normalize_project_path(root, candidate, label="config path")
303
608
 
304
609
 
305
610
  def load_config(path: Path) -> dict[str, Any]:
306
- if not path.exists():
611
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_CONFIG_BYTES, label="config", missing_ok=True)
612
+ if loaded is None:
307
613
  return {"schema_version": CONFIG_SCHEMA_VERSION, "enabled": []}
614
+ raw, truncated = loaded
615
+ if truncated:
616
+ raise RegistryError("config exceeded max bytes")
617
+ try:
618
+ text = raw.decode("utf-8")
619
+ except UnicodeDecodeError as exc:
620
+ raise RegistryError(f"could not decode config UTF-8: {path}: {exc.reason}") from exc
308
621
  try:
309
- data = json.loads(path.read_text(encoding="utf-8"))
622
+ data = json.loads(text)
310
623
  except json.JSONDecodeError as exc:
311
624
  raise RegistryError(f"could not parse config JSON: {path}: {exc.msg}") from exc
312
625
  except OSError as exc:
@@ -328,11 +641,8 @@ def write_config(path: Path, enabled: set[str]) -> dict[str, Any]:
328
641
  "updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
329
642
  "enabled": sorted(enabled),
330
643
  }
331
- try:
332
- path.parent.mkdir(parents=True, exist_ok=True)
333
- path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
334
- except OSError as exc:
335
- raise RegistryError(f"could not write config: {path}: {exc}") from exc
644
+ payload = (json.dumps(data, indent=2, sort_keys=True) + "\n").encode("utf-8")
645
+ write_regular_file_no_follow(path, payload, label="config")
336
646
  return data
337
647
 
338
648
 
@@ -459,18 +769,16 @@ def read_bounded_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
459
769
  if args.input:
460
770
  path = Path(args.input)
461
771
  source_label = source_label or str(path)
462
- try:
463
- with path.open("rb") as handle:
464
- raw = handle.read(MAX_CONTEXT_DIFF_INPUT_BYTES + 1)
465
- except OSError as exc:
466
- raise RegistryError(f"could not read input: {path}: {exc}") from exc
772
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_CONTEXT_DIFF_INPUT_BYTES, label="input")
773
+ assert loaded is not None
774
+ raw, truncated = loaded
467
775
  else:
468
776
  source_label = source_label or "stdin"
469
777
  raw = sys.stdin.buffer.read(MAX_CONTEXT_DIFF_INPUT_BYTES + 1)
778
+ truncated = len(raw) > MAX_CONTEXT_DIFF_INPUT_BYTES
779
+ raw = raw[:MAX_CONTEXT_DIFF_INPUT_BYTES]
470
780
  if not raw:
471
781
  raise RegistryError("context-diff-compaction plan requires diff input on stdin or --input")
472
- truncated = len(raw) > MAX_CONTEXT_DIFF_INPUT_BYTES
473
- raw = raw[:MAX_CONTEXT_DIFF_INPUT_BYTES]
474
782
  text = raw.decode("utf-8", errors="replace")
475
783
  metadata = {
476
784
  "source_label": source_label,
@@ -678,23 +986,21 @@ def read_visual_ocr_text(args: argparse.Namespace) -> dict[str, Any]:
678
986
  if args.ocr_text_file is not None:
679
987
  path = Path(args.ocr_text_file)
680
988
  source_label = args.ocr_source_label.strip() if args.ocr_source_label else path.name
681
- try:
682
- with path.open("rb") as handle:
683
- raw = handle.read(MAX_VISUAL_OCR_TEXT_BYTES + 1)
684
- except OSError as exc:
685
- raise RegistryError(f"could not read OCR text file: {path}: {exc}") from exc
989
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_VISUAL_OCR_TEXT_BYTES, label="OCR text file")
990
+ assert loaded is not None
991
+ raw, truncated = loaded
686
992
  source_type = "file"
687
993
  elif args.ocr_text is not None:
688
994
  raw = args.ocr_text.encode("utf-8")
689
995
  source_label = args.ocr_source_label.strip() if args.ocr_source_label else "inline"
690
996
  source_type = "inline"
997
+ truncated = len(raw) > MAX_VISUAL_OCR_TEXT_BYTES
998
+ raw = raw[:MAX_VISUAL_OCR_TEXT_BYTES]
691
999
  else:
692
1000
  raw = b""
693
1001
  source_label = args.ocr_source_label.strip() if args.ocr_source_label else None
694
1002
  source_type = None
695
-
696
- truncated = len(raw) > MAX_VISUAL_OCR_TEXT_BYTES
697
- raw = raw[:MAX_VISUAL_OCR_TEXT_BYTES]
1003
+ truncated = False
698
1004
  try:
699
1005
  text = raw.decode("utf-8")
700
1006
  valid_encoding = True
@@ -1059,22 +1365,21 @@ def read_self_hosted_payload(args: argparse.Namespace) -> tuple[Any, dict[str, A
1059
1365
  path = Path(args.input)
1060
1366
  source_label = source_label or sanitize_self_hosted_text(path)
1061
1367
  try:
1062
- with path.open("rb") as handle:
1063
- raw = handle.read(MAX_SELF_HOSTED_METRICS_INPUT_BYTES + 1)
1064
- except OSError as exc:
1065
- safe_path = sanitize_self_hosted_text(path)
1066
- detail = exc.strerror or exc.__class__.__name__
1067
- if exc.errno is not None:
1068
- detail = f"{detail} (errno {exc.errno})"
1069
- raise RegistryError(f"could not read self-hosted metrics input: {safe_path}: {detail}") from exc
1368
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_SELF_HOSTED_METRICS_INPUT_BYTES, label=f"self-hosted metrics input: {source_label}")
1369
+ except RegistryError as exc:
1370
+ raise RegistryError(f"could not read self-hosted metrics input: {source_label}: {exc}") from exc
1371
+ assert loaded is not None
1372
+ raw, loaded_truncated = loaded
1070
1373
  else:
1071
1374
  source_label = source_label or "stdin"
1072
1375
  raw = sys.stdin.buffer.read(MAX_SELF_HOSTED_METRICS_INPUT_BYTES + 1)
1073
- if len(raw) > MAX_SELF_HOSTED_METRICS_INPUT_BYTES:
1376
+ loaded_truncated = len(raw) > MAX_SELF_HOSTED_METRICS_INPUT_BYTES
1377
+ raw = raw[:MAX_SELF_HOSTED_METRICS_INPUT_BYTES]
1378
+ if loaded_truncated:
1074
1379
  return None, {
1075
1380
  "source_label": source_label,
1076
1381
  "bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
1077
- "sha256": hashlib.sha256(raw[:MAX_SELF_HOSTED_METRICS_INPUT_BYTES]).hexdigest(),
1382
+ "sha256": hashlib.sha256(raw).hexdigest(),
1078
1383
  "truncated": True,
1079
1384
  "max_bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
1080
1385
  "envelope_source": None,
@@ -1333,18 +1638,16 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
1333
1638
  path = Path(args.input)
1334
1639
  safe_path = sanitize_local_proxy_value(path)
1335
1640
  try:
1336
- with path.open("rb") as handle:
1337
- raw = handle.read(MAX_SELF_HOSTED_METRICS_INPUT_BYTES + 1)
1338
- except OSError as exc:
1339
- detail = exc.strerror or exc.__class__.__name__
1340
- if exc.errno is not None:
1341
- detail = f"{detail} (errno {exc.errno})"
1342
- raise RegistryError(f"could not read local-proxy input: {safe_path}: {detail}") from exc
1343
- if len(raw) > MAX_SELF_HOSTED_METRICS_INPUT_BYTES:
1641
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_SELF_HOSTED_METRICS_INPUT_BYTES, label=f"local-proxy input: {safe_path}")
1642
+ except RegistryError as exc:
1643
+ raise RegistryError(f"could not read local-proxy input: {safe_path}: {exc}") from exc
1644
+ assert loaded is not None
1645
+ raw, loaded_truncated = loaded
1646
+ if loaded_truncated:
1344
1647
  return {}, {
1345
1648
  "source_label": safe_path,
1346
1649
  "bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
1347
- "sha256": hashlib.sha256(raw[:MAX_SELF_HOSTED_METRICS_INPUT_BYTES]).hexdigest(),
1650
+ "sha256": hashlib.sha256(raw).hexdigest(),
1348
1651
  "truncated": True,
1349
1652
  "ignored_keys": [],
1350
1653
  }
@@ -1691,16 +1994,14 @@ def read_learned_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
1691
1994
  if args.input:
1692
1995
  path = Path(args.input)
1693
1996
  source_label = source_label or path.name
1694
- try:
1695
- with path.open("rb") as handle:
1696
- raw = handle.read(MAX_LEARNED_COMPRESSION_INPUT_BYTES + 1)
1697
- except OSError as exc:
1698
- raise RegistryError(f"could not read learned-compression input: {path}: {exc}") from exc
1997
+ loaded = read_bounded_regular_file(path, max_bytes=MAX_LEARNED_COMPRESSION_INPUT_BYTES, label="learned-compression input")
1998
+ assert loaded is not None
1999
+ raw, truncated = loaded
1699
2000
  else:
1700
2001
  source_label = source_label or "stdin"
1701
2002
  raw = sys.stdin.buffer.read(MAX_LEARNED_COMPRESSION_INPUT_BYTES + 1)
1702
- truncated = len(raw) > MAX_LEARNED_COMPRESSION_INPUT_BYTES
1703
- raw = raw[:MAX_LEARNED_COMPRESSION_INPUT_BYTES]
2003
+ truncated = len(raw) > MAX_LEARNED_COMPRESSION_INPUT_BYTES
2004
+ raw = raw[:MAX_LEARNED_COMPRESSION_INPUT_BYTES]
1704
2005
  text = raw.decode("utf-8", errors="replace")
1705
2006
  metadata = {
1706
2007
  "source_label": source_label,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ictechgy/context-guard",
3
- "version": "0.4.7",
3
+ "version": "0.4.8",
4
4
  "description": "ContextGuard CLI helpers for keeping AI coding agent context focused and local-first.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://github.com/ictechgy/context-guard#readme",
@@ -5,7 +5,7 @@ class ContextGuard < Formula
5
5
 
6
6
  desc "Local-first context guardrails for AI coding agents"
7
7
  homepage "https://github.com/ictechgy/context-guard"
8
- url "https://github.com/ictechgy/context-guard/archive/refs/tags/v0.4.7.tar.gz"
8
+ url "https://github.com/ictechgy/context-guard/archive/refs/tags/v0.4.8.tar.gz"
9
9
  sha256 "REPLACE_WITH_RELEASE_TARBALL_SHA256"
10
10
  license "Apache-2.0"
11
11
 
@@ -37,5 +37,5 @@
37
37
  "gated-experiments",
38
38
  "future-roadmap"
39
39
  ],
40
- "version": "0.4.7"
40
+ "version": "0.4.8"
41
41
  }