PyPI - argot-engine - Versions diffs - 0.2.7__tar.gz → 0.2.8__tar.gz - Mend

argot-engine 0.2.7.tar.gz → 0.2.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{argot_engine-0.2.7 → argot_engine-0.2.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: argot-engine
-Version: 0.2.7
+Version: 0.2.8
 Requires-Python: >=3.11
 Requires-Dist: pygit2==1.19.2
 Requires-Dist: scikit-learn>=1.5.0

{argot_engine-0.2.7 → argot_engine-0.2.8}/argot/check.py RENAMED Viewed

@@ -177,8 +177,13 @@ def main() -> None:
     results.sort(key=lambda r: r[0], reverse=True)
+    col_w = 55
+    def _trunc(fp: str) -> str:
+        return fp if len(fp) <= col_w else "..." + fp[-(col_w - 3) :]
     t = args.threshold
-    print(f"{'SURPRISE':>9}  {'TAG':<10}  {'FILE':<48}  {'LINE':>5}  REF")
+    print(f"{'SURPRISE':>9}  {'TAG':<10}  {'FILE':<{col_w}}  {'LINE':>5}  REF")
     for score, fp, line, ref in results:
         if score <= t:
             tag = "ok"
@@ -188,7 +193,7 @@ def main() -> None:
             tag = "suspicious"
         else:
             tag = "foreign"
-        print(f"{score:>9.4f}  {tag:<10}  {fp:<48}  {line:>5}  {ref}")
+        print(f"{score:>9.4f}  {tag:<10}  {_trunc(fp):<{col_w}}  {line:>5}  {ref}")
     if any(s > args.threshold for s, *_ in results):
         sys.exit(1)

{argot_engine-0.2.7 → argot_engine-0.2.8}/argot/explain.py RENAMED Viewed

@@ -11,7 +11,7 @@ import numpy as np
 import pygit2
 import torch
-from argot.check import _resolve_shas
+from argot.check import _resolve_shas, _workdir_patches
 from argot.git_walk import walk_commits
 from argot.jepa.encoder import TokenEncoder
 from argot.jepa.model import JEPAArgot
@@ -68,7 +68,7 @@ def _score_dataset(
 def main() -> None:
     parser = argparse.ArgumentParser(description="Explain style anomalies in a git ref")
     parser.add_argument("repo_path")
-    parser.add_argument("ref")
+    parser.add_argument("ref", nargs="?", default="")
     parser.add_argument("--model", default=".argot/model.pkl")
     parser.add_argument("--dataset", default=".argot/dataset.jsonl")
     parser.add_argument("--threshold-percentile", type=float, default=75.0)
@@ -105,62 +105,71 @@ def main() -> None:
     style_examples = select_style_examples(scored_dataset, n=args.examples)
     example_texts = [" ".join(t["text"] for t in r["hunk_tokens"]) for r in style_examples]
-    repo = pygit2.Repository(args.repo_path)
-    shas = _resolve_shas(repo, args.ref)
-    if not shas:
-        sys.exit(0)
     context_lines = 50
-    with torch.no_grad():
-        for commit, file_path, post_blob, hunks in walk_commits(args.repo_path, shas):
-            lang = language_for_path(file_path)
-            if lang is None:
-                continue
-            try:
-                source_lines = post_blob.decode("utf-8", errors="replace").splitlines()
-            except Exception:
-                continue
-            for hunk in hunks:
-                hunk_start = hunk.new_start - 1
-                hunk_end = hunk_start + hunk.new_lines
-                if hunk_start < 0 or hunk_end > len(source_lines):
-                    continue
-                before_start = max(0, hunk_start - context_lines)
-                ctx_tokens = tokenize_lines(source_lines, lang, before_start, hunk_start)
-                hunk_tokens = tokenize_lines(source_lines, lang, hunk_start, hunk_end)
+    def _emit_patches(patches: Any, commit_label: str) -> None:
+        with torch.no_grad():
+            for file_path, post_blob, hunks in patches:
+                lang = language_for_path(file_path)
+                if lang is None:
+                    continue
+                try:
+                    source_lines = post_blob.decode("utf-8", errors="replace").splitlines()
+                except Exception:
+                    continue
-                ctx_text = " ".join(t.text for t in ctx_tokens)
-                hunk_text = " ".join(t.text for t in hunk_tokens)
+                for hunk in hunks:
+                    hunk_start = hunk.new_start - 1
+                    hunk_end = hunk_start + hunk.new_lines
+                    if hunk_start < 0 or hunk_end > len(source_lines):
+                        continue
-                ctx_vec = torch.tensor(
-                    vectorizer.transform([ctx_text]).toarray(), dtype=torch.float32
-                )
-                hunk_vec = torch.tensor(
-                    vectorizer.transform([hunk_text]).toarray(), dtype=torch.float32
-                )
+                    before_start = max(0, hunk_start - context_lines)
+                    ctx_tokens = tokenize_lines(source_lines, lang, before_start, hunk_start)
+                    hunk_tokens = tokenize_lines(source_lines, lang, hunk_start, hunk_end)
-                score = model.surprise(ctx_vec, hunk_vec).item()
-                pct = percentile_rank(score, distribution)
+                    ctx_text = " ".join(t.text for t in ctx_tokens)
+                    hunk_text = " ".join(t.text for t in hunk_tokens)
-                if pct < args.threshold_percentile:
-                    continue
+                    ctx_vec = torch.tensor(
+                        vectorizer.transform([ctx_text]).toarray(), dtype=torch.float32
+                    )
+                    hunk_vec = torch.tensor(
+                        vectorizer.transform([hunk_text]).toarray(), dtype=torch.float32
+                    )
-                print(
-                    json.dumps(
-                        {
-                            "file_path": file_path,
-                            "line": hunk.new_start,
-                            "commit": str(commit.id)[:8],
-                            "surprise": round(score, 4),
-                            "percentile": round(pct, 1),
-                            "hunk_text": hunk_text,
-                            "context_text": ctx_text,
-                            "style_examples": example_texts,
-                        }
+                    score = model.surprise(ctx_vec, hunk_vec).item()
+                    pct = percentile_rank(score, distribution)
+                    if pct < args.threshold_percentile:
+                        continue
+                    print(
+                        json.dumps(
+                            {
+                                "file_path": file_path,
+                                "line": hunk.new_start,
+                                "commit": commit_label,
+                                "surprise": round(score, 4),
+                                "percentile": round(pct, 1),
+                                "hunk_text": hunk_text,
+                                "context_text": ctx_text,
+                                "style_examples": example_texts,
+                            }
+                        )
                     )
-                )
+    if args.ref == "":
+        _emit_patches(_workdir_patches(args.repo_path), "workdir")
+    else:
+        repo = pygit2.Repository(args.repo_path)
+        shas = _resolve_shas(repo, args.ref)
+        if not shas:
+            sys.exit(0)
+        _emit_patches(
+            ((fp, blob, hunks) for _, fp, blob, hunks in walk_commits(args.repo_path, shas)),
+            args.ref,
+        )
 if __name__ == "__main__":

{argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: argot-engine
-Version: 0.2.7
+Version: 0.2.8
 Requires-Python: >=3.11
 Requires-Dist: pygit2==1.19.2
 Requires-Dist: scikit-learn>=1.5.0

{argot_engine-0.2.7 → argot_engine-0.2.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "argot-engine"
-version = "0.2.7"
+version = "0.2.8"
 requires-python = ">=3.11"
 dependencies = [
     "pygit2==1.19.2",