argot-engine 0.2.7__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {argot_engine-0.2.7 → argot_engine-0.2.8}/PKG-INFO +1 -1
  2. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/check.py +7 -2
  3. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/explain.py +59 -50
  4. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/PKG-INFO +1 -1
  5. {argot_engine-0.2.7 → argot_engine-0.2.8}/pyproject.toml +1 -1
  6. {argot_engine-0.2.7 → argot_engine-0.2.8}/README.md +0 -0
  7. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/__init__.py +0 -0
  8. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/__main__.py +0 -0
  9. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/dataset.py +0 -0
  10. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/extract.py +0 -0
  11. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/fetch.py +0 -0
  12. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/git_walk.py +0 -0
  13. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/jepa/__init__.py +0 -0
  14. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/jepa/encoder.py +0 -0
  15. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/jepa/model.py +0 -0
  16. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/jepa/predictor.py +0 -0
  17. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/jepa/sigreg.py +0 -0
  18. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/__init__.py +0 -0
  19. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_check.py +0 -0
  20. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_explain.py +0 -0
  21. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_extract_smoke.py +0 -0
  22. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_fetch.py +0 -0
  23. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_git_walk.py +0 -0
  24. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_jepa.py +0 -0
  25. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_tokenize.py +0 -0
  26. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_train_smoke.py +0 -0
  27. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tests/test_validate.py +0 -0
  28. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/tokenize.py +0 -0
  29. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/train.py +0 -0
  30. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot/validate.py +0 -0
  31. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/SOURCES.txt +0 -0
  32. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/dependency_links.txt +0 -0
  33. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/entry_points.txt +0 -0
  34. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/requires.txt +0 -0
  35. {argot_engine-0.2.7 → argot_engine-0.2.8}/argot_engine.egg-info/top_level.txt +0 -0
  36. {argot_engine-0.2.7 → argot_engine-0.2.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argot-engine
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Requires-Python: >=3.11
5
5
  Requires-Dist: pygit2==1.19.2
6
6
  Requires-Dist: scikit-learn>=1.5.0
@@ -177,8 +177,13 @@ def main() -> None:
177
177
 
178
178
  results.sort(key=lambda r: r[0], reverse=True)
179
179
 
180
+ col_w = 55
181
+
182
+ def _trunc(fp: str) -> str:
183
+ return fp if len(fp) <= col_w else "..." + fp[-(col_w - 3) :]
184
+
180
185
  t = args.threshold
181
- print(f"{'SURPRISE':>9} {'TAG':<10} {'FILE':<48} {'LINE':>5} REF")
186
+ print(f"{'SURPRISE':>9} {'TAG':<10} {'FILE':<{col_w}} {'LINE':>5} REF")
182
187
  for score, fp, line, ref in results:
183
188
  if score <= t:
184
189
  tag = "ok"
@@ -188,7 +193,7 @@ def main() -> None:
188
193
  tag = "suspicious"
189
194
  else:
190
195
  tag = "foreign"
191
- print(f"{score:>9.4f} {tag:<10} {fp:<48} {line:>5} {ref}")
196
+ print(f"{score:>9.4f} {tag:<10} {_trunc(fp):<{col_w}} {line:>5} {ref}")
192
197
 
193
198
  if any(s > args.threshold for s, *_ in results):
194
199
  sys.exit(1)
@@ -11,7 +11,7 @@ import numpy as np
11
11
  import pygit2
12
12
  import torch
13
13
 
14
- from argot.check import _resolve_shas
14
+ from argot.check import _resolve_shas, _workdir_patches
15
15
  from argot.git_walk import walk_commits
16
16
  from argot.jepa.encoder import TokenEncoder
17
17
  from argot.jepa.model import JEPAArgot
@@ -68,7 +68,7 @@ def _score_dataset(
68
68
  def main() -> None:
69
69
  parser = argparse.ArgumentParser(description="Explain style anomalies in a git ref")
70
70
  parser.add_argument("repo_path")
71
- parser.add_argument("ref")
71
+ parser.add_argument("ref", nargs="?", default="")
72
72
  parser.add_argument("--model", default=".argot/model.pkl")
73
73
  parser.add_argument("--dataset", default=".argot/dataset.jsonl")
74
74
  parser.add_argument("--threshold-percentile", type=float, default=75.0)
@@ -105,62 +105,71 @@ def main() -> None:
105
105
  style_examples = select_style_examples(scored_dataset, n=args.examples)
106
106
  example_texts = [" ".join(t["text"] for t in r["hunk_tokens"]) for r in style_examples]
107
107
 
108
- repo = pygit2.Repository(args.repo_path)
109
- shas = _resolve_shas(repo, args.ref)
110
- if not shas:
111
- sys.exit(0)
112
-
113
108
  context_lines = 50
114
- with torch.no_grad():
115
- for commit, file_path, post_blob, hunks in walk_commits(args.repo_path, shas):
116
- lang = language_for_path(file_path)
117
- if lang is None:
118
- continue
119
- try:
120
- source_lines = post_blob.decode("utf-8", errors="replace").splitlines()
121
- except Exception:
122
- continue
123
-
124
- for hunk in hunks:
125
- hunk_start = hunk.new_start - 1
126
- hunk_end = hunk_start + hunk.new_lines
127
- if hunk_start < 0 or hunk_end > len(source_lines):
128
- continue
129
109
 
130
- before_start = max(0, hunk_start - context_lines)
131
- ctx_tokens = tokenize_lines(source_lines, lang, before_start, hunk_start)
132
- hunk_tokens = tokenize_lines(source_lines, lang, hunk_start, hunk_end)
110
+ def _emit_patches(patches: Any, commit_label: str) -> None:
111
+ with torch.no_grad():
112
+ for file_path, post_blob, hunks in patches:
113
+ lang = language_for_path(file_path)
114
+ if lang is None:
115
+ continue
116
+ try:
117
+ source_lines = post_blob.decode("utf-8", errors="replace").splitlines()
118
+ except Exception:
119
+ continue
133
120
 
134
- ctx_text = " ".join(t.text for t in ctx_tokens)
135
- hunk_text = " ".join(t.text for t in hunk_tokens)
121
+ for hunk in hunks:
122
+ hunk_start = hunk.new_start - 1
123
+ hunk_end = hunk_start + hunk.new_lines
124
+ if hunk_start < 0 or hunk_end > len(source_lines):
125
+ continue
136
126
 
137
- ctx_vec = torch.tensor(
138
- vectorizer.transform([ctx_text]).toarray(), dtype=torch.float32
139
- )
140
- hunk_vec = torch.tensor(
141
- vectorizer.transform([hunk_text]).toarray(), dtype=torch.float32
142
- )
127
+ before_start = max(0, hunk_start - context_lines)
128
+ ctx_tokens = tokenize_lines(source_lines, lang, before_start, hunk_start)
129
+ hunk_tokens = tokenize_lines(source_lines, lang, hunk_start, hunk_end)
143
130
 
144
- score = model.surprise(ctx_vec, hunk_vec).item()
145
- pct = percentile_rank(score, distribution)
131
+ ctx_text = " ".join(t.text for t in ctx_tokens)
132
+ hunk_text = " ".join(t.text for t in hunk_tokens)
146
133
 
147
- if pct < args.threshold_percentile:
148
- continue
134
+ ctx_vec = torch.tensor(
135
+ vectorizer.transform([ctx_text]).toarray(), dtype=torch.float32
136
+ )
137
+ hunk_vec = torch.tensor(
138
+ vectorizer.transform([hunk_text]).toarray(), dtype=torch.float32
139
+ )
149
140
 
150
- print(
151
- json.dumps(
152
- {
153
- "file_path": file_path,
154
- "line": hunk.new_start,
155
- "commit": str(commit.id)[:8],
156
- "surprise": round(score, 4),
157
- "percentile": round(pct, 1),
158
- "hunk_text": hunk_text,
159
- "context_text": ctx_text,
160
- "style_examples": example_texts,
161
- }
141
+ score = model.surprise(ctx_vec, hunk_vec).item()
142
+ pct = percentile_rank(score, distribution)
143
+
144
+ if pct < args.threshold_percentile:
145
+ continue
146
+
147
+ print(
148
+ json.dumps(
149
+ {
150
+ "file_path": file_path,
151
+ "line": hunk.new_start,
152
+ "commit": commit_label,
153
+ "surprise": round(score, 4),
154
+ "percentile": round(pct, 1),
155
+ "hunk_text": hunk_text,
156
+ "context_text": ctx_text,
157
+ "style_examples": example_texts,
158
+ }
159
+ )
162
160
  )
163
- )
161
+
162
+ if args.ref == "":
163
+ _emit_patches(_workdir_patches(args.repo_path), "workdir")
164
+ else:
165
+ repo = pygit2.Repository(args.repo_path)
166
+ shas = _resolve_shas(repo, args.ref)
167
+ if not shas:
168
+ sys.exit(0)
169
+ _emit_patches(
170
+ ((fp, blob, hunks) for _, fp, blob, hunks in walk_commits(args.repo_path, shas)),
171
+ args.ref,
172
+ )
164
173
 
165
174
 
166
175
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argot-engine
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Requires-Python: >=3.11
5
5
  Requires-Dist: pygit2==1.19.2
6
6
  Requires-Dist: scikit-learn>=1.5.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "argot-engine"
3
- version = "0.2.7"
3
+ version = "0.2.8"
4
4
  requires-python = ">=3.11"
5
5
  dependencies = [
6
6
  "pygit2==1.19.2",
File without changes
File without changes