repr-cli 0.2.14__tar.gz → 0.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {repr_cli-0.2.14/repr_cli.egg-info → repr_cli-0.2.16}/PKG-INFO +3 -1
  2. {repr_cli-0.2.14 → repr_cli-0.2.16}/README.md +2 -0
  3. {repr_cli-0.2.14 → repr_cli-0.2.16}/pyproject.toml +1 -1
  4. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/cli.py +17 -0
  5. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/config.py +1 -1
  6. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/storage.py +32 -0
  7. {repr_cli-0.2.14 → repr_cli-0.2.16/repr_cli.egg-info}/PKG-INFO +3 -1
  8. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/SOURCES.txt +1 -0
  9. repr_cli-0.2.16/tests/test_deduplication.py +143 -0
  10. {repr_cli-0.2.14 → repr_cli-0.2.16}/LICENSE +0 -0
  11. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/__init__.py +0 -0
  12. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/__main__.py +0 -0
  13. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/api.py +0 -0
  14. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/auth.py +0 -0
  15. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/discovery.py +0 -0
  16. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/doctor.py +0 -0
  17. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/extractor.py +0 -0
  18. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/hooks.py +0 -0
  19. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/keychain.py +0 -0
  20. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/llm.py +0 -0
  21. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/openai_analysis.py +0 -0
  22. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/privacy.py +0 -0
  23. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/telemetry.py +0 -0
  24. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/templates.py +0 -0
  25. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/tools.py +0 -0
  26. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/ui.py +0 -0
  27. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr/updater.py +0 -0
  28. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/dependency_links.txt +0 -0
  29. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/entry_points.txt +0 -0
  30. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/requires.txt +0 -0
  31. {repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/top_level.txt +0 -0
  32. {repr_cli-0.2.14 → repr_cli-0.2.16}/setup.cfg +0 -0
  33. {repr_cli-0.2.14 → repr_cli-0.2.16}/setup.py +0 -0
  34. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_environment_variables.py +0 -0
  35. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_network_sandboxing.py +0 -0
  36. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_privacy_guarantees.py +0 -0
  37. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_profile_export.py +0 -0
  38. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_repo_identity.py +0 -0
  39. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_stories_review.py +0 -0
  40. {repr_cli-0.2.14 → repr_cli-0.2.16}/tests/test_token_budget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: repr-cli
3
- Version: 0.2.14
3
+ Version: 0.2.16
4
4
  Summary: A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile
5
5
  Author-email: Repr <hello@repr.dev>
6
6
  License: MIT License
@@ -219,6 +219,8 @@ repr generate --since "2 weeks ago" --local
219
219
  repr generate --since monday --local
220
220
  ```
221
221
 
222
+ **Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
223
+
222
224
  ### Publish your profile (optional)
223
225
 
224
226
  ```bash
@@ -161,6 +161,8 @@ repr generate --since "2 weeks ago" --local
161
161
  repr generate --since monday --local
162
162
  ```
163
163
 
164
+ **Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
165
+
164
166
  ### Publish your profile (optional)
165
167
 
166
168
  ```bash
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "repr-cli"
7
- version = "0.2.14"
7
+ version = "0.2.16"
8
8
  description = "A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile"
9
9
  readme = "README.md"
10
10
  license = {file = "LICENSE"}
@@ -491,6 +491,23 @@ def generate(
491
491
  console.print(f" [{BRAND_MUTED}]No commits found[/]")
492
492
  continue
493
493
 
494
+ # Filter out already-processed commits
495
+ from .storage import get_processed_commit_shas
496
+ processed_shas = get_processed_commit_shas(repo_name=repo_info.name)
497
+
498
+ original_count = len(commit_list)
499
+ commit_list = [c for c in commit_list if c["full_sha"] not in processed_shas]
500
+
501
+ if not json_output and processed_shas:
502
+ skipped_count = original_count - len(commit_list)
503
+ if skipped_count > 0:
504
+ console.print(f" [{BRAND_MUTED}]Skipping {skipped_count} already-processed commits[/]")
505
+
506
+ if not commit_list:
507
+ if not json_output:
508
+ console.print(f" [{BRAND_MUTED}]All commits already processed[/]")
509
+ continue
510
+
494
511
  # Dry run: show what would be sent
495
512
  if dry_run:
496
513
  from .openai_analysis import estimate_tokens, get_batch_size
@@ -108,7 +108,7 @@ DEFAULT_CONFIG = {
108
108
  },
109
109
  "generation": {
110
110
  "batch_size": 5, # Commits per story
111
- "auto_generate_on_hook": False, # Auto-generate when hook runs
111
+ "auto_generate_on_hook": True, # Auto-generate when hook runs
112
112
  "default_template": "resume", # Default story template
113
113
  "token_limit": 100000, # Max tokens per cloud request
114
114
  "max_commits_per_batch": 50, # Max commits per request
@@ -246,6 +246,38 @@ def update_story_metadata(story_id: str, updates: dict[str, Any]) -> bool:
246
246
  return False
247
247
 
248
248
 
249
def get_processed_commit_shas(repo_name: str | None = None) -> set[str]:
    """
    Get all commit SHAs that have already been processed into stories.

    Scans every ``*.json`` file in ``STORIES_DIR`` and unions the string
    entries found under each file's ``commit_shas`` key. A file that cannot
    be read, is not valid JSON, or does not have the expected shape is
    skipped, so a single bad file never aborts the scan.

    Args:
        repo_name: Optional filter by repository name. When truthy, only
            metadata whose ``repo_name`` equals it contributes SHAs.

    Returns:
        Set of commit SHAs that have been processed
    """
    ensure_directories()

    processed_shas: set[str] = set()

    for meta_path in STORIES_DIR.glob("*.json"):
        # Keep the try body to the two calls that can actually fail.
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        try:
            metadata = json.loads(meta_path.read_text())
        except (OSError, ValueError):
            continue

        # Valid JSON that is not an object (list, string, number) has no
        # story metadata to offer.
        if not isinstance(metadata, dict):
            continue

        # Filter by repo if specified
        if repo_name and metadata.get("repo_name") != repo_name:
            continue

        # Collect commit SHAs from this story. A missing or null field, or a
        # bare string, must not raise or be split into single characters.
        commit_shas = metadata.get("commit_shas")
        if not isinstance(commit_shas, (list, tuple)):
            continue
        processed_shas.update(sha for sha in commit_shas if isinstance(sha, str))

    return processed_shas
279
+
280
+
249
281
  def get_unpushed_stories() -> list[dict[str, Any]]:
250
282
  """Get stories that haven't been pushed to cloud."""
251
283
  stories = list_stories()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: repr-cli
3
- Version: 0.2.14
3
+ Version: 0.2.16
4
4
  Summary: A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile
5
5
  Author-email: Repr <hello@repr.dev>
6
6
  License: MIT License
@@ -219,6 +219,8 @@ repr generate --since "2 weeks ago" --local
219
219
  repr generate --since monday --local
220
220
  ```
221
221
 
222
+ **Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
223
+
222
224
  ### Publish your profile (optional)
223
225
 
224
226
  ```bash
@@ -28,6 +28,7 @@ repr_cli.egg-info/dependency_links.txt
28
28
  repr_cli.egg-info/entry_points.txt
29
29
  repr_cli.egg-info/requires.txt
30
30
  repr_cli.egg-info/top_level.txt
31
+ tests/test_deduplication.py
31
32
  tests/test_environment_variables.py
32
33
  tests/test_network_sandboxing.py
33
34
  tests/test_privacy_guarantees.py
@@ -0,0 +1,143 @@
1
+ """
2
+ Test commit deduplication in story generation.
3
+ """
4
+
5
+ import json
6
+ import tempfile
7
+ from pathlib import Path
8
+ from unittest.mock import patch
9
+
10
+ import pytest
11
+
12
+ from repr.storage import (
13
+ get_processed_commit_shas,
14
+ save_story,
15
+ STORIES_DIR,
16
+ )
17
+
18
+
19
@pytest.fixture
def temp_stories_dir(monkeypatch):
    """Redirect ``repr.storage.STORIES_DIR`` to a throwaway directory.

    Yields the temporary ``stories`` directory; it is removed when the
    enclosing ``TemporaryDirectory`` context exits.
    """
    with tempfile.TemporaryDirectory() as scratch_root:
        isolated_dir = Path(scratch_root) / "stories"
        isolated_dir.mkdir()
        monkeypatch.setattr("repr.storage.STORIES_DIR", isolated_dir)
        yield isolated_dir
27
+
28
+
29
def test_get_processed_commit_shas_empty(temp_stories_dir):
    """With no stories saved, no commit is reported as processed."""
    result = get_processed_commit_shas()
    assert result == set()
33
+
34
+
35
def test_get_processed_commit_shas_single_story(temp_stories_dir):
    """Every SHA recorded on a lone story is reported as processed."""
    # One story carrying three commits.
    story_meta = {
        "repo_name": "test-repo",
        "commit_shas": ["abc123", "def456", "ghi789"],
    }
    save_story("Test story content", story_meta)

    # All three must come back, and nothing else.
    result = get_processed_commit_shas()
    assert result == {"abc123", "def456", "ghi789"}
47
+
48
+
49
def test_get_processed_commit_shas_multiple_stories(temp_stories_dir):
    """SHAs from several stories are unioned, with repeats collapsed."""
    # (content, commit SHAs) per story; def456 deliberately appears twice.
    stories = [
        ("Story 1", ["abc123", "def456"]),
        ("Story 2", ["ghi789", "jkl012"]),
        ("Story 3", ["def456", "mno345"]),
    ]
    for content, shas in stories:
        save_story(content, {
            "repo_name": "test-repo",
            "commit_shas": shas,
        })

    # The union across all three stories.
    result = get_processed_commit_shas()
    assert result == {"abc123", "def456", "ghi789", "jkl012", "mno345"}
68
+
69
+
70
def test_get_processed_commit_shas_filter_by_repo(temp_stories_dir):
    """Passing ``repo_name`` restricts the result to that repository's stories."""
    # (content, repository, commit SHAs) per story, across two repositories.
    stories = [
        ("Story 1", "repo-a", ["abc123", "def456"]),
        ("Story 2", "repo-b", ["ghi789", "jkl012"]),
        ("Story 3", "repo-a", ["mno345"]),
    ]
    for content, repo, shas in stories:
        save_story(content, {
            "repo_name": repo,
            "commit_shas": shas,
        })

    # repo-a owns stories 1 and 3.
    from_repo_a = get_processed_commit_shas(repo_name="repo-a")
    assert from_repo_a == {"abc123", "def456", "mno345"}

    # repo-b owns story 2 only.
    from_repo_b = get_processed_commit_shas(repo_name="repo-b")
    assert from_repo_b == {"ghi789", "jkl012"}
93
+
94
+
95
def test_get_processed_commit_shas_handles_missing_field(temp_stories_dir):
    """A story whose metadata has no ``commit_shas`` key contributes nothing."""
    # Metadata deliberately omits commit_shas.
    meta_without_shas = {
        "repo_name": "test-repo",
        "summary": "Test story",
    }
    save_story("Story without commits", meta_without_shas)

    # The lookup must succeed and report no processed commits.
    result = get_processed_commit_shas()
    assert result == set()
106
+
107
+
108
def test_get_processed_commit_shas_handles_corrupt_json(temp_stories_dir):
    """An unparseable ``.json`` file in the stories directory is skipped."""
    # Drop a file that is not valid JSON into the stories directory.
    (temp_stories_dir / "corrupt.json").write_text("{ invalid json }")

    # The bad file must be ignored rather than raising.
    result = get_processed_commit_shas()
    assert result == set()
117
+
118
+
119
def test_commit_filtering_in_generate(temp_stories_dir):
    """Filtering a commit list on ``full_sha`` drops already-processed commits.

    Reproduces the list-comprehension filter used by the generate command
    rather than invoking the command itself.
    """
    # Two commits are already covered by a saved story.
    save_story("Existing story", {
        "repo_name": "test-repo",
        "commit_shas": ["abc123", "def456"],
    })

    # Commit list as it would arrive from git: two old, two new.
    candidates = [
        {"full_sha": "abc123", "message": "Already processed 1"},
        {"full_sha": "def456", "message": "Already processed 2"},
        {"full_sha": "ghi789", "message": "New commit 1"},
        {"full_sha": "jkl012", "message": "New commit 2"},
    ]

    # Apply the same filter expression the command uses.
    already_done = get_processed_commit_shas(repo_name="test-repo")
    remaining = [commit for commit in candidates if commit["full_sha"] not in already_done]

    # Only the new commits survive, in their original order.
    assert len(remaining) == 2
    first, second = remaining
    assert first["full_sha"] == "ghi789"
    assert second["full_sha"] == "jkl012"
143
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes