PyPI - repr-cli - Versions diffs - 0.2.14__tar.gz → 0.2.16__tar.gz - Mend

repr-cli 0.2.14 (tar.gz) → 0.2.16 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

{repr_cli-0.2.14/repr_cli.egg-info → repr_cli-0.2.16}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: repr-cli
-Version: 0.2.14
+Version: 0.2.16
 Summary: A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile
 Author-email: Repr <hello@repr.dev>
 License: MIT License
@@ -219,6 +219,8 @@ repr generate --since "2 weeks ago" --local
 repr generate --since monday --local
 ```
+**Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
 ### Publish your profile (optional)
 ```bash

{repr_cli-0.2.14 → repr_cli-0.2.16}/README.md RENAMED Viewed

@@ -161,6 +161,8 @@ repr generate --since "2 weeks ago" --local
 repr generate --since monday --local
 ```
+**Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
 ### Publish your profile (optional)
 ```bash

{repr_cli-0.2.14 → repr_cli-0.2.16}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "repr-cli"
-version = "0.2.14"
+version = "0.2.16"
 description = "A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile"
 readme = "README.md"
 license = {file = "LICENSE"}

{repr_cli-0.2.14 → repr_cli-0.2.16}/repr/cli.py RENAMED Viewed

@@ -491,6 +491,23 @@ def generate(
                 console.print(f"  [{BRAND_MUTED}]No commits found[/]")
             continue
+        # Filter out already-processed commits
+        from .storage import get_processed_commit_shas
+        processed_shas = get_processed_commit_shas(repo_name=repo_info.name)
+        original_count = len(commit_list)
+        commit_list = [c for c in commit_list if c["full_sha"] not in processed_shas]
+        if not json_output and processed_shas:
+            skipped_count = original_count - len(commit_list)
+            if skipped_count > 0:
+                console.print(f"  [{BRAND_MUTED}]Skipping {skipped_count} already-processed commits[/]")
+        if not commit_list:
+            if not json_output:
+                console.print(f"  [{BRAND_MUTED}]All commits already processed[/]")
+            continue
         # Dry run: show what would be sent
         if dry_run:
             from .openai_analysis import estimate_tokens, get_batch_size

{repr_cli-0.2.14 → repr_cli-0.2.16}/repr/config.py RENAMED Viewed

@@ -108,7 +108,7 @@ DEFAULT_CONFIG = {
     },
     "generation": {
         "batch_size": 5,  # Commits per story
-        "auto_generate_on_hook": False,  # Auto-generate when hook runs
+        "auto_generate_on_hook": True,  # Auto-generate when hook runs
         "default_template": "resume",  # Default story template
         "token_limit": 100000,  # Max tokens per cloud request
         "max_commits_per_batch": 50,  # Max commits per request

{repr_cli-0.2.14 → repr_cli-0.2.16}/repr/storage.py RENAMED Viewed

@@ -246,6 +246,38 @@ def update_story_metadata(story_id: str, updates: dict[str, Any]) -> bool:
         return False
+def get_processed_commit_shas(repo_name: str | None = None) -> set[str]:
+    """
+    Get all commit SHAs that have already been processed into stories.
+    Reads every ``*.json`` file in STORIES_DIR and unions the entries of its
+    ``commit_shas`` list. This is a best-effort scan: a file that cannot be
+    read, decoded, or parsed, or whose content does not have the expected
+    shape, is skipped instead of aborting the whole scan.
+    Args:
+        repo_name: Optional filter by repository name. When truthy, only
+            metadata whose ``repo_name`` equals it contributes SHAs.
+    Returns:
+        Set of commit SHAs that have been processed
+    """
+    ensure_directories()
+    processed_shas: set[str] = set()
+    for meta_path in STORIES_DIR.glob("*.json"):
+        # Keep the try body to the two calls that can actually raise.
+        try:
+            metadata = json.loads(meta_path.read_text())
+        except (OSError, ValueError):
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            continue
+        # Valid JSON is not necessarily an object (e.g. a bare list or string).
+        if not isinstance(metadata, dict):
+            continue
+        # Filter by repo if specified
+        if repo_name and metadata.get("repo_name") != repo_name:
+            continue
+        commit_shas = metadata.get("commit_shas")
+        # A null value would raise in set.update(); a plain string would be
+        # split into single characters. Only accept a real JSON array.
+        if not isinstance(commit_shas, list):
+            continue
+        processed_shas.update(sha for sha in commit_shas if isinstance(sha, str))
+    return processed_shas
 def get_unpushed_stories() -> list[dict[str, Any]]:
     """Get stories that haven't been pushed to cloud."""
     stories = list_stories()

{repr_cli-0.2.14 → repr_cli-0.2.16/repr_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: repr-cli
-Version: 0.2.14
+Version: 0.2.16
 Summary: A beautiful, privacy-first CLI that analyzes your code repositories and generates a compelling developer profile
 Author-email: Repr <hello@repr.dev>
 License: MIT License
@@ -219,6 +219,8 @@ repr generate --since "2 weeks ago" --local
 repr generate --since monday --local
 ```
+**Note:** `repr generate` automatically skips commits that have already been processed into stories. You can safely run it multiple times without creating duplicates.
 ### Publish your profile (optional)
 ```bash

{repr_cli-0.2.14 → repr_cli-0.2.16}/repr_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -28,6 +28,7 @@ repr_cli.egg-info/dependency_links.txt
 repr_cli.egg-info/entry_points.txt
 repr_cli.egg-info/requires.txt
 repr_cli.egg-info/top_level.txt
+tests/test_deduplication.py
 tests/test_environment_variables.py
 tests/test_network_sandboxing.py
 tests/test_privacy_guarantees.py

repr_cli-0.2.16/tests/test_deduplication.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""
+Test commit deduplication in story generation.
+"""
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+import pytest
+from repr.storage import (
+    get_processed_commit_shas,
+    save_story,
+    STORIES_DIR,
+)
+@pytest.fixture
+def temp_stories_dir(monkeypatch):
+    """Redirect repr.storage.STORIES_DIR to a throwaway directory.
+    Yields the temporary ``stories`` directory. Its parent is removed when
+    the TemporaryDirectory context exits after the test.
+    """
+    with tempfile.TemporaryDirectory() as scratch_root:
+        scratch_stories = Path(scratch_root, "stories")
+        scratch_stories.mkdir()
+        # Patch the module attribute so get_processed_commit_shas globs here.
+        monkeypatch.setattr("repr.storage.STORIES_DIR", scratch_stories)
+        yield scratch_stories
+def test_get_processed_commit_shas_empty(temp_stories_dir):
+    """With no story metadata on disk, the processed set is empty."""
+    assert get_processed_commit_shas() == set()
+def test_get_processed_commit_shas_single_story(temp_stories_dir):
+    """Every SHA recorded on one saved story is reported as processed."""
+    story_meta = dict(
+        repo_name="test-repo",
+        commit_shas=["abc123", "def456", "ghi789"],
+    )
+    save_story("Test story content", story_meta)
+    assert get_processed_commit_shas() == {"abc123", "def456", "ghi789"}
+def test_get_processed_commit_shas_multiple_stories(temp_stories_dir):
+    """SHAs from several stories are unioned; a repeated SHA appears once."""
+    stories = [
+        ("Story 1", ["abc123", "def456"]),
+        ("Story 2", ["ghi789", "jkl012"]),
+        ("Story 3", ["def456", "mno345"]),  # def456 repeats a SHA from Story 1
+    ]
+    for content, shas in stories:
+        save_story(content, {"repo_name": "test-repo", "commit_shas": shas})
+    expected = {"abc123", "def456", "ghi789", "jkl012", "mno345"}
+    assert get_processed_commit_shas() == expected
+def test_get_processed_commit_shas_filter_by_repo(temp_stories_dir):
+    """Passing repo_name restricts the result to that repository's stories."""
+    stories = [
+        ("Story 1", "repo-a", ["abc123", "def456"]),
+        ("Story 2", "repo-b", ["ghi789", "jkl012"]),
+        ("Story 3", "repo-a", ["mno345"]),
+    ]
+    for content, repo, shas in stories:
+        save_story(content, {"repo_name": repo, "commit_shas": shas})
+    expected_by_repo = {
+        "repo-a": {"abc123", "def456", "mno345"},
+        "repo-b": {"ghi789", "jkl012"},
+    }
+    # Check repo-a first, then repo-b, one filtered query per repository.
+    for repo, expected in expected_by_repo.items():
+        assert get_processed_commit_shas(repo_name=repo) == expected
+def test_get_processed_commit_shas_handles_missing_field(temp_stories_dir):
+    """A story whose metadata has no commit_shas key contributes no SHAs."""
+    story_meta = {"repo_name": "test-repo", "summary": "Test story"}
+    save_story("Story without commits", story_meta)
+    # Must not raise; the result is simply empty.
+    assert get_processed_commit_shas() == set()
+def test_get_processed_commit_shas_handles_corrupt_json(temp_stories_dir):
+    """An unparseable metadata file is skipped instead of raising."""
+    (temp_stories_dir / "corrupt.json").write_text("{ invalid json }")
+    # The only file present is invalid, so nothing is collected.
+    assert get_processed_commit_shas() == set()
+def test_commit_filtering_in_generate(temp_stories_dir):
+    """Check the commit filter used by generate drops processed commits.
+    This replays the same ``full_sha not in processed_shas`` filter on a
+    hand-built commit list; it does not invoke the generate command itself.
+    """
+    save_story("Existing story", {
+        "repo_name": "test-repo",
+        "commit_shas": ["abc123", "def456"],
+    })
+    # Stand-in for the commit list that would come from git.
+    commit_rows = (
+        ("abc123", "Already processed 1"),
+        ("def456", "Already processed 2"),
+        ("ghi789", "New commit 1"),
+        ("jkl012", "New commit 2"),
+    )
+    all_commits = [{"full_sha": sha, "message": msg} for sha, msg in commit_rows]
+    processed_shas = get_processed_commit_shas(repo_name="test-repo")
+    remaining = [c["full_sha"] for c in all_commits if c["full_sha"] not in processed_shas]
+    # Only the two unprocessed commits survive, in their original order.
+    assert remaining == ["ghi789", "jkl012"]