photo-stack-finder 0.1.7-py3-none-any.whl → 0.1.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
orchestrator/review_persistence.py
CHANGED

@@ -1,162 +1,162 @@
(All 162 lines are removed and re-added with identical text: a whole-file rewrite with no visible change, most likely a line-ending or encoding normalization. The file content, shown once:)

"""Functions for persisting and loading review decisions."""

from __future__ import annotations

import csv
import json
from pathlib import Path
from typing import Any

from utils.review_types import (
    DeletionIndexEntry,
    IdenticalDecision,
    PhotoIdentifier,
    ReviewIndexEntry,
    SequenceDecision,
)


def append_decision_to_log(decision: IdenticalDecision | SequenceDecision, work_dir: Path) -> None:
    """Append a review decision to the JSONL log.

    Args:
        decision: Decision object (IdenticalDecision or SequenceDecision)
        work_dir: Work directory containing the log file
    """
    log_path: Path = work_dir / "review_decisions.jsonl"

    # Ensure work directory exists
    work_dir.mkdir(parents=True, exist_ok=True)

    # Append decision as JSON line
    with log_path.open("a", encoding="utf-8") as f:
        json.dump(decision, f, ensure_ascii=False)
        f.write("\n")


def build_review_index(work_dir: Path) -> dict[str, Any]:
    """Build in-memory indices from JSONL log and generate CSV files.

    Reads review_decisions.jsonl and creates:
    - review_index_identical.csv: identical group decisions
    - review_index_sequences.csv: sequence group decisions
    - review_index_deletions.csv: individual photo deletions

    Args:
        work_dir: Work directory containing the log file

    Returns:
        Dictionary with 'identical', 'sequences', and 'deletions' indices
    """
    log_path: Path = work_dir / "review_decisions.jsonl"

    if not log_path.exists():
        # No decisions yet, return empty indices
        return {"identical": {}, "sequences": {}, "deletions": {}}

    # Parse JSONL log
    identical_index: dict[str, ReviewIndexEntry] = {}
    sequences_index: dict[str, ReviewIndexEntry] = {}
    deletions_index: dict[PhotoIdentifier, DeletionIndexEntry] = {}

    with log_path.open(encoding="utf-8") as f:
        for _line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                decision: dict[str, Any] = json.loads(line)
                decision_type: str | None = decision.get("type")

                if decision_type == "identical":
                    # Index identical group decision
                    group_id: str = decision["group_id"]
                    identical_index[group_id] = {
                        "group_id": group_id,
                        "decision_type": "identical",
                        "action": decision["action"],
                        "timestamp": decision["timestamp"],
                        "user": decision["user"],
                    }

                    # Index deletions
                    deleted_photos: list[tuple[str, str]] = decision.get("deleted_photos", [])
                    sha256: str
                    path: str
                    for sha256, path in deleted_photos:
                        deletions_index[(sha256, path)] = {
                            "sha256": sha256,
                            "path": path,
                            "reason": "identical_group",
                            "group_id": group_id,
                            "timestamp": decision["timestamp"],
                            "user": decision["user"],
                        }

                elif decision_type == "sequences":
                    # Index sequence group decision
                    seq_group_id: str = decision["group_id"]
                    sequences_index[seq_group_id] = {
                        "group_id": seq_group_id,
                        "decision_type": "sequences",
                        "action": decision["action"],
                        "timestamp": decision["timestamp"],
                        "user": decision["user"],
                    }

                    # Index deletions
                    seq_deleted_photos: list[tuple[str, str]] = decision.get("deleted_photos", [])
                    seq_sha256: str
                    seq_path: str
                    for seq_sha256, seq_path in seq_deleted_photos:
                        deletions_index[(seq_sha256, seq_path)] = {
                            "sha256": seq_sha256,
                            "path": seq_path,
                            "reason": "sequence_group",
                            "group_id": seq_group_id,
                            "timestamp": decision["timestamp"],
                            "user": decision["user"],
                        }

            except json.JSONDecodeError:
                continue

    # Write CSV indices
    _write_csv_index(
        work_dir / "review_index_identical.csv",
        identical_index.values(),
        ["group_id", "decision_type", "action", "timestamp", "user"],
    )

    _write_csv_index(
        work_dir / "review_index_sequences.csv",
        sequences_index.values(),
        ["group_id", "decision_type", "action", "timestamp", "user"],
    )

    _write_csv_index(
        work_dir / "review_index_deletions.csv",
        deletions_index.values(),
        ["sha256", "path", "reason", "group_id", "timestamp", "user"],
    )

    return {
        "identical": identical_index,
        "sequences": sequences_index,
        "deletions": deletions_index,
    }


def _write_csv_index(path: Path, rows: list[dict[str, Any]] | Any, fieldnames: list[str]) -> None:
    """Write index data to CSV file.

    Args:
        path: Output CSV file path
        rows: Iterable of dictionaries to write
        fieldnames: CSV column names
    """
    with path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
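A quick usage sketch of the persistence round trip above. The decision payload shape is inferred from the keys build_review_index() reads ("type", "group_id", "action", "timestamp", "user", "deleted_photos") and is an assumption, not a documented schema; a plain dict stands in for IdenticalDecision, which json.dump serializes the same way.

# Hypothetical sketch; the dict keys mirror what the indexer above reads.
from pathlib import Path

from orchestrator.review_persistence import append_decision_to_log, build_review_index

work_dir = Path("work")  # assumed working directory

decision = {
    "type": "identical",
    "group_id": "grp-001",   # hypothetical group id
    "action": "keep_best",   # hypothetical action label
    "timestamp": "2024-01-01T00:00:00Z",
    "user": "reviewer",
    # (sha256, path) pairs; JSON round-trips them as 2-element lists,
    # which unpack identically in the indexer's for loop.
    "deleted_photos": [("deadbeef", "photos/IMG_1234(1).jpg")],
}

append_decision_to_log(decision, work_dir)  # appends one JSON line to the log
indices = build_review_index(work_dir)      # rebuilds indices and writes CSVs
assert "grp-001" in indices["identical"]
assert ("deadbeef", "photos/IMG_1234(1).jpg") in indices["deletions"]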
orchestrator/static/orchestrator.css
CHANGED

@@ -1,4 +1,4 @@
-/* Photo
+/* Photo Stack Finder Orchestrator Styles */
 
 :root {
     --primary-color: #2563eb;

@@ -520,78 +520,78 @@ h2 {
(After the three context lines closing the preceding rule, lines 523-597 are removed and re-added with identical text, the same no-op rewrite seen in review_persistence.py. The block, shown once:)

        max-height: 90vh;
    }
}

/* Info Section (Getting Started) */
.info-section {
    background: linear-gradient(135deg, #f0f9ff, #e0f2fe);
    border-left: 4px solid var(--primary-color);
    border-radius: 8px;
    padding: 0;
    margin-bottom: 2rem;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
}

.info-section details summary {
    padding: 1rem 1.5rem;
    cursor: pointer;
    user-select: none;
    font-size: 1.1rem;
    color: var(--primary-color);
    list-style: none;
}

.info-section details summary::-webkit-details-marker {
    display: none;
}

.info-section details[open] summary {
    border-bottom: 1px solid var(--border-color);
    margin-bottom: 1rem;
}

.info-section .info-content {
    padding: 0 1.5rem 1.5rem 1.5rem;
}

.info-section h3 {
    color: var(--text-primary);
    margin-top: 1rem;
    margin-bottom: 0.5rem;
    font-size: 1.1rem;
}

.info-section ul, .info-section ol {
    margin-left: 1.5rem;
    margin-bottom: 1rem;
}

.info-section li {
    margin-bottom: 0.5rem;
}

.info-section .checklist {
    list-style: none;
    margin-left: 0;
}

.info-section .checklist li {
    padding-left: 1.5rem;
    position: relative;
}

.info-section code {
    background: rgba(0, 0, 0, 0.05);
    padding: 0.2rem 0.4rem;
    border-radius: 3px;
    font-family: 'Consolas', 'Monaco', monospace;
    font-size: 0.9em;
}

.info-section a {
    color: var(--primary-color);
    text-decoration: none;
}

.info-section a:hover {
    text-decoration: underline;
}
orchestrator/static/orchestrator.html
CHANGED

@@ -3,14 +3,14 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Photo
+    <title>Photo Stack Finder Orchestrator</title>
     <link rel="icon" type="image/svg+xml" href="/static/favicon.svg">
     <link rel="stylesheet" href="/static/orchestrator.css">
 </head>
 <body>
     <div class="container">
         <header>
-            <h1>📸 Photo
+            <h1>📸 Photo Stack Finder Orchestrator</h1>
             <p>Automated pipeline for finding and reviewing duplicate photos</p>
         </header>
 

@@ -22,7 +22,7 @@
 <div class="info-content">
     <h3>What This Tool Does</h3>
     <p>
-        Photo
+        Photo Stack Finder finds photos that originate from the <strong>same source image</strong>:
         byte-identical files, different resolutions (low-res vs. high-res), edited versions vs. originals,
         rotation variants, format conversions (JPEG vs. HEIC), and cloud sync duplicates like
         <code>IMG_1234.jpg</code> and <code>IMG_1234(1).jpg</code>.

@@ -63,7 +63,7 @@
 
 <p>
     <strong>📖 Detailed Guide:</strong> See
-    <a href="https://github.com/gbarrett28/
+    <a href="https://github.com/gbarrett28/photo_stack_finder/blob/master/GETTING_STARTED.md" target="_blank" rel="noopener">
         GETTING_STARTED.md
     </a>
     for step-by-step instructions including Google Takeout export.

@@ -84,7 +84,7 @@
            placeholder="/path/to/your/photos">
     <button type="button" id="browse-source" class="btn-browse">📁 Browse</button>
 </div>
-<small>Directory containing your photos to
+<small>Directory containing your photos to analyze</small>
 </div>
 
 <div class="form-group">

@@ -120,6 +120,12 @@
         Debug Mode (sequential processing)
     </label>
 </div>
+<div class="form-group">
+    <label for="skip-byte-identical">
+        <input type="checkbox" id="skip-byte-identical" name="skip_byte_identical" checked>
+        Skip byte-identical detection (trust SHA256 uniqueness)
+    </label>
+</div>
 </div>
 
 <div class="option-group">
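The "same source image" grouping the info section describes (different resolutions, edits, format conversions) is the kind of matching a perceptual hash handles; the file list above includes compute_perceptual_hash.py and compute_perceptual_match.py for this stage. Below is a generic average-hash (aHash) sketch of the idea, assuming Pillow is available; it illustrates the technique, not the package's actual implementation:

# Generic aHash sketch; not photo-stack-finder's compute_perceptual_hash code.
from PIL import Image


def average_hash(path: str, hash_size: int = 8) -> int:
    """Downscale to hash_size x hash_size grayscale, threshold at the mean."""
    img = Image.open(path).convert("L").resize((hash_size, hash_size))
    pixels = list(img.getdata())
    mean = sum(pixels) / len(pixels)
    bits = 0
    for px in pixels:
        bits = (bits << 1) | (1 if px >= mean else 0)
    return bits  # a 64-bit fingerprint at the default hash_size


def hamming(a: int, b: int) -> int:
    """Number of differing bits between two fingerprints."""
    return bin(a ^ b).count("1")


# Resized or re-encoded copies of one source image typically differ by only
# a few bits, e.g.:
# hamming(average_hash("IMG_1234.jpg"), average_hash("IMG_1234(1).jpg")) <= 5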
orchestrator/static/orchestrator.js
CHANGED

@@ -1,4 +1,4 @@
-// Photo
+// Photo Stack Finder Orchestrator Client-side Logic
 
 // State
 let currentBrowsePath = '';

@@ -313,6 +313,7 @@ function populateForm(config) {
     document.getElementById('max-workers').value = config.max_workers || '';
     document.getElementById('batch-size').value = config.batch_size || '';
     document.getElementById('debug-mode').checked = config.debug_mode || false;
+    document.getElementById('skip-byte-identical').checked = config.skip_byte_identical !== false; // Default true
 
     document.getElementById('comparison-method').value = config.comparison_method || 'SSIM';
     document.getElementById('ssim-threshold').value = config.gate_thresholds?.SSIM || 0.95;

@@ -369,6 +370,7 @@ async function handleFormSubmit(event) {
         max_workers: maxWorkers ? parseInt(maxWorkers) : null,
         batch_size: batchSize ? parseInt(batchSize) : null,
         debug_mode: formData.get('debug_mode') === 'on',
+        skip_byte_identical: formData.get('skip_byte_identical') === 'on',
         comparison_method: formData.get('comparison_method'),
         gate_thresholds: {
             SSIM: parseFloat(formData.get('ssim_threshold')),
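The skip_byte_identical option wired up above pairs with the enlarged compute_sha_bins.py and compute_identical.py in the file list: once photos are binned by SHA-256 digest, a shared digest can either be trusted as proof of byte-identical files or confirmed by re-reading the bytes. A minimal sketch of that trade-off; the function names here are illustrative, not the package's API:

# Illustrative sketch of SHA-256 binning vs. byte-level confirmation;
# function names are hypothetical, not the photo-stack-finder API.
import hashlib
from collections import defaultdict
from pathlib import Path


def sha256_of(path: Path) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()


def identical_groups(paths: list[Path], skip_byte_identical: bool = True) -> list[list[Path]]:
    # Bin files by digest; only bins with 2+ members can contain duplicates.
    bins: defaultdict[str, list[Path]] = defaultdict(list)
    for p in paths:
        bins[sha256_of(p)].append(p)
    groups = [g for g in bins.values() if len(g) > 1]
    if skip_byte_identical:
        # Trust SHA-256: an accidental collision between distinct photos is
        # practically impossible, so equal digests mean byte-identical files.
        return groups
    # Paranoid mode: confirm each bin with a byte-for-byte comparison.
    confirmed = []
    for g in groups:
        ref = g[0].read_bytes()
        same = [g[0]] + [p for p in g[1:] if p.read_bytes() == ref]
        if len(same) > 1:
            confirmed.append(same)
    return confirmed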
overlap_metrics/__init__.py
CHANGED
@@ -40,7 +40,7 @@ if version.parse(scipy.__version__) < version.parse(MINIMUM_SCIPY_VERSION):
 
 # Version info
 __version__ = "1.0.0"
-__author__ = "Photo
+__author__ = "Photo Stack Finder Team"
 __description__ = "Distribution separation and overlap metrics with pluggable estimators"
 
 # Default metric suite for compute_suite()