PyPI - photo-stack-finder - Versions diffs - 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

orchestrator/__init__.py +2 -2
orchestrator/app.py +6 -11
orchestrator/build_pipeline.py +19 -21
orchestrator/orchestrator_runner.py +11 -8
orchestrator/pipeline_builder.py +126 -126
orchestrator/pipeline_orchestrator.py +604 -604
orchestrator/review_persistence.py +162 -162
orchestrator/static/orchestrator.css +76 -76
orchestrator/static/orchestrator.html +11 -5
orchestrator/static/orchestrator.js +3 -1
overlap_metrics/__init__.py +1 -1
overlap_metrics/config.py +135 -135
overlap_metrics/core.py +284 -284
overlap_metrics/estimators.py +292 -292
overlap_metrics/metrics.py +307 -307
overlap_metrics/registry.py +99 -99
overlap_metrics/utils.py +104 -104
photo_compare/__init__.py +1 -1
photo_compare/base.py +285 -285
photo_compare/config.py +225 -225
photo_compare/distance.py +15 -15
photo_compare/feature_methods.py +173 -173
photo_compare/file_hash.py +29 -29
photo_compare/hash_methods.py +99 -99
photo_compare/histogram_methods.py +118 -118
photo_compare/pixel_methods.py +58 -58
photo_compare/structural_methods.py +104 -104
photo_compare/types.py +28 -28
{photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
scripts/orchestrate.py +12 -10
utils/__init__.py +4 -3
utils/base_pipeline_stage.py +171 -171
utils/base_ports.py +176 -176
utils/benchmark_utils.py +823 -823
utils/channel.py +74 -74
utils/comparison_gates.py +40 -21
utils/compute_benchmarks.py +355 -355
utils/compute_identical.py +94 -24
utils/compute_indices.py +235 -235
utils/compute_perceptual_hash.py +127 -127
utils/compute_perceptual_match.py +240 -240
utils/compute_sha_bins.py +64 -20
utils/compute_template_similarity.py +1 -1
utils/compute_versions.py +483 -483
utils/config.py +8 -5
utils/data_io.py +83 -83
utils/graph_context.py +44 -44
utils/logger.py +2 -2
utils/models.py +2 -2
utils/photo_file.py +90 -91
utils/pipeline_graph.py +334 -334
utils/pipeline_stage.py +408 -408
utils/plot_helpers.py +123 -123
utils/ports.py +136 -136
utils/progress.py +415 -415
utils/report_builder.py +139 -139
utils/review_types.py +55 -55
utils/review_utils.py +10 -19
utils/sequence.py +10 -8
utils/sequence_clustering.py +1 -1
utils/template.py +57 -57
utils/template_parsing.py +71 -0
photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
{photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
{photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
{photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
{photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0

utils/report_builder.py CHANGED Viewed

@@ -1,139 +1,139 @@
-"""Report generation utility with fluent API for consistent formatting.
-This module provides a ReportBuilder class that simplifies the creation of
-text-based reports with consistent formatting across the codebase.
-Usage:
-    report = (
-        ReportBuilder()
-        .add_title("Benchmark Results")
-        .add_section("Performance Metrics")
-        .add_metric("Accuracy", 0.95, ".2%")
-        .add_blank_line()
-        .build()
-    )
-"""
-from pathlib import Path
-from typing import Any, Self
-class ReportBuilder:
-    """Fluent API for building text reports with consistent formatting.
-    Provides a chainable interface for adding formatted sections, metrics,
-    and separators to text reports. All methods return self to enable
-    method chaining.
-    Example:
-        >>> report = (
-        ...     ReportBuilder()
-        ...     .add_title("Results")
-        ...     .add_metric("Score", 0.85, ".2f")
-        ...     .build()
-        ... )
-    """
-    def __init__(self) -> None:
-        """Initialize an empty report builder."""
-        self._lines: list[str] = []
-    def add_title(self, text: str, width: int = 80) -> Self:
-        """Add a centered title with separator lines above and below.
-        Args:
-            text: The title text to display
-            width: Width of the separator lines (default: 80)
-        Returns:
-            Self for method chaining
-        """
-        self._lines.append("=" * width)
-        self._lines.append(text.center(width))
-        self._lines.append("=" * width)
-        return self
-    def add_section(self, header: str) -> Self:
-        """Add a section header with a single underline.
-        Args:
-            header: The section header text
-        Returns:
-            Self for method chaining
-        """
-        self._lines.append("")
-        self._lines.append(header)
-        self._lines.append("-" * len(header))
-        return self
-    def add_metric(self, name: str, value: Any, format_spec: str = "") -> Self:
-        """Add a formatted metric line.
-        Args:
-            name: The metric name (will be left-aligned)
-            value: The metric value to format
-            format_spec: Optional format specification (e.g., ".2f", ".2%")
-        Returns:
-            Self for method chaining
-        Example:
-            >>> builder.add_metric("Accuracy", 0.95, ".2%")  # "Accuracy: 95.00%"
-        """
-        if format_spec:
-            formatted_value = f"{value:{format_spec}}"
-        else:
-            formatted_value = str(value)
-        self._lines.append(f"{name}: {formatted_value}")
-        return self
-    def add_blank_line(self) -> Self:
-        """Add a blank line for spacing.
-        Returns:
-            Self for method chaining
-        """
-        self._lines.append("")
-        return self
-    def add_separator(self, char: str = "=", width: int = 80) -> Self:
-        """Add a separator line.
-        Args:
-            char: The character to use for the separator (default: "=")
-            width: Width of the separator line (default: 80)
-        Returns:
-            Self for method chaining
-        """
-        self._lines.append(char * width)
-        return self
-    def add_text(self, text: str) -> Self:
-        """Add arbitrary text (can be multi-line).
-        Args:
-            text: The text to add (newlines will be preserved)
-        Returns:
-            Self for method chaining
-        """
-        self._lines.append(text)
-        return self
-    def build(self) -> str:
-        """Build and return the complete report as a string.
-        Returns:
-            The formatted report with newline-separated lines
-        """
-        return "\n".join(self._lines)
-    def save(self, path: Path) -> None:
-        """Build the report and save it to a file.
-        Args:
-            path: The file path where the report should be saved
-        """
-        path.write_text(self.build(), encoding="utf-8")
+"""Report generation utility with fluent API for consistent formatting.
+This module provides a ReportBuilder class that simplifies the creation of
+text-based reports with consistent formatting across the codebase.
+Usage:
+    report = (
+        ReportBuilder()
+        .add_title("Benchmark Results")
+        .add_section("Performance Metrics")
+        .add_metric("Accuracy", 0.95, ".2%")
+        .add_blank_line()
+        .build()
+    )
+"""
+from pathlib import Path
+from typing import Any, Self
+class ReportBuilder:
+    """Fluent API for building text reports with consistent formatting.
+    Provides a chainable interface for adding formatted sections, metrics,
+    and separators to text reports. All methods return self to enable
+    method chaining.
+    Example:
+        >>> report = (
+        ...     ReportBuilder()
+        ...     .add_title("Results")
+        ...     .add_metric("Score", 0.85, ".2f")
+        ...     .build()
+        ... )
+    """
+    def __init__(self) -> None:
+        """Initialize an empty report builder."""
+        self._lines: list[str] = []
+    def add_title(self, text: str, width: int = 80) -> Self:
+        """Add a centered title with separator lines above and below.
+        Args:
+            text: The title text to display
+            width: Width of the separator lines (default: 80)
+        Returns:
+            Self for method chaining
+        """
+        self._lines.append("=" * width)
+        self._lines.append(text.center(width))
+        self._lines.append("=" * width)
+        return self
+    def add_section(self, header: str) -> Self:
+        """Add a section header with a single underline.
+        Args:
+            header: The section header text
+        Returns:
+            Self for method chaining
+        """
+        self._lines.append("")
+        self._lines.append(header)
+        self._lines.append("-" * len(header))
+        return self
+    def add_metric(self, name: str, value: Any, format_spec: str = "") -> Self:
+        """Add a formatted metric line.
+        Args:
+            name: The metric name (will be left-aligned)
+            value: The metric value to format
+            format_spec: Optional format specification (e.g., ".2f", ".2%")
+        Returns:
+            Self for method chaining
+        Example:
+            >>> builder.add_metric("Accuracy", 0.95, ".2%")  # "Accuracy: 95.00%"
+        """
+        if format_spec:
+            formatted_value = f"{value:{format_spec}}"
+        else:
+            formatted_value = str(value)
+        self._lines.append(f"{name}: {formatted_value}")
+        return self
+    def add_blank_line(self) -> Self:
+        """Add a blank line for spacing.
+        Returns:
+            Self for method chaining
+        """
+        self._lines.append("")
+        return self
+    def add_separator(self, char: str = "=", width: int = 80) -> Self:
+        """Add a separator line.
+        Args:
+            char: The character to use for the separator (default: "=")
+            width: Width of the separator line (default: 80)
+        Returns:
+            Self for method chaining
+        """
+        self._lines.append(char * width)
+        return self
+    def add_text(self, text: str) -> Self:
+        """Add arbitrary text (can be multi-line).
+        Args:
+            text: The text to add (newlines will be preserved)
+        Returns:
+            Self for method chaining
+        """
+        self._lines.append(text)
+        return self
+    def build(self) -> str:
+        """Build and return the complete report as a string.
+        Returns:
+            The formatted report with newline-separated lines
+        """
+        return "\n".join(self._lines)
+    def save(self, path: Path) -> None:
+        """Build the report and save it to a file.
+        Args:
+            path: The file path where the report should be saved
+        """
+        path.write_text(self.build(), encoding="utf-8")

utils/review_types.py CHANGED Viewed

@@ -1,55 +1,55 @@
-"""Type definitions for review decision persistence."""
-from __future__ import annotations
-from typing import Literal, TypedDict
-# Photo identifier using content hash + path (stable across runs)
-PhotoIdentifier = tuple[str, str]  # (sha256, relative_path)
-class IdenticalDecision(TypedDict):
-    """Decision for an identical group."""
-    type: Literal["identical"]
-    group_id: str  # SHA256 of sorted photo sha256s
-    timestamp: str  # ISO format
-    user: str
-    action: Literal["keep_all", "keep_exemplar", "delete_all", "custom"]
-    kept_photos: list[PhotoIdentifier]  # Photos to keep (empty if keep_all)
-    deleted_photos: list[PhotoIdentifier]  # Photos to delete (empty if keep_all)
-class SequenceDecision(TypedDict):
-    """Decision for a sequence similarity group."""
-    type: Literal["sequences"]
-    group_id: str  # SHA256 of template + sorted photo sha256s
-    timestamp: str  # ISO format
-    user: str
-    action: Literal["approved", "rejected"]
-    sequence_selections: dict[str, bool]  # sequence_name -> included
-    deleted_photos: list[PhotoIdentifier]  # Individual photos marked for deletion
-    deleted_rows: list[int]  # Row positions marked for deletion
-    deleted_sequences: list[int]  # Sequence indices marked for deletion
-class ReviewIndexEntry(TypedDict):
-    """Entry in the review index (loaded from JSONL)."""
-    group_id: str
-    decision_type: Literal["identical", "sequences"]
-    action: str
-    timestamp: str
-    user: str
-class DeletionIndexEntry(TypedDict):
-    """Entry in the deletion index."""
-    sha256: str
-    path: str
-    reason: str  # "identical_group", "sequence_group", "individual"
-    group_id: str
-    timestamp: str
-    user: str
+"""Type definitions for review decision persistence."""
+from __future__ import annotations
+from typing import Literal, TypedDict
+# Photo identifier using content hash + path (stable across runs)
+PhotoIdentifier = tuple[str, str]  # (sha256, relative_path)
+class IdenticalDecision(TypedDict):
+    """Decision for an identical group."""
+    type: Literal["identical"]
+    group_id: str  # SHA256 of sorted photo sha256s
+    timestamp: str  # ISO format
+    user: str
+    action: Literal["keep_all", "keep_exemplar", "delete_all", "custom"]
+    kept_photos: list[PhotoIdentifier]  # Photos to keep (empty if keep_all)
+    deleted_photos: list[PhotoIdentifier]  # Photos to delete (empty if keep_all)
+class SequenceDecision(TypedDict):
+    """Decision for a sequence similarity group."""
+    type: Literal["sequences"]
+    group_id: str  # SHA256 of template + sorted photo sha256s
+    timestamp: str  # ISO format
+    user: str
+    action: Literal["approved", "rejected"]
+    sequence_selections: dict[str, bool]  # sequence_name -> included
+    deleted_photos: list[PhotoIdentifier]  # Individual photos marked for deletion
+    deleted_rows: list[int]  # Row positions marked for deletion
+    deleted_sequences: list[int]  # Sequence indices marked for deletion
+class ReviewIndexEntry(TypedDict):
+    """Entry in the review index (loaded from JSONL)."""
+    group_id: str
+    decision_type: Literal["identical", "sequences"]
+    action: str
+    timestamp: str
+    user: str
+class DeletionIndexEntry(TypedDict):
+    """Entry in the deletion index."""
+    sha256: str
+    path: str
+    reason: str  # "identical_group", "sequence_group", "individual"
+    group_id: str
+    timestamp: str
+    user: str

utils/review_utils.py CHANGED Viewed

@@ -32,18 +32,16 @@ from .sequence import (
 def build_identical_group(eq_class: list[PhotoFile], exemplar_id: int) -> IdenticalGroup:
-    """Create review data structure from a list of identical photos."""
+    """Create review data structure from a list of identical photos.
+    Uses pre-computed dimensions from PhotoFile (no file I/O required).
+    """
     # Create stable group_id from sorted photo IDs
     photo_ids: list[int] = sorted([pf.id for pf in eq_class])
     group_id: str = hashlib.sha256("".join(str(id) for id in photo_ids).encode()).hexdigest()
     photos: list[IdenticalPhoto] = []
     for pf in eq_class:
-        # Get canonical dimensions (may trigger rotation detection if not cached)
-        with pf.image_data() as img:
-            width = img.get_width()
-            height = img.get_height()
         # Production code: path should never be None
         assert pf.path is not None, f"Photo {pf.id} has None path in production code"
@@ -52,10 +50,10 @@ def build_identical_group(eq_class: list[PhotoFile], exemplar_id: int) -> Identi
                 id=pf.id,
                 path=str(pf.path),
                 filename=pf.path.name,
-                is_exemplar="IDENTICAL" in pf.cache,
+                is_exemplar="IDENTICAL" not in pf.cache,
                 file_size=pf.size_bytes,
-                width=width,
-                height=height,
+                width=pf.width,  # Use pre-computed dimension
+                height=pf.height,  # Use pre-computed dimension
             )
         )
@@ -183,16 +181,9 @@ def _dataframe_to_group_dict(df: pd.DataFrame, reference: PhotoFileSeries) -> Se
                 # In production, path is never None, but test fixtures need this
                 filename = photo.path.name if photo.path is not None else f"test_photo_{photo.id}.jpg"
-                # Get canonical dimensions (may trigger rotation detection if not cached)
-                # For test fixtures with path=None, use placeholder values
-                if photo.path is not None:
-                    with photo.image_data() as img:
-                        width = img.get_width()
-                        height = img.get_height()
-                else:
-                    # Test fixture: estimate dimensions from pixels assuming square
-                    width = int(math.sqrt(photo.pixels))
-                    height = int(math.sqrt(photo.pixels))
+                # Use pre-computed dimensions (loaded during PhotoFile.__init__)
+                width = photo.width
+                height = photo.height
                 photos.append(
                     SequencePhoto(

utils/sequence.py CHANGED Viewed

@@ -1,10 +1,10 @@
-"""Sequence data structures for photo deduplication pipeline.
+"""Sequence data structures for photo stack finding pipeline.
 This module provides dict-based data structures that replace pandas for type safety
 and pickle reliability, while maintaining minimal pandas usage for specific algorithms.
 Core Types:
-    INDEX_T: Type alias for tuple[str, ...], used as multi-field index keys
+    INDEX_T: Type alias for tuple[str, ...] (from template_parsing), used as multi-field index keys
     PhotoFileSeries: dict[INDEX_T, PhotoFile] with template name and pd.Series-like API
     PhotoSequence: Hierarchical forest structure containing reference + similar sequences
@@ -54,12 +54,15 @@ import pandas as pd
 from .comparison_gates import GateSequence
 from .config import CONFIG
 from .photo_file import PhotoFile
+from .template_parsing import INDEX_T
-# Type alias for multi-field index keys used in PhotoFileSeries
-# Each tuple element represents a field extracted from the filename pattern
-# Example: ("IMG", "001", "2024") for template "prefix_{P0}_{P1}_{P2}.jpg"
-# Variable-length tuple allows different sequences to have different numbers of fields
-INDEX_T = tuple[str, ...]
+__all__ = [
+    "INDEX_T",
+    "PhotoFileSeries",
+    "PhotoSequence",
+    "count_forest_ref_photos",
+    "count_forest_total_photos",
+]
 class PhotoFileSeries(dict[INDEX_T, PhotoFile]):
@@ -844,6 +847,5 @@ def extend_reference_sequence(
     return result
 # Moved to sequence_clustering.py to avoid circular import with review_utils
 # Import from sequence_clustering instead: from .sequence_clustering import cluster_similar_sequences

utils/sequence_clustering.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Sequence clustering algorithm for photo deduplication pipeline.
+"""Sequence clustering algorithm for photo stack finding pipeline.
 This module provides the core clustering algorithm used by multiple pipeline stages
 (ComputeIndices, ComputeTemplateSimilarity, ComputePerceptualMatch) to group similar

utils/template.py CHANGED Viewed

@@ -1,57 +1,57 @@
-"""Template utilities for partial string substitution.
-Provides template extraction and partial substitution using str.format_map().
-"""
-from __future__ import annotations
-class DefaultDict(dict[str, str]):
-    """Dictionary that returns placeholder for missing keys during format_map().
-    This allows partial substitution of template strings, leaving unmatched
-    placeholders in their original form.
-    Example:
-            >>> template = "IMG_{P0}_{P1}_{P2}"
-            >>> values = {'P0': '1234', 'P1': '5678'}
-            >>> result = template.format_map(DefaultDict(values))
-            >>> print(result)
-            IMG_1234_5678_{P2}
-    """
-    def __missing__(self, key: str) -> str:
-        """Return the key wrapped in braces for missing keys.
-        Args:
-                key: The missing key
-        Returns:
-                String of the form "{key}"
-        """
-        return f"{{{key}}}"
-def partial_format(template: str, values: dict[str, str]) -> str:
-    """Perform partial string substitution on a template.
-    Substitutes available values and leaves missing placeholders unchanged.
-    Args:
-            template: Template string with {P0}, {P1}, etc. placeholders
-            values: Dictionary of values to substitute
-    Returns:
-            Partially formatted string
-    Example:
-            >>> partial_format("IMG_{P0}_{P1}_{P2}", {'P0': '1234', 'P1': '5678'})
-            'IMG_1234_5678_{P2}'
-            >>> partial_format("IMG_{P0}_{P1}", {'P0': '1234', 'P1': '5678'})
-            'IMG_1234_5678'
-            >>> partial_format("IMG_{P0}_{P1}_{P2}", {})
-            'IMG_{P0}_{P1}_{P2}'
-    """
-    return template.format_map(DefaultDict(values))
+"""Template utilities for partial string substitution.
+Provides template extraction and partial substitution using str.format_map().
+"""
+from __future__ import annotations
+class DefaultDict(dict[str, str]):
+    """Dictionary that returns placeholder for missing keys during format_map().
+    This allows partial substitution of template strings, leaving unmatched
+    placeholders in their original form.
+    Example:
+            >>> template = "IMG_{P0}_{P1}_{P2}"
+            >>> values = {'P0': '1234', 'P1': '5678'}
+            >>> result = template.format_map(DefaultDict(values))
+            >>> print(result)
+            IMG_1234_5678_{P2}
+    """
+    def __missing__(self, key: str) -> str:
+        """Return the key wrapped in braces for missing keys.
+        Args:
+                key: The missing key
+        Returns:
+                String of the form "{key}"
+        """
+        return f"{{{key}}}"
+def partial_format(template: str, values: dict[str, str]) -> str:
+    """Perform partial string substitution on a template.
+    Substitutes available values and leaves missing placeholders unchanged.
+    Args:
+            template: Template string with {P0}, {P1}, etc. placeholders
+            values: Dictionary of values to substitute
+    Returns:
+            Partially formatted string
+    Example:
+            >>> partial_format("IMG_{P0}_{P1}_{P2}", {'P0': '1234', 'P1': '5678'})
+            'IMG_1234_5678_{P2}'
+            >>> partial_format("IMG_{P0}_{P1}", {'P0': '1234', 'P1': '5678'})
+            'IMG_1234_5678'
+            >>> partial_format("IMG_{P0}_{P1}_{P2}", {})
+            'IMG_{P0}_{P1}_{P2}'
+    """
+    return template.format_map(DefaultDict(values))

photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl