photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
utils/config.py CHANGED
@@ -1,4 +1,4 @@
1
- """Global configuration for photo_dedup project."""
1
+ """Global configuration for photo_stack_finder project."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -22,6 +22,9 @@ class ProcessingConfig:
22
22
  # Random seeds for reproducibility
23
23
  DEFAULT_RANDOM_SEED: int = 12345
24
24
 
25
+ # Byte-identical detection (skip to trust SHA256 uniqueness)
26
+ SKIP_BYTE_IDENTICAL: bool = True
27
+
25
28
  # Sequential gates configuration (order matters)
26
29
  # Gates are executed in order, short-circuiting on first failure
27
30
  # "aspect_ratio" is special gate, others are photo_compare methods
@@ -76,7 +79,7 @@ class ProcessingConfig:
76
79
  TARGET_FPR: float = 0.00075 # Target false positive rate for benchmark thresholds
77
80
 
78
81
  # Logging configuration
79
- LOG_LEVEL: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL
82
+ LOG_LEVEL: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL
80
83
 
81
84
 
82
85
  @dataclass
@@ -105,7 +108,7 @@ class PathsConfig:
105
108
 
106
109
  SOURCE_DIR: str = ""
107
110
  WORK_DIR: str = ""
108
- WORK_DIR_NAME: str = "photo_dedup"
111
+ WORK_DIR_NAME: str = "photo_stack_finder"
109
112
 
110
113
  # ALL filenames are now consistently abstracted as configurable constants
111
114
  # Pipeline data files
@@ -316,7 +319,7 @@ class OrchestratorConfig:
316
319
  """Configuration for orchestrator server and UI."""
317
320
 
318
321
  # Directory naming
319
- DEFAULT_WORK_DIR_NAME: str = "photo_dedup" # Default work directory name (relative to source parent)
322
+ DEFAULT_WORK_DIR_NAME: str = "photo_stack_finder" # Default work directory name (relative to source parent)
320
323
 
321
324
  # Static asset paths (computed relative to package)
322
325
  # These are set dynamically and should not be overridden
@@ -344,7 +347,7 @@ ARG_CONFIG_MAP: list[tuple[str, str, Any, bool]] = [
344
347
 
345
348
  @dataclass
346
349
  class Config:
347
- """Global configuration for photo_dedup."""
350
+ """Global configuration for photo_stack_finder."""
348
351
 
349
352
  processing: ProcessingConfig
350
353
  sequences: SequenceConfig
utils/data_io.py CHANGED
@@ -1,83 +1,83 @@
1
- """Data I/O utilities for CSV file handling with error handling.
2
-
3
- This module provides standardized functions for loading and saving CSV files
4
- with consistent error handling, logging, and user feedback.
5
- """
6
-
7
- import logging
8
- import sys
9
- from pathlib import Path
10
- from typing import Any
11
-
12
- import pandas as pd
13
-
14
- # Configure logger for this module
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- def load_required_csv(
19
- path: Path,
20
- error_message: str,
21
- **pandas_kwargs: Any,
22
- ) -> pd.DataFrame:
23
- """Load a CSV file or exit with error message if not found.
24
-
25
- This function is designed for user-facing scripts that require specific
26
- input files. If the file doesn't exist, it prints an error message and
27
- exits the program with status code 1.
28
-
29
- Args:
30
- path: Path to the CSV file to load
31
- error_message: Error message to display if file doesn't exist
32
- **pandas_kwargs: Additional keyword arguments to pass to pd.read_csv()
33
-
34
- Returns:
35
- The loaded DataFrame
36
-
37
- Raises:
38
- SystemExit: If the file doesn't exist (exits with code 1)
39
-
40
- Example:
41
- >>> df = load_required_csv(
42
- ... Path("data/scores.csv"),
43
- ... "scores.csv not found. Run benchmark first.",
44
- ... index_col=0
45
- ... )
46
- """
47
- if not path.exists():
48
- logger.error(error_message)
49
- sys.exit(1)
50
-
51
- # read_csv returns DataFrame, but mypy doesn't infer this from **kwargs
52
- result: pd.DataFrame = pd.read_csv(path, **pandas_kwargs)
53
- return result
54
-
55
-
56
- def save_dataframe_with_logging(
57
- df: pd.DataFrame,
58
- path: Path,
59
- description: str,
60
- **pandas_kwargs: Any,
61
- ) -> None:
62
- """Save DataFrame to CSV with logging.
63
-
64
- Saves the DataFrame and prints a confirmation message indicating
65
- what was saved and where.
66
-
67
- Args:
68
- df: DataFrame to save
69
- path: Path where the CSV should be saved
70
- description: Human-readable description of what's being saved (for logging)
71
- **pandas_kwargs: Additional keyword arguments to pass to pd.to_csv()
72
-
73
- Example:
74
- >>> save_dataframe_with_logging(
75
- ... outliers_df,
76
- ... output_dir / "outliers.csv",
77
- ... "outlier pairs",
78
- ... index=False
79
- ... )
80
- # Prints: "Saved 42 outlier pairs to output/outliers.csv"
81
- """
82
- df.to_csv(path, **pandas_kwargs)
83
- logger.info(f"Saved {len(df)} {description} to {path}")
1
+ """Data I/O utilities for CSV file handling with error handling.
2
+
3
+ This module provides standardized functions for loading and saving CSV files
4
+ with consistent error handling, logging, and user feedback.
5
+ """
6
+
7
+ import logging
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import pandas as pd
13
+
14
+ # Configure logger for this module
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def load_required_csv(
19
+ path: Path,
20
+ error_message: str,
21
+ **pandas_kwargs: Any,
22
+ ) -> pd.DataFrame:
23
+ """Load a CSV file or exit with error message if not found.
24
+
25
+ This function is designed for user-facing scripts that require specific
26
+ input files. If the file doesn't exist, it prints an error message and
27
+ exits the program with status code 1.
28
+
29
+ Args:
30
+ path: Path to the CSV file to load
31
+ error_message: Error message to display if file doesn't exist
32
+ **pandas_kwargs: Additional keyword arguments to pass to pd.read_csv()
33
+
34
+ Returns:
35
+ The loaded DataFrame
36
+
37
+ Raises:
38
+ SystemExit: If the file doesn't exist (exits with code 1)
39
+
40
+ Example:
41
+ >>> df = load_required_csv(
42
+ ... Path("data/scores.csv"),
43
+ ... "scores.csv not found. Run benchmark first.",
44
+ ... index_col=0
45
+ ... )
46
+ """
47
+ if not path.exists():
48
+ logger.error(error_message)
49
+ sys.exit(1)
50
+
51
+ # read_csv returns DataFrame, but mypy doesn't infer this from **kwargs
52
+ result: pd.DataFrame = pd.read_csv(path, **pandas_kwargs)
53
+ return result
54
+
55
+
56
+ def save_dataframe_with_logging(
57
+ df: pd.DataFrame,
58
+ path: Path,
59
+ description: str,
60
+ **pandas_kwargs: Any,
61
+ ) -> None:
62
+ """Save DataFrame to CSV with logging.
63
+
64
+ Saves the DataFrame and prints a confirmation message indicating
65
+ what was saved and where.
66
+
67
+ Args:
68
+ df: DataFrame to save
69
+ path: Path where the CSV should be saved
70
+ description: Human-readable description of what's being saved (for logging)
71
+ **pandas_kwargs: Additional keyword arguments to pass to pd.to_csv()
72
+
73
+ Example:
74
+ >>> save_dataframe_with_logging(
75
+ ... outliers_df,
76
+ ... output_dir / "outliers.csv",
77
+ ... "outlier pairs",
78
+ ... index=False
79
+ ... )
80
+ # Prints: "Saved 42 outlier pairs to output/outliers.csv"
81
+ """
82
+ df.to_csv(path, **pandas_kwargs)
83
+ logger.info(f"Saved {len(df)} {description} to {path}")
utils/graph_context.py CHANGED
@@ -1,44 +1,44 @@
1
- """Global graph context for pipeline auto-registration.
2
-
3
- This module holds the active PipelineGraph instance during pipeline
4
- construction. Stages and channels auto-register with this graph when
5
- instantiated within a PipelineBuilder context.
6
-
7
- Architecture:
8
- - Breaks circular dependency between pipeline_stage, pipeline_graph, and channel
9
- - Provides clean separation of concerns (graph context vs stage implementation)
10
- - Enables auto-registration pattern without tight coupling
11
- """
12
-
13
- # FIXME: Is this thread-safe?
14
-
15
- from __future__ import annotations
16
-
17
- from .pipeline_graph import PipelineGraph
18
-
19
- # Circular dependency resolved by moving BaseChannel from channel.py to ports.py
20
- # Previous cycle: channel → graph_context → pipeline_graph → channel (via BaseChannel)
21
- # Now: pipeline_graph → ports (BaseChannel), channel → ports (BaseChannel), no cycle!
22
-
23
- # Global graph context set by PipelineBuilder context manager
24
- _active_graph: PipelineGraph | None = None
25
-
26
-
27
- def get_active_graph() -> PipelineGraph | None:
28
- """Get the currently active pipeline graph for auto-registration.
29
-
30
- Returns:
31
- The active PipelineGraph if within a PipelineBuilder context, None otherwise
32
- """
33
- return _active_graph
34
-
35
-
36
- def set_active_graph(graph: PipelineGraph | None) -> None:
37
- """Set the active pipeline graph for auto-registration.
38
-
39
- Args:
40
- graph: The PipelineGraph to set as active, or None to clear
41
- """
42
- global _active_graph # noqa: PLW0603
43
- # Library configuration pattern - global state for graph context
44
- _active_graph = graph
1
+ """Global graph context for pipeline auto-registration.
2
+
3
+ This module holds the active PipelineGraph instance during pipeline
4
+ construction. Stages and channels auto-register with this graph when
5
+ instantiated within a PipelineBuilder context.
6
+
7
+ Architecture:
8
+ - Breaks circular dependency between pipeline_stage, pipeline_graph, and channel
9
+ - Provides clean separation of concerns (graph context vs stage implementation)
10
+ - Enables auto-registration pattern without tight coupling
11
+ """
12
+
13
+ # FIXME: Is this thread-safe?
14
+
15
+ from __future__ import annotations
16
+
17
+ from .pipeline_graph import PipelineGraph
18
+
19
+ # Circular dependency resolved by moving BaseChannel from channel.py to ports.py
20
+ # Previous cycle: channel → graph_context → pipeline_graph → channel (via BaseChannel)
21
+ # Now: pipeline_graph → ports (BaseChannel), channel → ports (BaseChannel), no cycle!
22
+
23
+ # Global graph context set by PipelineBuilder context manager
24
+ _active_graph: PipelineGraph | None = None
25
+
26
+
27
+ def get_active_graph() -> PipelineGraph | None:
28
+ """Get the currently active pipeline graph for auto-registration.
29
+
30
+ Returns:
31
+ The active PipelineGraph if within a PipelineBuilder context, None otherwise
32
+ """
33
+ return _active_graph
34
+
35
+
36
+ def set_active_graph(graph: PipelineGraph | None) -> None:
37
+ """Set the active pipeline graph for auto-registration.
38
+
39
+ Args:
40
+ graph: The PipelineGraph to set as active, or None to clear
41
+ """
42
+ global _active_graph # noqa: PLW0603
43
+ # Library configuration pattern - global state for graph context
44
+ _active_graph = graph
utils/logger.py CHANGED
@@ -1,4 +1,4 @@
1
- """Logging module for photo_dedup project.
1
+ """Logging module for photo_stack_finder project.
2
2
 
3
3
  Provides centralized logging configuration that reads from CONFIG.
4
4
  """
@@ -32,7 +32,7 @@ def get_logger() -> logging.Logger:
32
32
  # Library configuration pattern - global state for logger
33
33
 
34
34
  if _logger is None:
35
- _logger = logging.getLogger("photo_dedup")
35
+ _logger = logging.getLogger("photo_stack_finder")
36
36
  _logger.setLevel(getattr(logging, CONFIG.processing.LOG_LEVEL))
37
37
 
38
38
  # Console handler with formatting
utils/models.py CHANGED
@@ -292,7 +292,7 @@ class IdenticalGroup(BaseModel):
292
292
  """Model for a group of byte-identical photos.
293
293
 
294
294
  Represents a set of photos that have identical SHA256 hashes and are verified
295
- to be byte-for-byte identical. Used in review UI for photo deduplication decisions.
295
+ to be byte-for-byte identical. Used in review UI for photo stack finding decisions.
296
296
  """
297
297
 
298
298
  group_id: str = Field(..., description="Stable group identifier (SHA256 hash of sorted photo SHA256s)")
@@ -353,7 +353,7 @@ class SequenceGroup(BaseModel):
353
353
  """Model for a group of similar photo sequences.
354
354
 
355
355
  Represents multiple photo sequences that share similar template patterns or
356
- perceptual features. Used in review UI for sequence-level deduplication decisions.
356
+ perceptual features. Used in review UI for sequence-level stack finding decisions.
357
357
  """
358
358
 
359
359
  group_id: str = Field(..., description="Stable group identifier")
utils/photo_file.py CHANGED
@@ -18,6 +18,7 @@ import pillow_heif
18
18
  from PIL import ExifTags, Image, ImageOps
19
19
 
20
20
  from .config import CONFIG
21
+ from .template_parsing import INDEX_T
21
22
 
22
23
  pillow_heif.register_heif_opener()
23
24
 
@@ -67,20 +68,7 @@ class ImageData:
67
68
  Returns:
68
69
  Aspect ratio (width/height)
69
70
  """
70
- if "aspect_ratio" not in self._photo.cache:
71
- # Need to load pixels to get dimensions
72
- raw_pixels = self._photo._load_raw_pixels()
73
- h, w = raw_pixels.shape[:2]
74
- aspect_ratio = w / h if h > 0 else 0.0
75
-
76
- # Store in PhotoFile permanently
77
- self._photo.cache["aspect_ratio"] = aspect_ratio
78
- self._photo.cache["width"] = w
79
- self._photo.cache["height"] = h
80
-
81
- return aspect_ratio
82
- # Already calculated, return cached value
83
- return cast(float, self._photo.cache["aspect_ratio"])
71
+ return self._photo.aspect_ratio
84
72
 
85
73
  def get_pixels(self) -> npt.NDArray[np.uint8]:
86
74
  """Get pixels (EXIF orientation applied).
@@ -104,14 +92,6 @@ class ImageData:
104
92
  # Load raw pixels
105
93
  raw_pixels = self._photo._load_raw_pixels()
106
94
 
107
- # Cache dimensions if not already done
108
- if "aspect_ratio" not in self._photo.cache:
109
- h, w = raw_pixels.shape[:2]
110
- aspect_ratio = w / h if h > 0 else 0.0
111
- self._photo.cache["aspect_ratio"] = aspect_ratio
112
- self._photo.cache["width"] = w
113
- self._photo.cache["height"] = h
114
-
115
95
  # Cache in context manager scope
116
96
  self._pixels = raw_pixels
117
97
  return raw_pixels
@@ -122,10 +102,7 @@ class ImageData:
122
102
  Returns:
123
103
  Width in pixels
124
104
  """
125
- # Ensure dimensions are cached
126
- if "width" not in self._photo.cache:
127
- _ = self.get_aspect_ratio() # Triggers dimension extraction
128
- return cast(int, self._photo.cache["width"])
105
+ return self._photo.width
129
106
 
130
107
  def get_height(self) -> int:
131
108
  """Get height (triggers dimension extraction if needed).
@@ -133,10 +110,7 @@ class ImageData:
133
110
  Returns:
134
111
  Height in pixels
135
112
  """
136
- # Ensure dimensions are cached
137
- if "height" not in self._photo.cache:
138
- _ = self.get_aspect_ratio() # Triggers dimension extraction
139
- return cast(int, self._photo.cache["height"])
113
+ return self._photo.height
140
114
 
141
115
  def get_normalization_rotation(self) -> int:
142
116
  """Get rotation needed to normalize photo to landscape.
@@ -148,8 +122,8 @@ class ImageData:
148
122
  return self._original_rotation
149
123
 
150
124
  # Get dimensions (may trigger dimension extraction)
151
- width = self.get_width()
152
- height = self.get_height()
125
+ width = self._photo.width
126
+ height = self._photo.height
153
127
 
154
128
  # Portrait photos need 90° CCW rotation to become landscape
155
129
  self._original_rotation = 90 if width < height else 0
@@ -184,14 +158,8 @@ class ImageData:
184
158
  # Apply rotation using numpy
185
159
  if rotation == 0:
186
160
  rotated_pixels = original_pixels
187
- elif rotation == 90:
188
- rotated_pixels = np.rot90(original_pixels, k=1) # 90° CCW
189
- elif rotation == 180:
190
- rotated_pixels = np.rot90(original_pixels, k=2) # 180°
191
- elif rotation == 270:
192
- rotated_pixels = np.rot90(original_pixels, k=3) # 270° CCW (= 90° CW)
193
161
  else:
194
- raise ValueError(f"Invalid rotation angle: {rotation}. Must be 0, 90, 180, or 270.")
162
+ rotated_pixels = np.rot90(original_pixels, k=rotation // 90) # 90° CCW
195
163
 
196
164
  # Cache and return
197
165
  self._pixels_cache[rotation] = rotated_pixels
@@ -277,52 +245,43 @@ class PhotoFile:
277
245
  mime: str,
278
246
  size_bytes: int,
279
247
  file_id: int,
248
+ width: int,
249
+ height: int,
250
+ template: str | None = None,
251
+ template_index: INDEX_T | None = None,
280
252
  ):
281
- """Create a PhotoFile record with core properties.
253
+ """Create a PhotoFile record with metadata extracted from file.
282
254
 
283
- Core properties computed during tree walk: path, mime, size_bytes.
284
- All other properties (pixels, dimensions, EXIF) are computed lazily.
255
+ All metadata is provided by the caller (typically ComputeShaBins.stage_worker())
256
+ which performs the file I/O. PhotoFile.__init__ never opens files - it's a
257
+ pure data container.
285
258
 
286
259
  Args:
287
260
  path: Path to the photo file (None for anonymized test fixtures)
288
261
  mime: MIME type
289
262
  size_bytes: File size in bytes
290
263
  file_id: Unique identifier
264
+ width: Image width with EXIF orientation applied
265
+ height: Image height with EXIF orientation applied
266
+ template: Optional template pattern for this file (for test mocking)
267
+ template_index: Optional template index tuple (for test mocking)
291
268
  """
292
269
  self.id: int = file_id
293
270
  self.path: Path | None = path
294
271
  self.mime: str = mime
295
272
  self.size_bytes: int = size_bytes
273
+ self.width: int = width
274
+ self.height: int = height
296
275
 
297
- # Cache for lazy-loaded values (pixels, dimensions, EXIF, method preparations)
298
- self.cache: dict[str | tuple[str, int], Any] = {}
299
-
300
- @property
301
- def pixels(self) -> int:
302
- """Get pixel count (lazy-loaded and cached).
303
-
304
- Computes width * height on first access by opening the image.
305
- Cached for subsequent accesses.
276
+ self.pixels: int = self.width * self.height
277
+ self.aspect_ratio: float = self.width / self.height if self.height != 0 else 0.0
306
278
 
307
- Returns:
308
- Total pixel count (width * height, rotation-invariant)
309
- """
310
- if "pixels" not in self.cache:
311
- self.cache["pixels"] = self._compute_pixels()
312
- result: int = self.cache["pixels"]
313
- return result
314
-
315
- def _compute_pixels(self) -> int:
316
- """Compute pixel count by opening the image.
279
+ # Cache for lazy-loaded values (EXIF, method preparations, similarity scores)
280
+ self.cache: dict[str | tuple[str, int], Any] = {}
317
281
 
318
- Returns:
319
- Total pixels (width * height)
320
- """
321
- assert self.path is not None, f"Photo {self.id} has None path - cannot compute pixels"
322
- with Image.open(self.path) as img:
323
- width: int = img.width
324
- height: int = img.height
325
- return width * height
282
+ # Store template in cache if provided (for tests)
283
+ if template is not None and template_index is not None:
284
+ self.cache["TEMPLATE"] = (template, template_index)
326
285
 
327
286
  @contextmanager
328
287
  def image_data(self) -> Iterator[ImageData]:
@@ -341,23 +300,13 @@ class PhotoFile:
341
300
  AssertionError: If path is None and dimension values not in cache
342
301
 
343
302
  Example:
344
- >>> with photo.image_data() as img:
303
+ >>> with self.image_data() as img:
345
304
  ... # No pixels loaded yet
346
- ... if img.get_aspect_ratio() < 0.5:
305
+ ... if img._photo.aspect_ratio < 0.5:
347
306
  ... return # Early exit, pixels never loaded
348
307
  ... # Only load pixels if needed
349
308
  ... pixels = img.get_pixels()
350
309
  """
351
- # For test fixtures: Allow path=None if dimension values are pre-populated
352
- if self.path is None:
353
- required_keys = {"aspect_ratio", "width", "height"}
354
- cache_str_keys = {k for k in self.cache if isinstance(k, str)}
355
- missing_keys = required_keys - cache_str_keys
356
- assert not missing_keys, (
357
- f"Cannot get image data for photo {self.id}: path is None. "
358
- f"Test fixtures must pre-populate: {missing_keys}"
359
- )
360
-
361
310
  # Create lazy accessor
362
311
  data = ImageData(self)
363
312
 
@@ -388,19 +337,57 @@ class PhotoFile:
388
337
  The tuple is designed for use with min() to pick the "best" photo:
389
338
  - Prefer higher pixel count (negated)
390
339
  - Prefer larger file size (negated)
391
- - Use path as tiebreaker
340
+ - Use template as tiebreaker (keeps sequences together)
392
341
  - Use ID as final tiebreaker
393
342
 
394
343
  Returns:
395
- Tuple of (-pixels, -size_bytes, path_str, id)
344
+ Tuple of (-pixels, -size_bytes, template, id)
345
+
346
+ """
347
+ return -self.pixels, -self.size_bytes, self.template, self.id
348
+
349
+ @property
350
+ def template(self) -> str:
351
+ """Get template pattern for this photo's filename.
352
+
353
+ Template is extracted during SHA binning stage and cached.
354
+ Before SHA stage, falls back to the full path string.
355
+
356
+ Returns:
357
+ Template string (e.g., "IMG_{P0}.jpg" for "IMG_1234.jpg")
396
358
 
359
+ Example:
360
+ >>> photo.template # After SHA stage
361
+ "IMG_{P0}.jpg"
397
362
  """
398
- return -self.pixels, -self.size_bytes, str(self.path), self.id
363
+ if "TEMPLATE" in self.cache:
364
+ template_str: str = cast(str, self.cache["TEMPLATE"][0])
365
+ return template_str
366
+ return str(self.path) # Fallback before SHA stage
367
+
368
+ @property
369
+ def template_index(self) -> INDEX_T:
370
+ """Get digit sequence index for this photo's filename.
371
+
372
+ Returns:
373
+ Tuple of digit sequences (e.g., ("1234",) for "IMG_1234.jpg")
374
+ Empty tuple if template not yet cached.
375
+
376
+ Example:
377
+ >>> photo.template_index # After SHA stage
378
+ ("1234",)
379
+ """
380
+ if "TEMPLATE" in self.cache:
381
+ index: INDEX_T = cast(INDEX_T, self.cache["TEMPLATE"][1])
382
+ return index
383
+ return () # Fallback before SHA stage
399
384
 
400
385
  # === Lazy-loading properties for metadata ===
401
386
 
402
387
  @property
403
- def exif_data(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
388
+ def exif_data(
389
+ self,
390
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
404
391
  """Get EXIF data (lazy-loaded and cached).
405
392
 
406
393
  Returns:
@@ -412,7 +399,9 @@ class PhotoFile:
412
399
  return result
413
400
 
414
401
  @property
415
- def image_properties(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
402
+ def image_properties(
403
+ self,
404
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
416
405
  """Get PIL Image properties (format/mode/size lazy-loaded).
417
406
 
418
407
  Note: Width, height, and aspect_ratio are no longer included here.
@@ -427,7 +416,9 @@ class PhotoFile:
427
416
  return cast(dict[str, Any], self.cache["image_props"])
428
417
 
429
418
  @property
430
- def google_metadata(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
419
+ def google_metadata(
420
+ self,
421
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
431
422
  """Get Google Photos sidecar metadata (lazy-loaded and cached).
432
423
 
433
424
  Returns:
@@ -441,7 +432,9 @@ class PhotoFile:
441
432
  return result
442
433
 
443
434
  @property
444
- def xmp_metadata(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
435
+ def xmp_metadata(
436
+ self,
437
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
445
438
  """Get XMP sidecar metadata (lazy-loaded and cached).
446
439
 
447
440
  Returns:
@@ -455,7 +448,9 @@ class PhotoFile:
455
448
  return result
456
449
 
457
450
  @property
458
- def supplemental_metadata(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
451
+ def supplemental_metadata(
452
+ self,
453
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
459
454
  """Get supplemental metadata (lazy-loaded and cached).
460
455
 
461
456
  Returns:
@@ -470,7 +465,9 @@ class PhotoFile:
470
465
 
471
466
  # === Internal metadata loaders ===
472
467
 
473
- def _load_exif(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
468
+ def _load_exif(
469
+ self,
470
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
474
471
  """Load EXIF data from the image.
475
472
 
476
473
  Returns:
@@ -498,7 +495,9 @@ class PhotoFile:
498
495
 
499
496
  return result
500
497
 
501
- def _load_image_format(self) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
498
+ def _load_image_format(
499
+ self,
500
+ ) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
502
501
  """Load PIL Image format and mode (lightweight properties).
503
502
 
504
503
  Note: Width, height, aspect_ratio are no longer eager properties.