photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
utils/config.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Global configuration for
|
|
1
|
+
"""Global configuration for photo_stack_finder project."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -22,6 +22,9 @@ class ProcessingConfig:
|
|
|
22
22
|
# Random seeds for reproducibility
|
|
23
23
|
DEFAULT_RANDOM_SEED: int = 12345
|
|
24
24
|
|
|
25
|
+
# Byte-identical detection (skip to trust SHA256 uniqueness)
|
|
26
|
+
SKIP_BYTE_IDENTICAL: bool = True
|
|
27
|
+
|
|
25
28
|
# Sequential gates configuration (order matters)
|
|
26
29
|
# Gates are executed in order, short-circuiting on first failure
|
|
27
30
|
# "aspect_ratio" is special gate, others are photo_compare methods
|
|
@@ -76,7 +79,7 @@ class ProcessingConfig:
|
|
|
76
79
|
TARGET_FPR: float = 0.00075 # Target false positive rate for benchmark thresholds
|
|
77
80
|
|
|
78
81
|
# Logging configuration
|
|
79
|
-
LOG_LEVEL: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL # DEBUG, INFO, WARNING, ERROR, CRITICAL
|
|
82
|
+
LOG_LEVEL: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
|
|
80
83
|
|
|
81
84
|
|
|
82
85
|
@dataclass
|
|
@@ -105,7 +108,7 @@ class PathsConfig:
|
|
|
105
108
|
|
|
106
109
|
SOURCE_DIR: str = ""
|
|
107
110
|
WORK_DIR: str = ""
|
|
108
|
-
WORK_DIR_NAME: str = "
|
|
111
|
+
WORK_DIR_NAME: str = "photo_stack_finder"
|
|
109
112
|
|
|
110
113
|
# ALL filenames are now consistently abstracted as configurable constants
|
|
111
114
|
# Pipeline data files
|
|
@@ -316,7 +319,7 @@ class OrchestratorConfig:
|
|
|
316
319
|
"""Configuration for orchestrator server and UI."""
|
|
317
320
|
|
|
318
321
|
# Directory naming
|
|
319
|
-
DEFAULT_WORK_DIR_NAME: str = "
|
|
322
|
+
DEFAULT_WORK_DIR_NAME: str = "photo_stack_finder" # Default work directory name (relative to source parent)
|
|
320
323
|
|
|
321
324
|
# Static asset paths (computed relative to package)
|
|
322
325
|
# These are set dynamically and should not be overridden
|
|
@@ -344,7 +347,7 @@ ARG_CONFIG_MAP: list[tuple[str, str, Any, bool]] = [
|
|
|
344
347
|
|
|
345
348
|
@dataclass
|
|
346
349
|
class Config:
|
|
347
|
-
"""Global configuration for
|
|
350
|
+
"""Global configuration for photo_stack_finder."""
|
|
348
351
|
|
|
349
352
|
processing: ProcessingConfig
|
|
350
353
|
sequences: SequenceConfig
|
utils/data_io.py
CHANGED
|
@@ -1,83 +1,83 @@
|
|
|
1
|
-
"""Data I/O utilities for CSV file handling with error handling.
|
|
2
|
-
|
|
3
|
-
This module provides standardized functions for loading and saving CSV files
|
|
4
|
-
with consistent error handling, logging, and user feedback.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
import sys
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
|
|
14
|
-
# Configure logger for this module
|
|
15
|
-
logger = logging.getLogger(__name__)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def load_required_csv(
|
|
19
|
-
path: Path,
|
|
20
|
-
error_message: str,
|
|
21
|
-
**pandas_kwargs: Any,
|
|
22
|
-
) -> pd.DataFrame:
|
|
23
|
-
"""Load a CSV file or exit with error message if not found.
|
|
24
|
-
|
|
25
|
-
This function is designed for user-facing scripts that require specific
|
|
26
|
-
input files. If the file doesn't exist, it prints an error message and
|
|
27
|
-
exits the program with status code 1.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
path: Path to the CSV file to load
|
|
31
|
-
error_message: Error message to display if file doesn't exist
|
|
32
|
-
**pandas_kwargs: Additional keyword arguments to pass to pd.read_csv()
|
|
33
|
-
|
|
34
|
-
Returns:
|
|
35
|
-
The loaded DataFrame
|
|
36
|
-
|
|
37
|
-
Raises:
|
|
38
|
-
SystemExit: If the file doesn't exist (exits with code 1)
|
|
39
|
-
|
|
40
|
-
Example:
|
|
41
|
-
>>> df = load_required_csv(
|
|
42
|
-
... Path("data/scores.csv"),
|
|
43
|
-
... "scores.csv not found. Run benchmark first.",
|
|
44
|
-
... index_col=0
|
|
45
|
-
... )
|
|
46
|
-
"""
|
|
47
|
-
if not path.exists():
|
|
48
|
-
logger.error(error_message)
|
|
49
|
-
sys.exit(1)
|
|
50
|
-
|
|
51
|
-
# read_csv returns DataFrame, but mypy doesn't infer this from **kwargs
|
|
52
|
-
result: pd.DataFrame = pd.read_csv(path, **pandas_kwargs)
|
|
53
|
-
return result
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def save_dataframe_with_logging(
|
|
57
|
-
df: pd.DataFrame,
|
|
58
|
-
path: Path,
|
|
59
|
-
description: str,
|
|
60
|
-
**pandas_kwargs: Any,
|
|
61
|
-
) -> None:
|
|
62
|
-
"""Save DataFrame to CSV with logging.
|
|
63
|
-
|
|
64
|
-
Saves the DataFrame and prints a confirmation message indicating
|
|
65
|
-
what was saved and where.
|
|
66
|
-
|
|
67
|
-
Args:
|
|
68
|
-
df: DataFrame to save
|
|
69
|
-
path: Path where the CSV should be saved
|
|
70
|
-
description: Human-readable description of what's being saved (for logging)
|
|
71
|
-
**pandas_kwargs: Additional keyword arguments to pass to pd.to_csv()
|
|
72
|
-
|
|
73
|
-
Example:
|
|
74
|
-
>>> save_dataframe_with_logging(
|
|
75
|
-
... outliers_df,
|
|
76
|
-
... output_dir / "outliers.csv",
|
|
77
|
-
... "outlier pairs",
|
|
78
|
-
... index=False
|
|
79
|
-
... )
|
|
80
|
-
# Prints: "Saved 42 outlier pairs to output/outliers.csv"
|
|
81
|
-
"""
|
|
82
|
-
df.to_csv(path, **pandas_kwargs)
|
|
83
|
-
logger.info(f"Saved {len(df)} {description} to {path}")
|
|
1
|
+
"""Data I/O utilities for CSV file handling with error handling.
|
|
2
|
+
|
|
3
|
+
This module provides standardized functions for loading and saving CSV files
|
|
4
|
+
with consistent error handling, logging, and user feedback.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
# Configure logger for this module
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_required_csv(
|
|
19
|
+
path: Path,
|
|
20
|
+
error_message: str,
|
|
21
|
+
**pandas_kwargs: Any,
|
|
22
|
+
) -> pd.DataFrame:
|
|
23
|
+
"""Load a CSV file or exit with error message if not found.
|
|
24
|
+
|
|
25
|
+
This function is designed for user-facing scripts that require specific
|
|
26
|
+
input files. If the file doesn't exist, it prints an error message and
|
|
27
|
+
exits the program with status code 1.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
path: Path to the CSV file to load
|
|
31
|
+
error_message: Error message to display if file doesn't exist
|
|
32
|
+
**pandas_kwargs: Additional keyword arguments to pass to pd.read_csv()
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
The loaded DataFrame
|
|
36
|
+
|
|
37
|
+
Raises:
|
|
38
|
+
SystemExit: If the file doesn't exist (exits with code 1)
|
|
39
|
+
|
|
40
|
+
Example:
|
|
41
|
+
>>> df = load_required_csv(
|
|
42
|
+
... Path("data/scores.csv"),
|
|
43
|
+
... "scores.csv not found. Run benchmark first.",
|
|
44
|
+
... index_col=0
|
|
45
|
+
... )
|
|
46
|
+
"""
|
|
47
|
+
if not path.exists():
|
|
48
|
+
logger.error(error_message)
|
|
49
|
+
sys.exit(1)
|
|
50
|
+
|
|
51
|
+
# read_csv returns DataFrame, but mypy doesn't infer this from **kwargs
|
|
52
|
+
result: pd.DataFrame = pd.read_csv(path, **pandas_kwargs)
|
|
53
|
+
return result
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def save_dataframe_with_logging(
|
|
57
|
+
df: pd.DataFrame,
|
|
58
|
+
path: Path,
|
|
59
|
+
description: str,
|
|
60
|
+
**pandas_kwargs: Any,
|
|
61
|
+
) -> None:
|
|
62
|
+
"""Save DataFrame to CSV with logging.
|
|
63
|
+
|
|
64
|
+
Saves the DataFrame and prints a confirmation message indicating
|
|
65
|
+
what was saved and where.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
df: DataFrame to save
|
|
69
|
+
path: Path where the CSV should be saved
|
|
70
|
+
description: Human-readable description of what's being saved (for logging)
|
|
71
|
+
**pandas_kwargs: Additional keyword arguments to pass to pd.to_csv()
|
|
72
|
+
|
|
73
|
+
Example:
|
|
74
|
+
>>> save_dataframe_with_logging(
|
|
75
|
+
... outliers_df,
|
|
76
|
+
... output_dir / "outliers.csv",
|
|
77
|
+
... "outlier pairs",
|
|
78
|
+
... index=False
|
|
79
|
+
... )
|
|
80
|
+
# Prints: "Saved 42 outlier pairs to output/outliers.csv"
|
|
81
|
+
"""
|
|
82
|
+
df.to_csv(path, **pandas_kwargs)
|
|
83
|
+
logger.info(f"Saved {len(df)} {description} to {path}")
|
utils/graph_context.py
CHANGED
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
"""Global graph context for pipeline auto-registration.
|
|
2
|
-
|
|
3
|
-
This module holds the active PipelineGraph instance during pipeline
|
|
4
|
-
construction. Stages and channels auto-register with this graph when
|
|
5
|
-
instantiated within a PipelineBuilder context.
|
|
6
|
-
|
|
7
|
-
Architecture:
|
|
8
|
-
- Breaks circular dependency between pipeline_stage, pipeline_graph, and channel
|
|
9
|
-
- Provides clean separation of concerns (graph context vs stage implementation)
|
|
10
|
-
- Enables auto-registration pattern without tight coupling
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
# FIXME: Is this thread-safe?
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
from .pipeline_graph import PipelineGraph
|
|
18
|
-
|
|
19
|
-
# Circular dependency resolved by moving BaseChannel from channel.py to ports.py
|
|
20
|
-
# Previous cycle: channel → graph_context → pipeline_graph → channel (via BaseChannel)
|
|
21
|
-
# Now: pipeline_graph → ports (BaseChannel), channel → ports (BaseChannel), no cycle!
|
|
22
|
-
|
|
23
|
-
# Global graph context set by PipelineBuilder context manager
|
|
24
|
-
_active_graph: PipelineGraph | None = None
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def get_active_graph() -> PipelineGraph | None:
|
|
28
|
-
"""Get the currently active pipeline graph for auto-registration.
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
The active PipelineGraph if within a PipelineBuilder context, None otherwise
|
|
32
|
-
"""
|
|
33
|
-
return _active_graph
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def set_active_graph(graph: PipelineGraph | None) -> None:
|
|
37
|
-
"""Set the active pipeline graph for auto-registration.
|
|
38
|
-
|
|
39
|
-
Args:
|
|
40
|
-
graph: The PipelineGraph to set as active, or None to clear
|
|
41
|
-
"""
|
|
42
|
-
global _active_graph # noqa: PLW0603
|
|
43
|
-
# Library configuration pattern - global state for graph context
|
|
44
|
-
_active_graph = graph
|
|
1
|
+
"""Global graph context for pipeline auto-registration.
|
|
2
|
+
|
|
3
|
+
This module holds the active PipelineGraph instance during pipeline
|
|
4
|
+
construction. Stages and channels auto-register with this graph when
|
|
5
|
+
instantiated within a PipelineBuilder context.
|
|
6
|
+
|
|
7
|
+
Architecture:
|
|
8
|
+
- Breaks circular dependency between pipeline_stage, pipeline_graph, and channel
|
|
9
|
+
- Provides clean separation of concerns (graph context vs stage implementation)
|
|
10
|
+
- Enables auto-registration pattern without tight coupling
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# FIXME: Is this thread-safe?
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from .pipeline_graph import PipelineGraph
|
|
18
|
+
|
|
19
|
+
# Circular dependency resolved by moving BaseChannel from channel.py to ports.py
|
|
20
|
+
# Previous cycle: channel → graph_context → pipeline_graph → channel (via BaseChannel)
|
|
21
|
+
# Now: pipeline_graph → ports (BaseChannel), channel → ports (BaseChannel), no cycle!
|
|
22
|
+
|
|
23
|
+
# Global graph context set by PipelineBuilder context manager
|
|
24
|
+
_active_graph: PipelineGraph | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_active_graph() -> PipelineGraph | None:
|
|
28
|
+
"""Get the currently active pipeline graph for auto-registration.
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
The active PipelineGraph if within a PipelineBuilder context, None otherwise
|
|
32
|
+
"""
|
|
33
|
+
return _active_graph
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def set_active_graph(graph: PipelineGraph | None) -> None:
|
|
37
|
+
"""Set the active pipeline graph for auto-registration.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
graph: The PipelineGraph to set as active, or None to clear
|
|
41
|
+
"""
|
|
42
|
+
global _active_graph # noqa: PLW0603
|
|
43
|
+
# Library configuration pattern - global state for graph context
|
|
44
|
+
_active_graph = graph
|
utils/logger.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Logging module for
|
|
1
|
+
"""Logging module for photo_stack_finder project.
|
|
2
2
|
|
|
3
3
|
Provides centralized logging configuration that reads from CONFIG.
|
|
4
4
|
"""
|
|
@@ -32,7 +32,7 @@ def get_logger() -> logging.Logger:
|
|
|
32
32
|
# Library configuration pattern - global state for logger
|
|
33
33
|
|
|
34
34
|
if _logger is None:
|
|
35
|
-
_logger = logging.getLogger("
|
|
35
|
+
_logger = logging.getLogger("photo_stack_finder")
|
|
36
36
|
_logger.setLevel(getattr(logging, CONFIG.processing.LOG_LEVEL))
|
|
37
37
|
|
|
38
38
|
# Console handler with formatting
|
utils/models.py
CHANGED
|
@@ -292,7 +292,7 @@ class IdenticalGroup(BaseModel):
|
|
|
292
292
|
"""Model for a group of byte-identical photos.
|
|
293
293
|
|
|
294
294
|
Represents a set of photos that have identical SHA256 hashes and are verified
|
|
295
|
-
to be byte-for-byte identical. Used in review UI for photo
|
|
295
|
+
to be byte-for-byte identical. Used in review UI for photo stack finding decisions.
|
|
296
296
|
"""
|
|
297
297
|
|
|
298
298
|
group_id: str = Field(..., description="Stable group identifier (SHA256 hash of sorted photo SHA256s)")
|
|
@@ -353,7 +353,7 @@ class SequenceGroup(BaseModel):
|
|
|
353
353
|
"""Model for a group of similar photo sequences.
|
|
354
354
|
|
|
355
355
|
Represents multiple photo sequences that share similar template patterns or
|
|
356
|
-
perceptual features. Used in review UI for sequence-level
|
|
356
|
+
perceptual features. Used in review UI for sequence-level stack finding decisions.
|
|
357
357
|
"""
|
|
358
358
|
|
|
359
359
|
group_id: str = Field(..., description="Stable group identifier")
|
utils/photo_file.py
CHANGED
|
@@ -18,6 +18,7 @@ import pillow_heif
|
|
|
18
18
|
from PIL import ExifTags, Image, ImageOps
|
|
19
19
|
|
|
20
20
|
from .config import CONFIG
|
|
21
|
+
from .template_parsing import INDEX_T
|
|
21
22
|
|
|
22
23
|
pillow_heif.register_heif_opener()
|
|
23
24
|
|
|
@@ -67,20 +68,7 @@ class ImageData:
|
|
|
67
68
|
Returns:
|
|
68
69
|
Aspect ratio (width/height)
|
|
69
70
|
"""
|
|
70
|
-
|
|
71
|
-
# Need to load pixels to get dimensions
|
|
72
|
-
raw_pixels = self._photo._load_raw_pixels()
|
|
73
|
-
h, w = raw_pixels.shape[:2]
|
|
74
|
-
aspect_ratio = w / h if h > 0 else 0.0
|
|
75
|
-
|
|
76
|
-
# Store in PhotoFile permanently
|
|
77
|
-
self._photo.cache["aspect_ratio"] = aspect_ratio
|
|
78
|
-
self._photo.cache["width"] = w
|
|
79
|
-
self._photo.cache["height"] = h
|
|
80
|
-
|
|
81
|
-
return aspect_ratio
|
|
82
|
-
# Already calculated, return cached value
|
|
83
|
-
return cast(float, self._photo.cache["aspect_ratio"])
|
|
71
|
+
return self._photo.aspect_ratio
|
|
84
72
|
|
|
85
73
|
def get_pixels(self) -> npt.NDArray[np.uint8]:
|
|
86
74
|
"""Get pixels (EXIF orientation applied).
|
|
@@ -104,14 +92,6 @@ class ImageData:
|
|
|
104
92
|
# Load raw pixels
|
|
105
93
|
raw_pixels = self._photo._load_raw_pixels()
|
|
106
94
|
|
|
107
|
-
# Cache dimensions if not already done
|
|
108
|
-
if "aspect_ratio" not in self._photo.cache:
|
|
109
|
-
h, w = raw_pixels.shape[:2]
|
|
110
|
-
aspect_ratio = w / h if h > 0 else 0.0
|
|
111
|
-
self._photo.cache["aspect_ratio"] = aspect_ratio
|
|
112
|
-
self._photo.cache["width"] = w
|
|
113
|
-
self._photo.cache["height"] = h
|
|
114
|
-
|
|
115
95
|
# Cache in context manager scope
|
|
116
96
|
self._pixels = raw_pixels
|
|
117
97
|
return raw_pixels
|
|
@@ -122,10 +102,7 @@ class ImageData:
|
|
|
122
102
|
Returns:
|
|
123
103
|
Width in pixels
|
|
124
104
|
"""
|
|
125
|
-
|
|
126
|
-
if "width" not in self._photo.cache:
|
|
127
|
-
_ = self.get_aspect_ratio() # Triggers dimension extraction
|
|
128
|
-
return cast(int, self._photo.cache["width"])
|
|
105
|
+
return self._photo.width
|
|
129
106
|
|
|
130
107
|
def get_height(self) -> int:
|
|
131
108
|
"""Get height (triggers dimension extraction if needed).
|
|
@@ -133,10 +110,7 @@ class ImageData:
|
|
|
133
110
|
Returns:
|
|
134
111
|
Height in pixels
|
|
135
112
|
"""
|
|
136
|
-
|
|
137
|
-
if "height" not in self._photo.cache:
|
|
138
|
-
_ = self.get_aspect_ratio() # Triggers dimension extraction
|
|
139
|
-
return cast(int, self._photo.cache["height"])
|
|
113
|
+
return self._photo.height
|
|
140
114
|
|
|
141
115
|
def get_normalization_rotation(self) -> int:
|
|
142
116
|
"""Get rotation needed to normalize photo to landscape.
|
|
@@ -148,8 +122,8 @@ class ImageData:
|
|
|
148
122
|
return self._original_rotation
|
|
149
123
|
|
|
150
124
|
# Get dimensions (stored on the PhotoFile at construction; no pixel loading needed)
|
|
151
|
-
width = self.
|
|
152
|
-
height = self.
|
|
125
|
+
width = self._photo.width
|
|
126
|
+
height = self._photo.height
|
|
153
127
|
|
|
154
128
|
# Portrait photos need 90° CCW rotation to become landscape
|
|
155
129
|
self._original_rotation = 90 if width < height else 0
|
|
@@ -184,14 +158,8 @@ class ImageData:
|
|
|
184
158
|
# Apply rotation using numpy
|
|
185
159
|
if rotation == 0:
|
|
186
160
|
rotated_pixels = original_pixels
|
|
187
|
-
elif rotation == 90:
|
|
188
|
-
rotated_pixels = np.rot90(original_pixels, k=1) # 90° CCW
|
|
189
|
-
elif rotation == 180:
|
|
190
|
-
rotated_pixels = np.rot90(original_pixels, k=2) # 180°
|
|
191
|
-
elif rotation == 270:
|
|
192
|
-
rotated_pixels = np.rot90(original_pixels, k=3) # 270° CCW (= 90° CW)
|
|
193
161
|
else:
|
|
194
|
-
|
|
162
|
+
rotated_pixels = np.rot90(original_pixels, k=rotation // 90)  # rotation // 90 quarter-turns CCW (90 -> k=1, 180 -> k=2, 270 -> k=3)
|
|
195
163
|
|
|
196
164
|
# Cache and return
|
|
197
165
|
self._pixels_cache[rotation] = rotated_pixels
|
|
@@ -277,52 +245,43 @@ class PhotoFile:
|
|
|
277
245
|
mime: str,
|
|
278
246
|
size_bytes: int,
|
|
279
247
|
file_id: int,
|
|
248
|
+
width: int,
|
|
249
|
+
height: int,
|
|
250
|
+
template: str | None = None,
|
|
251
|
+
template_index: INDEX_T | None = None,
|
|
280
252
|
):
|
|
281
|
-
"""Create a PhotoFile record with
|
|
253
|
+
"""Create a PhotoFile record with metadata extracted from file.
|
|
282
254
|
|
|
283
|
-
|
|
284
|
-
|
|
255
|
+
All metadata is provided by the caller (typically ComputeShaBins.stage_worker())
|
|
256
|
+
which performs the file I/O. PhotoFile.__init__ never opens files - it's a
|
|
257
|
+
pure data container.
|
|
285
258
|
|
|
286
259
|
Args:
|
|
287
260
|
path: Path to the photo file (None for anonymized test fixtures)
|
|
288
261
|
mime: MIME type
|
|
289
262
|
size_bytes: File size in bytes
|
|
290
263
|
file_id: Unique identifier
|
|
264
|
+
width: Image width with EXIF orientation applied
|
|
265
|
+
height: Image height with EXIF orientation applied
|
|
266
|
+
template: Optional template pattern for this file (for test mocking)
|
|
267
|
+
template_index: Optional template index tuple (for test mocking)
|
|
291
268
|
"""
|
|
292
269
|
self.id: int = file_id
|
|
293
270
|
self.path: Path | None = path
|
|
294
271
|
self.mime: str = mime
|
|
295
272
|
self.size_bytes: int = size_bytes
|
|
273
|
+
self.width: int = width
|
|
274
|
+
self.height: int = height
|
|
296
275
|
|
|
297
|
-
|
|
298
|
-
self.
|
|
299
|
-
|
|
300
|
-
@property
|
|
301
|
-
def pixels(self) -> int:
|
|
302
|
-
"""Get pixel count (lazy-loaded and cached).
|
|
303
|
-
|
|
304
|
-
Computes width * height on first access by opening the image.
|
|
305
|
-
Cached for subsequent accesses.
|
|
276
|
+
self.pixels: int = self.width * self.height
|
|
277
|
+
self.aspect_ratio: float = self.width / self.height if self.height != 0 else 0.0
|
|
306
278
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
"""
|
|
310
|
-
if "pixels" not in self.cache:
|
|
311
|
-
self.cache["pixels"] = self._compute_pixels()
|
|
312
|
-
result: int = self.cache["pixels"]
|
|
313
|
-
return result
|
|
314
|
-
|
|
315
|
-
def _compute_pixels(self) -> int:
|
|
316
|
-
"""Compute pixel count by opening the image.
|
|
279
|
+
# Cache for lazy-loaded values (EXIF, method preparations, similarity scores)
|
|
280
|
+
self.cache: dict[str | tuple[str, int], Any] = {}
|
|
317
281
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
assert self.path is not None, f"Photo {self.id} has None path - cannot compute pixels"
|
|
322
|
-
with Image.open(self.path) as img:
|
|
323
|
-
width: int = img.width
|
|
324
|
-
height: int = img.height
|
|
325
|
-
return width * height
|
|
282
|
+
# Store template in cache if provided (for tests)
|
|
283
|
+
if template is not None and template_index is not None:
|
|
284
|
+
self.cache["TEMPLATE"] = (template, template_index)
|
|
326
285
|
|
|
327
286
|
@contextmanager
|
|
328
287
|
def image_data(self) -> Iterator[ImageData]:
|
|
@@ -341,23 +300,13 @@ class PhotoFile:
|
|
|
341
300
|
AssertionError: If path is None and dimension values not in cache
|
|
342
301
|
|
|
343
302
|
Example:
|
|
344
|
-
>>> with
|
|
303
|
+
>>> with self.image_data() as img:
|
|
345
304
|
... # No pixels loaded yet
|
|
346
|
-
... if img.
|
|
305
|
+
... if img._photo.aspect_ratio < 0.5:
|
|
347
306
|
... return # Early exit, pixels never loaded
|
|
348
307
|
... # Only load pixels if needed
|
|
349
308
|
... pixels = img.get_pixels()
|
|
350
309
|
"""
|
|
351
|
-
# For test fixtures: Allow path=None if dimension values are pre-populated
|
|
352
|
-
if self.path is None:
|
|
353
|
-
required_keys = {"aspect_ratio", "width", "height"}
|
|
354
|
-
cache_str_keys = {k for k in self.cache if isinstance(k, str)}
|
|
355
|
-
missing_keys = required_keys - cache_str_keys
|
|
356
|
-
assert not missing_keys, (
|
|
357
|
-
f"Cannot get image data for photo {self.id}: path is None. "
|
|
358
|
-
f"Test fixtures must pre-populate: {missing_keys}"
|
|
359
|
-
)
|
|
360
|
-
|
|
361
310
|
# Create lazy accessor
|
|
362
311
|
data = ImageData(self)
|
|
363
312
|
|
|
@@ -388,19 +337,57 @@ class PhotoFile:
|
|
|
388
337
|
The tuple is designed for use with min() to pick the "best" photo:
|
|
389
338
|
- Prefer higher pixel count (negated)
|
|
390
339
|
- Prefer larger file size (negated)
|
|
391
|
-
- Use
|
|
340
|
+
- Use template as tiebreaker (keeps sequences together)
|
|
392
341
|
- Use ID as final tiebreaker
|
|
393
342
|
|
|
394
343
|
Returns:
|
|
395
|
-
Tuple of (-pixels, -size_bytes,
|
|
344
|
+
Tuple of (-pixels, -size_bytes, template, id)
|
|
345
|
+
|
|
346
|
+
"""
|
|
347
|
+
return -self.pixels, -self.size_bytes, self.template, self.id
|
|
348
|
+
|
|
349
|
+
@property
|
|
350
|
+
def template(self) -> str:
|
|
351
|
+
"""Get template pattern for this photo's filename.
|
|
352
|
+
|
|
353
|
+
Template is extracted during SHA binning stage and cached.
|
|
354
|
+
Before SHA stage, falls back to the full path string.
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
Template string (e.g., "IMG_{P0}.jpg" for "IMG_1234.jpg")
|
|
396
358
|
|
|
359
|
+
Example:
|
|
360
|
+
>>> photo.template # After SHA stage
|
|
361
|
+
"IMG_{P0}.jpg"
|
|
397
362
|
"""
|
|
398
|
-
|
|
363
|
+
if "TEMPLATE" in self.cache:
|
|
364
|
+
template_str: str = cast(str, self.cache["TEMPLATE"][0])
|
|
365
|
+
return template_str
|
|
366
|
+
return str(self.path) # Fallback before SHA stage
|
|
367
|
+
|
|
368
|
+
@property
|
|
369
|
+
def template_index(self) -> INDEX_T:
|
|
370
|
+
"""Get digit sequence index for this photo's filename.
|
|
371
|
+
|
|
372
|
+
Returns:
|
|
373
|
+
Tuple of digit sequences (e.g., ("1234",) for "IMG_1234.jpg")
|
|
374
|
+
Empty tuple if template not yet cached.
|
|
375
|
+
|
|
376
|
+
Example:
|
|
377
|
+
>>> photo.template_index # After SHA stage
|
|
378
|
+
("1234",)
|
|
379
|
+
"""
|
|
380
|
+
if "TEMPLATE" in self.cache:
|
|
381
|
+
index: INDEX_T = cast(INDEX_T, self.cache["TEMPLATE"][1])
|
|
382
|
+
return index
|
|
383
|
+
return () # Fallback before SHA stage
|
|
399
384
|
|
|
400
385
|
# === Lazy-loading properties for metadata ===
|
|
401
386
|
|
|
402
387
|
@property
|
|
403
|
-
def exif_data(
|
|
388
|
+
def exif_data(
|
|
389
|
+
self,
|
|
390
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
404
391
|
"""Get EXIF data (lazy-loaded and cached).
|
|
405
392
|
|
|
406
393
|
Returns:
|
|
@@ -412,7 +399,9 @@ class PhotoFile:
|
|
|
412
399
|
return result
|
|
413
400
|
|
|
414
401
|
@property
|
|
415
|
-
def image_properties(
|
|
402
|
+
def image_properties(
|
|
403
|
+
self,
|
|
404
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
416
405
|
"""Get PIL Image properties (format/mode/size lazy-loaded).
|
|
417
406
|
|
|
418
407
|
Note: Width, height, and aspect_ratio are no longer included here.
|
|
@@ -427,7 +416,9 @@ class PhotoFile:
|
|
|
427
416
|
return cast(dict[str, Any], self.cache["image_props"])
|
|
428
417
|
|
|
429
418
|
@property
|
|
430
|
-
def google_metadata(
|
|
419
|
+
def google_metadata(
|
|
420
|
+
self,
|
|
421
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
431
422
|
"""Get Google Photos sidecar metadata (lazy-loaded and cached).
|
|
432
423
|
|
|
433
424
|
Returns:
|
|
@@ -441,7 +432,9 @@ class PhotoFile:
|
|
|
441
432
|
return result
|
|
442
433
|
|
|
443
434
|
@property
|
|
444
|
-
def xmp_metadata(
|
|
435
|
+
def xmp_metadata(
|
|
436
|
+
self,
|
|
437
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
445
438
|
"""Get XMP sidecar metadata (lazy-loaded and cached).
|
|
446
439
|
|
|
447
440
|
Returns:
|
|
@@ -455,7 +448,9 @@ class PhotoFile:
|
|
|
455
448
|
return result
|
|
456
449
|
|
|
457
450
|
@property
|
|
458
|
-
def supplemental_metadata(
|
|
451
|
+
def supplemental_metadata(
|
|
452
|
+
self,
|
|
453
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
459
454
|
"""Get supplemental metadata (lazy-loaded and cached).
|
|
460
455
|
|
|
461
456
|
Returns:
|
|
@@ -470,7 +465,9 @@ class PhotoFile:
|
|
|
470
465
|
|
|
471
466
|
# === Internal metadata loaders ===
|
|
472
467
|
|
|
473
|
-
def _load_exif(
|
|
468
|
+
def _load_exif(
|
|
469
|
+
self,
|
|
470
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
474
471
|
"""Load EXIF data from the image.
|
|
475
472
|
|
|
476
473
|
Returns:
|
|
@@ -498,7 +495,9 @@ class PhotoFile:
|
|
|
498
495
|
|
|
499
496
|
return result
|
|
500
497
|
|
|
501
|
-
def _load_image_format(
|
|
498
|
+
def _load_image_format(
|
|
499
|
+
self,
|
|
500
|
+
) -> dict[str, Any]: # pragma: no cover - Reserved for future metadata features
|
|
502
501
|
"""Load PIL Image format and mode (lightweight properties).
|
|
503
502
|
|
|
504
503
|
Note: Width, height, aspect_ratio are no longer eager properties.
|