photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
orchestrator/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
- """Orchestration layer for photo deduplication workflow.
1
+ """Orchestration layer for photo stack finding workflow.
2
2
 
3
3
  This module provides the web-based interface and pipeline orchestration for the
4
- photo deduplication system. It includes:
4
+ photo stack finding system. It includes:
5
5
 
6
6
  - FastAPI web application (app)
7
7
  - Pipeline orchestration and execution (PipelineOrchestrator)
orchestrator/app.py CHANGED
@@ -1,4 +1,4 @@
1
- """FastAPI orchestration server for photo deduplication.
1
+ """FastAPI orchestration server for photo stack finding.
2
2
 
3
3
  This provides a web-based interface for configuring and running the pipeline.
4
4
 
@@ -209,7 +209,7 @@ async def lifespan_manager(app: FastAPI) -> AsyncIterator[None]:
209
209
 
210
210
 
211
211
  # Pass the new lifespan function to the FastAPI constructor
212
- app = FastAPI(title="Photo Dedup Orchestrator", lifespan=lifespan_manager)
212
+ app = FastAPI(title="Photo Stack Finder Orchestrator", lifespan=lifespan_manager)
213
213
 
214
214
 
215
215
  @app.get("/")
@@ -876,9 +876,7 @@ async def websocket_progress(
876
876
  async def serve_review_identical() -> FileResponse:
877
877
  """Serve identical files review interface."""
878
878
  static_path = (
879
- Path(CONFIG.orchestrator.STATIC_DIR)
880
- if CONFIG.orchestrator.STATIC_DIR
881
- else Path(__file__).parent / "static"
879
+ Path(CONFIG.orchestrator.STATIC_DIR) if CONFIG.orchestrator.STATIC_DIR else Path(__file__).parent / "static"
882
880
  )
883
881
  return FileResponse(static_path / "review_identical.html")
884
882
 
@@ -887,9 +885,7 @@ async def serve_review_identical() -> FileResponse:
887
885
  async def serve_review_sequences() -> FileResponse:
888
886
  """Serve sequences review interface."""
889
887
  static_path = (
890
- Path(CONFIG.orchestrator.STATIC_DIR)
891
- if CONFIG.orchestrator.STATIC_DIR
892
- else Path(__file__).parent / "static"
888
+ Path(CONFIG.orchestrator.STATIC_DIR) if CONFIG.orchestrator.STATIC_DIR else Path(__file__).parent / "static"
893
889
  )
894
890
  return FileResponse(static_path / "review_sequences.html")
895
891
 
@@ -899,13 +895,12 @@ static_dir = Path(CONFIG.orchestrator.STATIC_DIR) if CONFIG.orchestrator.STATIC_
899
895
  if static_dir and static_dir.exists():
900
896
  app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
901
897
 
898
+
902
899
  # Serve review_common.js from static directory
903
900
  @app.get("/review_common.js")
904
901
  async def serve_review_common_js() -> FileResponse:
905
902
  """Serve review_common.js from static directory."""
906
903
  static_path = (
907
- Path(CONFIG.orchestrator.STATIC_DIR)
908
- if CONFIG.orchestrator.STATIC_DIR
909
- else Path(__file__).parent / "static"
904
+ Path(CONFIG.orchestrator.STATIC_DIR) if CONFIG.orchestrator.STATIC_DIR else Path(__file__).parent / "static"
910
905
  )
911
906
  return FileResponse(static_path / "review_common.js", media_type="application/javascript")
orchestrator/build_pipeline.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Pipeline construction using PipelineBuilder pattern.
2
2
 
3
- This module constructs the complete photo deduplication pipeline using the
3
+ This module constructs the complete photo stack finding pipeline using the
4
4
  new port-based orchestration system. All 8 stages are wired together via
5
5
  Channel connections based on their InputPort/OutputPort declarations.
6
6
  """
@@ -19,7 +19,6 @@ from utils import (
19
19
  ComputePerceptualHash,
20
20
  ComputePerceptualMatch,
21
21
  ComputeShaBins,
22
- ComputeTemplates,
23
22
  ComputeTemplateSimilarity,
24
23
  ComputeVersions,
25
24
  InputPort,
@@ -31,18 +30,21 @@ from .pipeline_orchestrator import PipelineOrchestrator
31
30
 
32
31
 
33
32
  def build_pipeline(source_dir: Path) -> PipelineOrchestrator:
34
- """Build the complete photo deduplication pipeline.
35
-
36
- Constructs a pipeline graph with 8-9 stages connected via ports:
37
- 1. ComputeShaBins - Hash files and bin by SHA256
38
- 2. ComputeIdentical - Find byte-identical duplicates
39
- 3. ComputeTemplates - Bin photos by filename template
40
- 4. ComputeVersions - Detect version patterns in filenames
41
- 5. ComputeTemplateSimilarity - Match photos with similar templates
42
- 6. ComputeIndices - Find sequences with overlapping indices
43
- 7. ComputePerceptualHash - Compute perceptual hashes and bin
44
- 8. ComputePerceptualMatch - Match photos by perceptual hash similarity
45
- 9. ComputeBenchmarks - (Optional, controlled by CONFIG.benchmark.ENABLED)
33
+ """Build the complete photo stack finding pipeline.
34
+
35
+ Constructs a pipeline graph with 7-8 stages connected via ports:
36
+ 1. ComputeShaBins - Hash files, bin by SHA256, extract templates
37
+ 2. ComputeIdentical - Find byte-identical duplicates, output template bins
38
+ 3. ComputeVersions - Detect version patterns in filenames
39
+ 4. ComputeTemplateSimilarity - Match photos with similar templates
40
+ 5. ComputeIndices - Find sequences with overlapping indices
41
+ 6. ComputePerceptualHash - Compute perceptual hashes and bin
42
+ 7. ComputePerceptualMatch - Match photos by perceptual hash similarity
43
+ 8. ComputeBenchmarks - (Optional, controlled by CONFIG.benchmark.ENABLED)
44
+
45
+ Note: ComputeTemplates stage has been merged into ComputeIdentical.
46
+ Template extraction now happens during SHA binning in ComputeShaBins,
47
+ and template binning happens in ComputeIdentical's finalise() method.
46
48
 
47
49
  Args:
48
50
  source_dir: Root directory containing photos to process
@@ -60,17 +62,13 @@ def build_pipeline(source_dir: Path) -> PipelineOrchestrator:
60
62
  # SHA256 Hashing and Binning
61
63
  sha_bins_stage = ComputeShaBins(source_path=source_dir)
62
64
 
63
- # Identical Files Detection
65
+ # Identical Files Detection (outputs template bins)
64
66
  identical_stage = ComputeIdentical()
65
67
  Channel(sha_bins_stage.sha_bins_o, identical_stage.sha_bins_i)
66
68
 
67
- # Template Binning
68
- templates_stage = ComputeTemplates()
69
- Channel(identical_stage.nonidentical_o, templates_stage.nonidentical_photos_i)
70
-
71
- # Version Detection
69
+ # Version Detection (receives template bins directly from ComputeIdentical)
72
70
  versions_stage = ComputeVersions()
73
- Channel(templates_stage.template_bins_o, versions_stage.template_bins_i)
71
+ Channel(identical_stage.nonidentical_o, versions_stage.template_bins_i)
74
72
 
75
73
  # Template Similarity
76
74
  template_similarity_stage = ComputeTemplateSimilarity()
orchestrator/orchestrator_runner.py CHANGED
@@ -68,25 +68,25 @@ def get_os_subdir() -> str:
68
68
 
69
69
  Returns:
70
70
  OS-specific subdirectory name:
71
- - 'window' for Windows
71
+ - 'windows' for Windows
72
72
  - 'linux' for Linux
73
73
  - 'darwin' for macOS
74
74
  - Platform name for others
75
75
 
76
76
  Example:
77
- work_dir = source_parent / "photo_dedup" / get_os_subdir()
78
- # Windows: .../photo_dedup/window/
79
- # Linux: .../photo_dedup/linux/
80
- # macOS: .../photo_dedup/darwin/
77
+ work_dir = source_parent / "photo_stack_finder" / get_os_subdir()
78
+ # Windows: .../photo_stack_finder/windows/
79
+ # Linux: .../photo_stack_finder/linux/
80
+ # macOS: .../photo_stack_finder/darwin/
81
81
  """
82
82
  platform = sys.platform
83
83
  if platform.startswith("win"):
84
- return "window"
84
+ return "windows"
85
85
  if platform.startswith("linux"):
86
86
  return "linux"
87
87
  if platform.startswith("darwin"):
88
88
  return "darwin"
89
- return platform # Fallback for other platforms
89
+ return platform # Fallback for other platforms
90
90
 
91
91
 
92
92
  @dataclass
@@ -468,7 +468,7 @@ class OrchestratorRunner:
468
468
  # Default work_dir if not provided: OS-specific subdirectory
469
469
  source_path = Path(CONFIG.paths.SOURCE_DIR)
470
470
  os_subdir = get_os_subdir()
471
- CONFIG.paths.WORK_DIR = str(source_path.parent / "photo_dedup" / os_subdir)
471
+ CONFIG.paths.WORK_DIR = str(source_path.parent / "photo_stack_finder" / os_subdir)
472
472
 
473
473
  # Create work directory if it doesn't exist
474
474
  CONFIG.paths.work_dir.mkdir(parents=True, exist_ok=True)
@@ -483,6 +483,9 @@ class OrchestratorRunner:
483
483
  if "debug_mode" in config:
484
484
  CONFIG.processing.DEBUG_MODE = config["debug_mode"]
485
485
 
486
+ if "skip_byte_identical" in config:
487
+ CONFIG.processing.SKIP_BYTE_IDENTICAL = config["skip_byte_identical"]
488
+
486
489
  # Update gate thresholds if provided
487
490
  if config.get("gate_thresholds"):
488
491
  assert CONFIG.processing.GATE_THRESHOLDS is not None
orchestrator/pipeline_builder.py CHANGED
@@ -1,126 +1,126 @@
1
- """Pipeline builder with automatic stage and channel registration.
2
-
3
- Provides a context manager that enables declarative pipeline construction:
4
- - Stages created within the context auto-register with the graph
5
- - Channels created within the context auto-register edges
6
- - Graph validation happens automatically on context exit
7
- - Execution order is computed automatically
8
- - PipelineOrchestrator is created ready to execute
9
-
10
- Usage:
11
- with PipelineBuilder() as builder:
12
- # Stages and channels auto-register during construction
13
- stage1 = MyStage(path1, "stage1")
14
- stage2 = MyStage(path2, "stage2")
15
- Channel(stage1.output_o, stage2.input_i)
16
-
17
- # After context exit, builder.orchestrator is ready
18
- builder.orchestrator.execute()
19
-
20
- Architecture:
21
- - Uses graph_context module for auto-registration
22
- - Context manager pattern provides explicit scoping
23
- - Validates graph structure on exit (fail-fast on errors)
24
- - Cleans up registration state even on exceptions
25
- """
26
-
27
- from __future__ import annotations
28
-
29
- from typing import Any, Literal
30
-
31
- from utils import graph_context
32
- from utils.pipeline_graph import PipelineGraph
33
-
34
- from .pipeline_orchestrator import PipelineOrchestrator
35
-
36
-
37
- class PipelineBuilder:
38
- """Context manager for building pipeline graphs with auto-registration.
39
-
40
- Creates a scoped context where:
41
- - PipelineStage instances automatically register with the graph
42
- - Channel instances automatically register edges
43
- - Graph validation happens on context exit
44
- - Orchestrator is created and ready to execute
45
-
46
- The builder uses the graph_context module to enable auto-registration.
47
- This provides explicit scoping through the context manager while avoiding
48
- manual registration calls.
49
-
50
- Attributes:
51
- graph: The PipelineGraph being constructed
52
- orchestrator: The PipelineOrchestrator created on successful exit
53
- (None until __exit__ completes successfully)
54
- """
55
-
56
- def __init__(self) -> None:
57
- """Initialize builder with empty graph.
58
-
59
- The orchestrator is not created until __exit__ succeeds.
60
- """
61
- self.graph = PipelineGraph()
62
- self.orchestrator: PipelineOrchestrator | None = None
63
-
64
- def __enter__(self) -> PipelineBuilder:
65
- """Enter context - enable auto-registration.
66
-
67
- Sets the active graph context to enable stages and channels to
68
- auto-register during construction.
69
-
70
- Returns:
71
- self for use in 'with' statement
72
- """
73
- graph_context.set_active_graph(self.graph)
74
- return self
75
-
76
- def __exit__(
77
- self,
78
- exc_type: type[BaseException] | None,
79
- exc_val: BaseException | None,
80
- exc_tb: Any,
81
- ) -> Literal[False]:
82
- """Exit context - validate graph and create orchestrator.
83
-
84
- Performs final graph validation and setup:
85
- 1. If no exception occurred during construction:
86
- - Validates graph structure (cycles, connectivity, ports)
87
- - Computes execution order (topological sort)
88
- - Creates PipelineOrchestrator ready to execute
89
-
90
- 2. Always clears the active graph context (stops registration)
91
-
92
- Args:
93
- exc_type: Exception type (if raised in context)
94
- exc_val: Exception value (if raised in context)
95
- exc_tb: Exception traceback (if raised in context)
96
-
97
- Returns:
98
- False (never suppresses exceptions)
99
-
100
- Raises:
101
- ValueError: If graph validation fails (cycles, disconnected components,
102
- unbound ports, etc.)
103
-
104
- Note:
105
- The orchestrator is only created if no exception occurred during
106
- construction AND validation succeeds. Check if builder.orchestrator
107
- is not None before using.
108
- """
109
- try:
110
- # Only validate and create orchestrator if construction succeeded
111
- if exc_type is None:
112
- # Validate graph structure (raises ValueError on failure)
113
- self.graph.validate()
114
-
115
- # Compute execution order (topological sort)
116
- self.graph.compute_execution_order()
117
-
118
- # Create orchestrator ready to execute
119
- self.orchestrator = PipelineOrchestrator(self.graph)
120
- finally:
121
- # Always clear the active graph context to stop registration
122
- # This ensures clean state even if validation fails or exceptions occur
123
- graph_context.set_active_graph(None)
124
-
125
- # Never suppress exceptions
126
- return False
1
+ """Pipeline builder with automatic stage and channel registration.
2
+
3
+ Provides a context manager that enables declarative pipeline construction:
4
+ - Stages created within the context auto-register with the graph
5
+ - Channels created within the context auto-register edges
6
+ - Graph validation happens automatically on context exit
7
+ - Execution order is computed automatically
8
+ - PipelineOrchestrator is created ready to execute
9
+
10
+ Usage:
11
+ with PipelineBuilder() as builder:
12
+ # Stages and channels auto-register during construction
13
+ stage1 = MyStage(path1, "stage1")
14
+ stage2 = MyStage(path2, "stage2")
15
+ Channel(stage1.output_o, stage2.input_i)
16
+
17
+ # After context exit, builder.orchestrator is ready
18
+ builder.orchestrator.execute()
19
+
20
+ Architecture:
21
+ - Uses graph_context module for auto-registration
22
+ - Context manager pattern provides explicit scoping
23
+ - Validates graph structure on exit (fail-fast on errors)
24
+ - Cleans up registration state even on exceptions
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from typing import Any, Literal
30
+
31
+ from utils import graph_context
32
+ from utils.pipeline_graph import PipelineGraph
33
+
34
+ from .pipeline_orchestrator import PipelineOrchestrator
35
+
36
+
37
+ class PipelineBuilder:
38
+ """Context manager for building pipeline graphs with auto-registration.
39
+
40
+ Creates a scoped context where:
41
+ - PipelineStage instances automatically register with the graph
42
+ - Channel instances automatically register edges
43
+ - Graph validation happens on context exit
44
+ - Orchestrator is created and ready to execute
45
+
46
+ The builder uses the graph_context module to enable auto-registration.
47
+ This provides explicit scoping through the context manager while avoiding
48
+ manual registration calls.
49
+
50
+ Attributes:
51
+ graph: The PipelineGraph being constructed
52
+ orchestrator: The PipelineOrchestrator created on successful exit
53
+ (None until __exit__ completes successfully)
54
+ """
55
+
56
+ def __init__(self) -> None:
57
+ """Initialize builder with empty graph.
58
+
59
+ The orchestrator is not created until __exit__ succeeds.
60
+ """
61
+ self.graph = PipelineGraph()
62
+ self.orchestrator: PipelineOrchestrator | None = None
63
+
64
+ def __enter__(self) -> PipelineBuilder:
65
+ """Enter context - enable auto-registration.
66
+
67
+ Sets the active graph context to enable stages and channels to
68
+ auto-register during construction.
69
+
70
+ Returns:
71
+ self for use in 'with' statement
72
+ """
73
+ graph_context.set_active_graph(self.graph)
74
+ return self
75
+
76
+ def __exit__(
77
+ self,
78
+ exc_type: type[BaseException] | None,
79
+ exc_val: BaseException | None,
80
+ exc_tb: Any,
81
+ ) -> Literal[False]:
82
+ """Exit context - validate graph and create orchestrator.
83
+
84
+ Performs final graph validation and setup:
85
+ 1. If no exception occurred during construction:
86
+ - Validates graph structure (cycles, connectivity, ports)
87
+ - Computes execution order (topological sort)
88
+ - Creates PipelineOrchestrator ready to execute
89
+
90
+ 2. Always clears the active graph context (stops registration)
91
+
92
+ Args:
93
+ exc_type: Exception type (if raised in context)
94
+ exc_val: Exception value (if raised in context)
95
+ exc_tb: Exception traceback (if raised in context)
96
+
97
+ Returns:
98
+ False (never suppresses exceptions)
99
+
100
+ Raises:
101
+ ValueError: If graph validation fails (cycles, disconnected components,
102
+ unbound ports, etc.)
103
+
104
+ Note:
105
+ The orchestrator is only created if no exception occurred during
106
+ construction AND validation succeeds. Check if builder.orchestrator
107
+ is not None before using.
108
+ """
109
+ try:
110
+ # Only validate and create orchestrator if construction succeeded
111
+ if exc_type is None:
112
+ # Validate graph structure (raises ValueError on failure)
113
+ self.graph.validate()
114
+
115
+ # Compute execution order (topological sort)
116
+ self.graph.compute_execution_order()
117
+
118
+ # Create orchestrator ready to execute
119
+ self.orchestrator = PipelineOrchestrator(self.graph)
120
+ finally:
121
+ # Always clear the active graph context to stop registration
122
+ # This ensures clean state even if validation fails or exceptions occur
123
+ graph_context.set_active_graph(None)
124
+
125
+ # Never suppress exceptions
126
+ return False