photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,17 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: photo-stack-finder
3
- Version: 0.1.7
4
- Summary: Photo deduplication using perceptual hashing and sequence detection
3
+ Version: 0.1.8
4
+ Summary: Photo organization and duplicate detection using perceptual hashing and sequence detection
5
5
  Author: Geoff Barrett
6
6
  Maintainer: Geoff Barrett
7
- License: AGPL-3.0-or-later
8
- Project-URL: Homepage, https://github.com/gbarrett28/photo_dedup
9
- Project-URL: Repository, https://github.com/gbarrett28/photo_dedup
10
- Project-URL: Issues, https://github.com/gbarrett28/photo_dedup/issues
11
- Project-URL: Discussions, https://github.com/gbarrett28/photo_dedup/discussions
12
- Keywords: photo,deduplication,perceptual-hashing,image-processing
7
+ License-Expression: AGPL-3.0-or-later
8
+ Project-URL: Homepage, https://github.com/gbarrett28/photo_stack_finder
9
+ Project-URL: Repository, https://github.com/gbarrett28/photo_stack_finder
10
+ Project-URL: Issues, https://github.com/gbarrett28/photo_stack_finder/issues
11
+ Project-URL: Discussions, https://github.com/gbarrett28/photo_stack_finder/discussions
12
+ Keywords: photo,organization,duplicate-detection,perceptual-hashing,image-processing
13
13
  Classifier: Development Status :: 4 - Beta
14
14
  Classifier: Intended Audience :: End Users/Desktop
15
- Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
16
15
  Classifier: Programming Language :: Python :: 3
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: 3.12
@@ -54,7 +53,7 @@ Requires-Dist: pytest; extra == "dev"
54
53
  Requires-Dist: pytest-cov; extra == "dev"
55
54
  Dynamic: license-file
56
55
 
57
- # Photo Dedup
56
+ # Photo Stack Finder
58
57
 
59
58
  [![License: AGPL v3](https://img.shields.io/badge/License-AGPL_v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
60
59
  [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
@@ -92,20 +91,20 @@ You love Google Photos' daily memories and search features. But they're **clutte
92
91
 
93
92
  **The Deployment Problem:**
94
93
 
95
- You can't easily act on the duplicates Photo Dedup finds:
94
+ You can't easily act on the duplicates Photo Stack Finder finds:
96
95
  - ❌ **Takeout → Delete All → Re-upload** loses face recognition training
97
96
  - ❌ **Not reliable enough** for automatic deletion
98
97
  - ❌ **Google doesn't support bulk delete** anyway
99
98
  - ❌ **Manual deletion** of thousands of photos is impractical
100
99
 
101
- **Photo Dedup's Real Value:**
100
+ **Photo Stack Finder's Real Value:**
102
101
 
103
102
  1. **Immediate:** Identify duplicate photo stacks in your library
104
103
  2. **Analysis:** Understand how many duplicates you have and why
105
104
  3. **Advocacy:** Generate evidence for a Google Photos enhancement request
106
105
  4. **Future-ready:** Export stack definitions if/when Google adds stacking support
107
106
 
108
- **The Solution:** Photo Dedup intelligently finds **spatial duplicates** - all photos that originate from the **same source image** using multiple techniques:
107
+ **The Solution:** Photo Stack Finder intelligently finds **spatial duplicates** - all photos that originate from the **same source image** using multiple techniques:
109
108
  - **Byte-identical detection** - Same file, different names/locations
110
109
  - **Perceptual hashing** - Different resolutions, crops, edits, or format conversions of the same photo
111
110
  - **Template similarity** - Hierarchical clustering to find versions of the same original
@@ -154,7 +153,7 @@ You can't easily act on the duplicates Photo Dedup finds:
154
153
  - JPEG + HEIC (same photo, different formats)
155
154
  - IMG_1234.jpg + IMG_1234(1).jpg (sync duplicates)
156
155
 
157
- **How Photo Dedup Helps:**
156
+ **How Photo Stack Finder Helps:**
158
157
 
159
158
  1. **Proves it's solvable:** This tool finds the stacks - Google could do it natively
160
159
  2. **Quantifies the problem:** See how many duplicates you have
@@ -197,11 +196,11 @@ See **[GOOGLE_ENHANCEMENT_REQUEST.md](GOOGLE_ENHANCEMENT_REQUEST.md)** for how t
197
196
 
198
197
  ```bash
199
198
  # Install directly from GitHub
200
- pip install git+https://github.com/gbarrett28/photo_dedup.git
199
+ pip install git+https://github.com/gbarrett28/photo_stack_finder.git
201
200
 
202
201
  # Or clone and install in development mode
203
- git clone https://github.com/gbarrett28/photo_dedup.git
204
- cd photo_dedup
202
+ git clone https://github.com/gbarrett28/photo_stack_finder.git
203
+ cd photo_stack_finder
205
204
  pip install -e .
206
205
  ```
207
206
 
@@ -209,8 +208,8 @@ pip install -e .
209
208
 
210
209
  ```bash
211
210
  # Clone the repository
212
- git clone https://github.com/gbarrett28/photo_dedup.git
213
- cd photo_dedup
211
+ git clone https://github.com/gbarrett28/photo_stack_finder.git
212
+ cd photo_stack_finder
214
213
 
215
214
  # Create virtual environment
216
215
  python -m venv .venv
@@ -227,7 +226,7 @@ pip install -e ".[dev]" # Install with development tools
227
226
 
228
227
  ```bash
229
228
  # After pip installation, simply run:
230
- photo-dedup
229
+ photo-stack-finder
231
230
 
232
231
  # Or if running from source:
233
232
  cd src/scripts
@@ -301,7 +300,7 @@ Archived implementation plans, test strategies, and status reports are in **[doc
301
300
  ## Project Structure
302
301
 
303
302
  ```
304
- photo_dedup/
303
+ photo_stack_finder/
305
304
  ├── src/
306
305
  │ ├── utils/ # Pipeline stages and utilities
307
306
  │ ├── photo_compare/ # Image comparison algorithms
@@ -392,7 +391,7 @@ This software is licensed under AGPL v3, which means:
392
391
  - ✅ **Network use requires sharing source** (even SaaS deployments)
393
392
  - 💼 **Commercial licensing available** for proprietary use
394
393
 
395
- 💼 **Commercial licensing:** Contact via [GitHub Discussions](https://github.com/gbarrett28/photo_dedup/discussions)
394
+ 💼 **Commercial licensing:** Contact via [GitHub Discussions](https://github.com/gbarrett28/photo_stack_finder/discussions)
396
395
 
397
396
  📄 **See [LICENSE](LICENSE) for full legal text**
398
397
  📖 **See [LICENSING.md](LICENSING.md) for usage examples and FAQ**
@@ -0,0 +1,75 @@
1
+ orchestrator/__init__.py,sha256=PL9kksVIKleMuDzY_XqjJSfMWXTQKbvug2D7B828UUA,1315
2
+ orchestrator/app.py,sha256=QV93njSDXRQR22fWy-QC7uxnCyyWeSCB__SzwM-sS9o,34257
3
+ orchestrator/build_pipeline.py,sha256=CrQxY1k_sWanwOB4AaKmf8N2n9N9hIICU9vDcsCB0SI,4481
4
+ orchestrator/orchestrator_runner.py,sha256=VlaXfj26BfD6QMmdravL3KB6HX_GbMsYkuy_vKK7F4s,20262
5
+ orchestrator/pipeline_builder.py,sha256=0cZRyceoWt8gUjTCdmO6Eo88NwJIH8LvYbd8LpAxkIQ,4549
6
+ orchestrator/pipeline_orchestrator.py,sha256=ouAnxPZ86vkzfsc_T1v0cL6sFcBHEluM6JarYdQUfIU,25328
7
+ orchestrator/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ orchestrator/review_persistence.py,sha256=YJh6nhvs8AzuucOw0yRm3yyKc-IrBND686zIqcfdzqw,5751
9
+ orchestrator/static/favicon.svg,sha256=TVNER-nLkPBBb9lZRaruak9jvyXyyfIKgyD2koQcyqk,622
10
+ orchestrator/static/orchestrator.css,sha256=FFo_jgobZsTp0dT-kE-WAphRslX-ube-9jLwvRRmCRU,11672
11
+ orchestrator/static/orchestrator.html,sha256=hNYOrmXEX0RcxNkhz8iP96wSmNyYL6K6SaNd1CWo_50,13963
12
+ orchestrator/static/orchestrator.js,sha256=IKFQT5gg6r9sxcD5z8NZQNJIYEtvYO_UyaSEEN3DI9o,32512
13
+ orchestrator/static/review_common.js,sha256=t8-AI4ohQbtqSC1iWvR5x0FNlwIxl8gVO2qZsI-VtnI,5875
14
+ orchestrator/static/review_identical.html,sha256=Pq8TYbDSBbajC3p_XvlIOC1lbv0eJN_TcymGUw1lDE8,31487
15
+ orchestrator/static/review_sequences.html,sha256=oEWRtcMfEvu09F3r-hw8rRfECjcE48BHtht2CdGlw3E,54681
16
+ overlap_metrics/__init__.py,sha256=9RbbN6RoFBX4JdVHBTmh2WGG2p94U8th-uoG52qztoY,11184
17
+ overlap_metrics/config.py,sha256=U7i1MbxHGWHjod-zNlx2Aptl2yJzCzTCo5_2a1LAExs,3832
18
+ overlap_metrics/core.py,sha256=LiF-Ti6l0GrwxySzwxY2xwRBjJxC0brQf1e9yS8HiRA,9372
19
+ overlap_metrics/estimators.py,sha256=9v5BeCCJph_d6JdSN6x0D9NbrukCNx71a5K8Q1oBNsk,10741
20
+ overlap_metrics/metrics.py,sha256=Rvk4DJFWM4m7NrkP22LUE38I-A_pA_ONiXM3GjJ05jY,9873
21
+ overlap_metrics/registry.py,sha256=GZOs0gNCKPwUt60tJ7td6pLZozDstDzoIlxjNaQCRSQ,3552
22
+ overlap_metrics/utils.py,sha256=IICoLUy08AnEsMLqxRG7zftlBSmRvqYiSrg2kwK2S0Q,4521
23
+ photo_compare/__init__.py,sha256=0lsL76bUIomc_kdy_AFY7irsGcBfqI8r7IO8lYYYg88,4876
24
+ photo_compare/base.py,sha256=YsLim2GMB_oLH4GgeMYfW77kiEuonUEMoIMzJ8G3hnM,9356
25
+ photo_compare/config.py,sha256=16f32Fh-yLGrZuTudqYMrb0hDWHDbkYtgDvm3sR61Zw,7556
26
+ photo_compare/distance.py,sha256=Y1gW-pl6PUqtvsRTph4HhXyg_xXmj_IlRKEk2hZ6tg8,566
27
+ photo_compare/feature_methods.py,sha256=wjplbd9Qd1oYenQsVDoGXvUnrPTqiBEDsSihP4o2WSA,6904
28
+ photo_compare/file_hash.py,sha256=5IDCQlMK8qIvZF4M_w9Wr53Q7TSfoHg79ENnVjiH1pc,977
29
+ photo_compare/hash_methods.py,sha256=mRzbvPLpnbtXKcrIDyMBb8DWgjwE220ToJUD1ed4zmc,3684
30
+ photo_compare/histogram_methods.py,sha256=G20hOiHJi_LEII5x1_ZgU-FnZ8lVyENQc-LSsZAg-MU,5018
31
+ photo_compare/pixel_methods.py,sha256=3tUZ0ZR6CqKGJg2YOn-wMEoE0LWMJ6113n1q1lyeJZE,2363
32
+ photo_compare/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ photo_compare/structural_methods.py,sha256=nimULDnzb98cShv5-MM8sumFGYnpQv-bxAF_X5NMq1o,4255
34
+ photo_compare/types.py,sha256=PyA-ydJcTJwHMtaFXpJwe73GwTBwb9I9odc90XrbkKU,971
35
+ photo_stack_finder-0.1.8.dist-info/licenses/LICENSE,sha256=bmBV3PYoSFRs600gmAehqmuaIZIVUl8kGQoZWhapVnM,1468
36
+ scripts/orchestrate.py,sha256=HOGDxrMDCtCofj3l_ryuGunJytfKKISUuGC7GcSKs50,9287
37
+ utils/__init__.py,sha256=HBnCM-7JniIigj-NctrXV9UXAlZNRMijf_GdNDzlBkY,5196
38
+ utils/base_pipeline_stage.py,sha256=xPVFt8RjNqI6f80wshICzyShiP0kQbQxNgDUnmDpE6Y,6369
39
+ utils/base_ports.py,sha256=6wHfAdvTz5oB1hLWrs4lMQB-mzsOe6T4v0JR9_aAW3A,5240
40
+ utils/benchmark_utils.py,sha256=RdlCpKdrZXFOUUc5rXQ8yL8KcgS09kZi-bU37nqvzgg,30026
41
+ utils/channel.py,sha256=f2njTAs6bCtYNui31yIrXrtNN1CK0E86M7wUJiTP768,2586
42
+ utils/comparison_gates.py,sha256=nH5UBWbFpLPCLVyON0veiH_KgtBrVBayIgm3abbX_s0,23794
43
+ utils/compute_benchmarks.py,sha256=tienoHmMB2iQgQLZRtmocj1jc40XYqlbrDSVGyxViTA,13868
44
+ utils/compute_identical.py,sha256=OVzs8iDAB2oeV3rH0o6vwHqHbFUU443civu6XAjjxcg,10373
45
+ utils/compute_indices.py,sha256=W3MvToDHgrGtN-_Q-LaWNXQ7VEJCJiODRsfx7sPUWnk,9304
46
+ utils/compute_perceptual_hash.py,sha256=EHwDbnXd5JCk_vMQPU0F59SQEvLpIIz_qO5h7im0HRQ,4915
47
+ utils/compute_perceptual_match.py,sha256=hIZkA7vtFoHvOg3YbEwioEx0XZxxk7VIzFI6nS-I680,10258
48
+ utils/compute_sha_bins.py,sha256=kSAHM-5gtZ6LA8daebUoX1btlJ2o2QVNC0CdIvU-_Xg,7019
49
+ utils/compute_template_similarity.py,sha256=YU4IgQdinp7X7Vv63oz6K7rfGOke3TRTUJ-EBxbxloA,21449
50
+ utils/compute_templates.py,sha256=yVYfmaJlfOgeDJ2tdJ5iLd-n4SnPbjepl7eUBP3N1Js,5079
51
+ utils/compute_versions.py,sha256=GnvuJ4Bv_u3qlyDclRJ9H3dQ62nmfLCs5CAe2bJVyS4,20337
52
+ utils/config.py,sha256=R8xy9RLZAliLY9J8ZKkQABQX6KugfuFum7L5EwagEZA,15660
53
+ utils/data_io.py,sha256=MTkLdWYFTi6qfuUNKJR9JJ2d1gL-Hg7NG8ws-nByepk,2412
54
+ utils/graph_context.py,sha256=5vIiSOkzQ1s1vVliWYyjSy7NPvnMEDvrH9aqsivgneM,1558
55
+ utils/logger.py,sha256=vgKgsAAcqb-0dlOYdRHtsnBTqbuHOVTU-9h35plDgqU,1887
56
+ utils/models.py,sha256=TZkGh7AYHp36yJy5i06iSDdQiJh7Nq5GmGx1vpAnkGA,15300
57
+ utils/photo_file.py,sha256=IHuo_x6VbLeHfl2QDtkDAO1xtaObCMolt2OrBioNtdY,20679
58
+ utils/pipeline_graph.py,sha256=A39fIj647_IIS43tubvNROFZN2K_oj6hgkf3dG9H6lc,13262
59
+ utils/pipeline_stage.py,sha256=vuEUw9CCkIQzqS0k3Uy_Ez2XYCxGOgIXwDCStfgyLXQ,16978
60
+ utils/plot_helpers.py,sha256=Q1M1o8QndLhMXvNaxx-0bnusvQYGkU_2GyC6jsB1iNI,4451
61
+ utils/ports.py,sha256=JlwlLW0ViwhofeYWrQ_clboZkM4x5hnsg5GkefyNJgs,4318
62
+ utils/progress.py,sha256=q7M4xTYowsX8T_fXbDkJDzbcjQ7qhE_ayCZawEnbc3c,14936
63
+ utils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ utils/report_builder.py,sha256=okRhMUAbV1540eS8epyZ97Lkalq1a8JFOhM4MXXBpzA,3940
65
+ utils/review_types.py,sha256=PY08IioeOSIYKvxCZlcbyyvbYLPVG8hzXFbUCBp_Me4,1713
66
+ utils/review_utils.py,sha256=-mGRFF7s6fde67BDxnwPJNogw0bQeVAEw6ztPYMr6xU,8520
67
+ utils/sequence.py,sha256=CPPn4CWQb16iAhVVswknwBUl0gTEHzNJHCvSOV6Hevk,34751
68
+ utils/sequence_clustering.py,sha256=MuUSWbQ-7W6PJhbdFKrwPUBHNBe2JNNv2Atvgg0_uEg,5754
69
+ utils/template.py,sha256=zB5JZOc4HRylDrj1u7kV5NcxsbLYy2ZjeLwtVQcM7lA,1704
70
+ utils/template_parsing.py,sha256=Li1kseXrnjPf_KiIyae7iMEZWQg60bQIA8zOta1POfE,2460
71
+ photo_stack_finder-0.1.8.dist-info/METADATA,sha256=THw8oGEoaRtvmxmaQ-qx5tAOLGDBJQ_xxnfymePU_50,17193
72
+ photo_stack_finder-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
73
+ photo_stack_finder-0.1.8.dist-info/entry_points.txt,sha256=ZHWJIeylUGZYa_8Rjt3QxbWiCCVZZx3FQShTNg91zNQ,64
74
+ photo_stack_finder-0.1.8.dist-info/top_level.txt,sha256=VmNqTeDiNgCj3LZJICyRF2Uc4buZQ7LxgSob-BoImk0,57
75
+ photo_stack_finder-0.1.8.dist-info/RECORD,,
scripts/orchestrate.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- """Photo Deduplication Orchestrator - Web Interface Entry Point.
2
+ """Photo Stack Finder Orchestrator - Web Interface Entry Point.
3
3
 
4
- This provides a web-based interface for the photo deduplication pipeline.
4
+ This provides a web-based interface for the photo stack finding pipeline.
5
5
 
6
6
  Usage:
7
7
  python orchestrate.py
@@ -38,10 +38,10 @@ def build_arg_parser() -> argparse.ArgumentParser:
38
38
  Configured ArgumentParser instance
39
39
  """
40
40
  parser = argparse.ArgumentParser(
41
- description="Photo Dedup Orchestrator - Web Interface",
41
+ description="Photo Stack Finder Orchestrator - Web Interface",
42
42
  formatter_class=argparse.RawDescriptionHelpFormatter,
43
43
  epilog="""
44
- This is a web interface for the photo deduplication pipeline.
44
+ This is a web interface for the photo stack finding pipeline.
45
45
 
46
46
  For the web interface, just run:
47
47
  - python orchestrate.py
@@ -277,11 +277,13 @@ def main() -> None:
277
277
 
278
278
  # Add platform-specific instructions
279
279
  if is_linux:
280
- lines.extend([
281
- "To access the web interface:",
282
- f" Open your browser to: {url}",
283
- "",
284
- ])
280
+ lines.extend(
281
+ [
282
+ "To access the web interface:",
283
+ f" Open your browser to: {url}",
284
+ "",
285
+ ]
286
+ )
285
287
 
286
288
  lines.append("Press Ctrl+C to stop the server")
287
289
  print_banner("", lines)
@@ -302,7 +304,7 @@ def main() -> None:
302
304
 
303
305
  # Start server
304
306
  try:
305
- uvicorn.run(app, host=args.host, port=args.port, log_level="info")
307
+ uvicorn.run(app, host=args.host, port=args.port, log_level="info", access_log=False)
306
308
  except KeyboardInterrupt:
307
309
  print("\n🛑 Server stopped by user")
308
310
  except Exception:
utils/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- """Utilities for photo deduplication."""
1
+ """Utilities for photo stack finding."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -26,7 +26,6 @@ from .compute_perceptual_hash import ComputePerceptualHash
26
26
  from .compute_perceptual_match import ComputePerceptualMatch
27
27
  from .compute_sha_bins import ComputeShaBins
28
28
  from .compute_template_similarity import ComputeTemplateSimilarity
29
- from .compute_templates import ComputeTemplates
30
29
  from .compute_versions import ComputeVersions
31
30
 
32
31
  # Config module exports
@@ -109,6 +108,9 @@ from .template import (
109
108
  partial_format,
110
109
  )
111
110
 
111
+ # Template parsing module exports (extract_template not exported - internal use only)
112
+ # INDEX_T exported via sequence module
113
+
112
114
  __all__ = [
113
115
  # Config
114
116
  "CONFIG",
@@ -136,7 +138,6 @@ __all__ = [
136
138
  "ComputePerceptualMatch",
137
139
  "ComputeShaBins",
138
140
  "ComputeTemplateSimilarity",
139
- "ComputeTemplates",
140
141
  "ComputeVersions",
141
142
  # Configuration
142
143
  "Config",
@@ -1,171 +1,171 @@
1
- from __future__ import annotations
2
-
3
- from abc import ABC, abstractmethod
4
- from collections.abc import Callable
5
- from pathlib import Path
6
-
7
- from .base_ports import StageProtocol
8
- from .models import IdenticalGroup, ReviewType, SequenceGroup
9
- from .ports import InputPort
10
- from .progress import ProgressInfo, ProgressTracker
11
-
12
-
13
- class BasePipelineStage(ABC, StageProtocol):
14
- """Base class for polymorphic PipelineStage for use in lists of pipeline stages."""
15
-
16
- path: Path
17
- stage_name: str
18
- stage_id: int | None
19
- description: str
20
- sequence_review_result: list[SequenceGroup]
21
- identical_review_result: list[IdenticalGroup]
22
- _progress_tracker: ProgressTracker | None
23
- _phase_callback: Callable[[str], None] | None # Called by run() to notify phase changes
24
- ref_photos_init: int | None
25
- ref_photos_final: int | None
26
- ref_seqs_init: int | None
27
- ref_seqs_final: int | None
28
- total_photos: int | None # Total photos including duplicates (invariant - should never change)
29
-
30
- def __init__(
31
- self,
32
- path: Path,
33
- stage_name: str,
34
- ):
35
- """Initialize pipeline stage with output path and name.
36
-
37
- Args:
38
- path: Path where stage results will be cached
39
- stage_name: Human-readable name for progress tracking
40
- """
41
- self.path = path
42
- self.stage_name = stage_name
43
- self.stage_id = None # Set by PipelineGraph.compute_execution_order()
44
- self.description = "" # Override in subclasses for UI tooltips
45
- self.sequence_review_result = [] # Pre-computed sequence review data (built during run())
46
- self.identical_review_result = [] # Pre-computed identical review data (built during run()) # Pre-computed identical review data (built during run())
47
- self._progress_tracker = None
48
- self._phase_callback = None # Set by orchestrator before calling run()
49
-
50
- self.ref_photos_init = None
51
- self.ref_photos_final = None
52
- self.ref_seqs_init = None
53
- self.ref_seqs_final = None
54
- self.total_photos = None
55
-
56
- # Performance metrics (set after stage completes)
57
- self.elapsed_seconds: float | None = None
58
- self.throughput: float | None = None # items per second
59
-
60
- def get_ref_photo_count(self) -> int | None:
61
- """Get count of reference photos after stage has run.
62
-
63
- Returns:
64
- Number of reference photos after stage has run (None if there are none or the stage has not run).
65
- """
66
- return self.ref_photos_final
67
-
68
- def get_ref_sequence_count(self) -> int | None:
69
- """Get count of reference sequences after stage has run.
70
-
71
- Returns:
72
- Number of reference sequences after stage has run (None if there are none or the stage has not run).
73
- """
74
- return self.ref_seqs_final
75
-
76
- @abstractmethod
77
- def run(self) -> None:
78
- """Execute pipeline stage - must be implemented by subclass."""
79
- ...
80
-
81
- @abstractmethod
82
- def finalise(self) -> None:
83
- """Hook to call at the end of run - must be implemented by subclass."""
84
- ...
85
-
86
- @abstractmethod
87
- def needs_review(self) -> ReviewType:
88
- """Discover what type of review this stage produces.
89
-
90
- This allows the orchestrator to dynamically discover which stages
91
- produce reviewable output without hard-coding stage names.
92
-
93
- Returns:
94
- - "none": No reviewable output (default)
95
- - "photos": Produces photo groups (byte-identical duplicates)
96
- - "sequences": Produces sequence groups (similar sequences)
97
- """
98
- ...
99
-
100
- @abstractmethod
101
- def has_review_data(self) -> bool:
102
- """Check if review data is ACTUALLY available for this stage.
103
-
104
- Checks three conditions:
105
- 1. Stage has completed (cache file exists)
106
- 2. Stage is capable of producing review data (needs_review() != "none")
107
- 3. Review data actually exists (review lists not empty)
108
-
109
- Returns:
110
- True if stage has completed and has reviewable data available
111
- """
112
- ...
113
-
114
- def get_progress(self) -> ProgressInfo | None:
115
- """Get current progress information for UI polling.
116
-
117
- Returns:
118
- ProgressInfo with formatted progress data if stage is currently executing,
119
- None if stage is not running
120
-
121
- Note:
122
- This method is called by the orchestrator during execution to poll
123
- progress for UI updates.
124
- """
125
- # Direct access with combined check for type narrowing
126
- if self._progress_tracker is None:
127
- return None
128
- return self._progress_tracker.get_snapshot()
129
-
130
- def get_cache_timestamp(self) -> float:
131
- """Get the modification time of the cache file.
132
-
133
- Returns:
134
- Cache file's mtime (seconds since epoch), or raises RuntimeError if cache doesn't exist
135
- """
136
- if self.path.exists():
137
- mtime = self.path.stat().st_mtime
138
- if mtime is None:
139
- raise RuntimeError("mtime is None for existing file")
140
- return mtime
141
- raise RuntimeError(f"{self.stage_name} get_cache_timestamp called before cache file has been created")
142
-
143
- def _cache_is_valid(self) -> bool:
144
- """Check if cache exists and is newer than all input port dependencies.
145
-
146
- Uses isinstance() check on __dict__ items to avoid triggering property getters
147
- and causing errors when stages haven't run yet.
148
-
149
- Returns:
150
- True if cache is valid and can be used, False otherwise
151
- """
152
- if not self.path.exists():
153
- return False
154
-
155
- cache_mtime = self.get_cache_timestamp()
156
-
157
- # Check if any input port dependency is newer than our cache
158
- # Use __dict__ to avoid triggering property getters
159
- for attr_value in self.__dict__.values():
160
- # Check for InputPort type directly
161
- if isinstance(attr_value, InputPort) and attr_value.is_bound():
162
- upstream_timestamp = attr_value.timestamp()
163
-
164
- if upstream_timestamp is None:
165
- raise ValueError(f"{self.__class__.__name__} getting None timestamp from upstream")
166
-
167
- if upstream_timestamp > cache_mtime:
168
- # Upstream data is newer, our cache is stale
169
- return False
170
-
171
- return True
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import Callable
5
+ from pathlib import Path
6
+
7
+ from .base_ports import StageProtocol
8
+ from .models import IdenticalGroup, ReviewType, SequenceGroup
9
+ from .ports import InputPort
10
+ from .progress import ProgressInfo, ProgressTracker
11
+
12
+
13
+ class BasePipelineStage(ABC, StageProtocol):
14
+ """Base class for polymorphic PipelineStage for use in lists of pipeline stages."""
15
+
16
+ path: Path
17
+ stage_name: str
18
+ stage_id: int | None
19
+ description: str
20
+ sequence_review_result: list[SequenceGroup]
21
+ identical_review_result: list[IdenticalGroup]
22
+ _progress_tracker: ProgressTracker | None
23
+ _phase_callback: Callable[[str], None] | None # Called by run() to notify phase changes
24
+ ref_photos_init: int | None
25
+ ref_photos_final: int | None
26
+ ref_seqs_init: int | None
27
+ ref_seqs_final: int | None
28
+ total_photos: int | None # Total photos including duplicates (invariant - should never change)
29
+
30
+ def __init__(
31
+ self,
32
+ path: Path,
33
+ stage_name: str,
34
+ ):
35
+ """Initialize pipeline stage with output path and name.
36
+
37
+ Args:
38
+ path: Path where stage results will be cached
39
+ stage_name: Human-readable name for progress tracking
40
+ """
41
+ self.path = path
42
+ self.stage_name = stage_name
43
+ self.stage_id = None # Set by PipelineGraph.compute_execution_order()
44
+ self.description = "" # Override in subclasses for UI tooltips
45
+ self.sequence_review_result = [] # Pre-computed sequence review data (built during run())
46
+ self.identical_review_result = [] # Pre-computed identical review data (built during run())
47
+ self._progress_tracker = None
48
+ self._phase_callback = None # Set by orchestrator before calling run()
49
+
50
+ self.ref_photos_init = None
51
+ self.ref_photos_final = None
52
+ self.ref_seqs_init = None
53
+ self.ref_seqs_final = None
54
+ self.total_photos = None
55
+
56
+ # Performance metrics (set after stage completes)
57
+ self.elapsed_seconds: float | None = None
58
+ self.throughput: float | None = None # items per second
59
+
60
+ def get_ref_photo_count(self) -> int | None:
61
+ """Get count of reference photos after stage has run.
62
+
63
+ Returns:
64
+ Number of reference photos after stage has run (None if there are none or the stage has not run).
65
+ """
66
+ return self.ref_photos_final
67
+
68
+ def get_ref_sequence_count(self) -> int | None:
69
+ """Get count of reference sequences after stage has run.
70
+
71
+ Returns:
72
+ Number of reference sequences after stage has run (None if there are none or the stage has not run).
73
+ """
74
+ return self.ref_seqs_final
75
+
76
+ @abstractmethod
77
+ def run(self) -> None:
78
+ """Execute pipeline stage - must be implemented by subclass."""
79
+ ...
80
+
81
+ @abstractmethod
82
+ def finalise(self) -> None:
83
+ """Hook to call at the end of run - must be implemented by subclass."""
84
+ ...
85
+
86
+ @abstractmethod
87
+ def needs_review(self) -> ReviewType:
88
+ """Discover what type of review this stage produces.
89
+
90
+ This allows the orchestrator to dynamically discover which stages
91
+ produce reviewable output without hard-coding stage names.
92
+
93
+ Returns:
94
+ - "none": No reviewable output (default)
95
+ - "photos": Produces photo groups (byte-identical duplicates)
96
+ - "sequences": Produces sequence groups (similar sequences)
97
+ """
98
+ ...
99
+
100
+ @abstractmethod
101
+ def has_review_data(self) -> bool:
102
+ """Check if review data is ACTUALLY available for this stage.
103
+
104
+ Checks three conditions:
105
+ 1. Stage has completed (cache file exists)
106
+ 2. Stage is capable of producing review data (needs_review() != "none")
107
+ 3. Review data actually exists (review lists not empty)
108
+
109
+ Returns:
110
+ True if stage has completed and has reviewable data available
111
+ """
112
+ ...
113
+
114
+ def get_progress(self) -> ProgressInfo | None:
115
+ """Get current progress information for UI polling.
116
+
117
+ Returns:
118
+ ProgressInfo with formatted progress data if stage is currently executing,
119
+ None if stage is not running
120
+
121
+ Note:
122
+ This method is called by the orchestrator during execution to poll
123
+ progress for UI updates.
124
+ """
125
+ # Direct access with combined check for type narrowing
126
+ if self._progress_tracker is None:
127
+ return None
128
+ return self._progress_tracker.get_snapshot()
129
+
130
+ def get_cache_timestamp(self) -> float:
131
+ """Get the modification time of the cache file.
132
+
133
+ Returns:
134
+ Cache file's mtime (seconds since epoch), or raises RuntimeError if cache doesn't exist
135
+ """
136
+ if self.path.exists():
137
+ mtime = self.path.stat().st_mtime
138
+ if mtime is None:
139
+ raise RuntimeError("mtime is None for existing file")
140
+ return mtime
141
+ raise RuntimeError(f"{self.stage_name} get_cache_timestamp called before cache file has been created")
142
+
143
+ def _cache_is_valid(self) -> bool:
144
+ """Check if cache exists and is newer than all input port dependencies.
145
+
146
+ Uses isinstance() check on __dict__ items to avoid triggering property getters
147
+ and causing errors when stages haven't run yet.
148
+
149
+ Returns:
150
+ True if cache is valid and can be used, False otherwise
151
+ """
152
+ if not self.path.exists():
153
+ return False
154
+
155
+ cache_mtime = self.get_cache_timestamp()
156
+
157
+ # Check if any input port dependency is newer than our cache
158
+ # Use __dict__ to avoid triggering property getters
159
+ for attr_value in self.__dict__.values():
160
+ # Check for InputPort type directly
161
+ if isinstance(attr_value, InputPort) and attr_value.is_bound():
162
+ upstream_timestamp = attr_value.timestamp()
163
+
164
+ if upstream_timestamp is None:
165
+ raise ValueError(f"{self.__class__.__name__} getting None timestamp from upstream")
166
+
167
+ if upstream_timestamp > cache_mtime:
168
+ # Upstream data is newer, our cache is stale
169
+ return False
170
+
171
+ return True