photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: photo-stack-finder
|
|
3
|
-
Version: 0.1.7
|
|
4
|
-
Summary: Photo
|
|
3
|
+
Version: 0.1.8
|
|
4
|
+
Summary: Photo organization and duplicate detection using perceptual hashing and sequence detection
|
|
5
5
|
Author: Geoff Barrett
|
|
6
6
|
Maintainer: Geoff Barrett
|
|
7
|
-
License: AGPL-3.0-or-later
|
|
8
|
-
Project-URL: Homepage, https://github.com/gbarrett28/
|
|
9
|
-
Project-URL: Repository, https://github.com/gbarrett28/
|
|
10
|
-
Project-URL: Issues, https://github.com/gbarrett28/
|
|
11
|
-
Project-URL: Discussions, https://github.com/gbarrett28/
|
|
12
|
-
Keywords: photo,
|
|
7
|
+
License-Expression: AGPL-3.0-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/gbarrett28/photo_stack_finder
|
|
9
|
+
Project-URL: Repository, https://github.com/gbarrett28/photo_stack_finder
|
|
10
|
+
Project-URL: Issues, https://github.com/gbarrett28/photo_stack_finder/issues
|
|
11
|
+
Project-URL: Discussions, https://github.com/gbarrett28/photo_stack_finder/discussions
|
|
12
|
+
Keywords: photo,organization,duplicate-detection,perceptual-hashing,image-processing
|
|
13
13
|
Classifier: Development Status :: 4 - Beta
|
|
14
14
|
Classifier: Intended Audience :: End Users/Desktop
|
|
15
|
-
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
|
16
15
|
Classifier: Programming Language :: Python :: 3
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -54,7 +53,7 @@ Requires-Dist: pytest; extra == "dev"
|
|
|
54
53
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
55
54
|
Dynamic: license-file
|
|
56
55
|
|
|
57
|
-
# Photo
|
|
56
|
+
# Photo Stack Finder
|
|
58
57
|
|
|
59
58
|
[License: AGPL v3](https://www.gnu.org/licenses/agpl-3.0)
|
|
60
59
|
[Python downloads](https://www.python.org/downloads/)
|
|
@@ -92,20 +91,20 @@ You love Google Photos' daily memories and search features. But they're **clutte
|
|
|
92
91
|
|
|
93
92
|
**The Deployment Problem:**
|
|
94
93
|
|
|
95
|
-
You can't easily act on the duplicates Photo Dedup finds:
|
|
94
|
+
You can't easily act on the duplicates Photo Stack Finder finds:
|
|
96
95
|
- ❌ **Takeout → Delete All → Re-upload** loses face recognition training
|
|
97
96
|
- ❌ **Not reliable enough** for automatic deletion
|
|
98
97
|
- ❌ **Google doesn't support bulk delete** anyway
|
|
99
98
|
- ❌ **Manual deletion** of thousands of photos is impractical
|
|
100
99
|
|
|
101
|
-
**Photo
|
|
100
|
+
**Photo Stack Finder's Real Value:**
|
|
102
101
|
|
|
103
102
|
1. **Immediate:** Identify duplicate photo stacks in your library
|
|
104
103
|
2. **Analysis:** Understand how many duplicates you have and why
|
|
105
104
|
3. **Advocacy:** Generate evidence for a Google Photos enhancement request
|
|
106
105
|
4. **Future-ready:** Export stack definitions if/when Google adds stacking support
|
|
107
106
|
|
|
108
|
-
**The Solution:** Photo
|
|
107
|
+
**The Solution:** Photo Stack Finder intelligently finds **spatial duplicates** - all photos that originate from the **same source image** using multiple techniques:
|
|
109
108
|
- **Byte-identical detection** - Same file, different names/locations
|
|
110
109
|
- **Perceptual hashing** - Different resolutions, crops, edits, or format conversions of the same photo
|
|
111
110
|
- **Template similarity** - Hierarchical clustering to find versions of the same original
|
|
@@ -154,7 +153,7 @@ You can't easily act on the duplicates Photo Dedup finds:
|
|
|
154
153
|
- JPEG + HEIC (same photo, different formats)
|
|
155
154
|
- IMG_1234.jpg + IMG_1234(1).jpg (sync duplicates)
|
|
156
155
|
|
|
157
|
-
**How Photo
|
|
156
|
+
**How Photo Stack Finder Helps:**
|
|
158
157
|
|
|
159
158
|
1. **Proves it's solvable:** This tool finds the stacks - Google could do it natively
|
|
160
159
|
2. **Quantifies the problem:** See how many duplicates you have
|
|
@@ -197,11 +196,11 @@ See **[GOOGLE_ENHANCEMENT_REQUEST.md](GOOGLE_ENHANCEMENT_REQUEST.md)** for how t
|
|
|
197
196
|
|
|
198
197
|
```bash
|
|
199
198
|
# Install directly from GitHub
|
|
200
|
-
pip install git+https://github.com/gbarrett28/
|
|
199
|
+
pip install git+https://github.com/gbarrett28/photo_stack_finder.git
|
|
201
200
|
|
|
202
201
|
# Or clone and install in development mode
|
|
203
|
-
git clone https://github.com/gbarrett28/
|
|
204
|
-
cd
|
|
202
|
+
git clone https://github.com/gbarrett28/photo_stack_finder.git
|
|
203
|
+
cd photo_stack_finder
|
|
205
204
|
pip install -e .
|
|
206
205
|
```
|
|
207
206
|
|
|
@@ -209,8 +208,8 @@ pip install -e .
|
|
|
209
208
|
|
|
210
209
|
```bash
|
|
211
210
|
# Clone the repository
|
|
212
|
-
git clone https://github.com/gbarrett28/
|
|
213
|
-
cd
|
|
211
|
+
git clone https://github.com/gbarrett28/photo_stack_finder.git
|
|
212
|
+
cd photo_stack_finder
|
|
214
213
|
|
|
215
214
|
# Create virtual environment
|
|
216
215
|
python -m venv .venv
|
|
@@ -227,7 +226,7 @@ pip install -e ".[dev]" # Install with development tools
|
|
|
227
226
|
|
|
228
227
|
```bash
|
|
229
228
|
# After pip installation, simply run:
|
|
230
|
-
photo-
|
|
229
|
+
photo-stack-finder
|
|
231
230
|
|
|
232
231
|
# Or if running from source:
|
|
233
232
|
cd src/scripts
|
|
@@ -301,7 +300,7 @@ Archived implementation plans, test strategies, and status reports are in **[doc
|
|
|
301
300
|
## Project Structure
|
|
302
301
|
|
|
303
302
|
```
|
|
304
|
-
|
|
303
|
+
photo_stack_finder/
|
|
305
304
|
├── src/
|
|
306
305
|
│ ├── utils/ # Pipeline stages and utilities
|
|
307
306
|
│ ├── photo_compare/ # Image comparison algorithms
|
|
@@ -392,7 +391,7 @@ This software is licensed under AGPL v3, which means:
|
|
|
392
391
|
- ✅ **Network use requires sharing source** (even SaaS deployments)
|
|
393
392
|
- 💼 **Commercial licensing available** for proprietary use
|
|
394
393
|
|
|
395
|
-
💼 **Commercial licensing:** Contact via [GitHub Discussions](https://github.com/gbarrett28/
|
|
394
|
+
💼 **Commercial licensing:** Contact via [GitHub Discussions](https://github.com/gbarrett28/photo_stack_finder/discussions)
|
|
396
395
|
|
|
397
396
|
📄 **See [LICENSE](LICENSE) for full legal text**
|
|
398
397
|
📖 **See [LICENSING.md](LICENSING.md) for usage examples and FAQ**
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
orchestrator/__init__.py,sha256=PL9kksVIKleMuDzY_XqjJSfMWXTQKbvug2D7B828UUA,1315
|
|
2
|
+
orchestrator/app.py,sha256=QV93njSDXRQR22fWy-QC7uxnCyyWeSCB__SzwM-sS9o,34257
|
|
3
|
+
orchestrator/build_pipeline.py,sha256=CrQxY1k_sWanwOB4AaKmf8N2n9N9hIICU9vDcsCB0SI,4481
|
|
4
|
+
orchestrator/orchestrator_runner.py,sha256=VlaXfj26BfD6QMmdravL3KB6HX_GbMsYkuy_vKK7F4s,20262
|
|
5
|
+
orchestrator/pipeline_builder.py,sha256=0cZRyceoWt8gUjTCdmO6Eo88NwJIH8LvYbd8LpAxkIQ,4549
|
|
6
|
+
orchestrator/pipeline_orchestrator.py,sha256=ouAnxPZ86vkzfsc_T1v0cL6sFcBHEluM6JarYdQUfIU,25328
|
|
7
|
+
orchestrator/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
orchestrator/review_persistence.py,sha256=YJh6nhvs8AzuucOw0yRm3yyKc-IrBND686zIqcfdzqw,5751
|
|
9
|
+
orchestrator/static/favicon.svg,sha256=TVNER-nLkPBBb9lZRaruak9jvyXyyfIKgyD2koQcyqk,622
|
|
10
|
+
orchestrator/static/orchestrator.css,sha256=FFo_jgobZsTp0dT-kE-WAphRslX-ube-9jLwvRRmCRU,11672
|
|
11
|
+
orchestrator/static/orchestrator.html,sha256=hNYOrmXEX0RcxNkhz8iP96wSmNyYL6K6SaNd1CWo_50,13963
|
|
12
|
+
orchestrator/static/orchestrator.js,sha256=IKFQT5gg6r9sxcD5z8NZQNJIYEtvYO_UyaSEEN3DI9o,32512
|
|
13
|
+
orchestrator/static/review_common.js,sha256=t8-AI4ohQbtqSC1iWvR5x0FNlwIxl8gVO2qZsI-VtnI,5875
|
|
14
|
+
orchestrator/static/review_identical.html,sha256=Pq8TYbDSBbajC3p_XvlIOC1lbv0eJN_TcymGUw1lDE8,31487
|
|
15
|
+
orchestrator/static/review_sequences.html,sha256=oEWRtcMfEvu09F3r-hw8rRfECjcE48BHtht2CdGlw3E,54681
|
|
16
|
+
overlap_metrics/__init__.py,sha256=9RbbN6RoFBX4JdVHBTmh2WGG2p94U8th-uoG52qztoY,11184
|
|
17
|
+
overlap_metrics/config.py,sha256=U7i1MbxHGWHjod-zNlx2Aptl2yJzCzTCo5_2a1LAExs,3832
|
|
18
|
+
overlap_metrics/core.py,sha256=LiF-Ti6l0GrwxySzwxY2xwRBjJxC0brQf1e9yS8HiRA,9372
|
|
19
|
+
overlap_metrics/estimators.py,sha256=9v5BeCCJph_d6JdSN6x0D9NbrukCNx71a5K8Q1oBNsk,10741
|
|
20
|
+
overlap_metrics/metrics.py,sha256=Rvk4DJFWM4m7NrkP22LUE38I-A_pA_ONiXM3GjJ05jY,9873
|
|
21
|
+
overlap_metrics/registry.py,sha256=GZOs0gNCKPwUt60tJ7td6pLZozDstDzoIlxjNaQCRSQ,3552
|
|
22
|
+
overlap_metrics/utils.py,sha256=IICoLUy08AnEsMLqxRG7zftlBSmRvqYiSrg2kwK2S0Q,4521
|
|
23
|
+
photo_compare/__init__.py,sha256=0lsL76bUIomc_kdy_AFY7irsGcBfqI8r7IO8lYYYg88,4876
|
|
24
|
+
photo_compare/base.py,sha256=YsLim2GMB_oLH4GgeMYfW77kiEuonUEMoIMzJ8G3hnM,9356
|
|
25
|
+
photo_compare/config.py,sha256=16f32Fh-yLGrZuTudqYMrb0hDWHDbkYtgDvm3sR61Zw,7556
|
|
26
|
+
photo_compare/distance.py,sha256=Y1gW-pl6PUqtvsRTph4HhXyg_xXmj_IlRKEk2hZ6tg8,566
|
|
27
|
+
photo_compare/feature_methods.py,sha256=wjplbd9Qd1oYenQsVDoGXvUnrPTqiBEDsSihP4o2WSA,6904
|
|
28
|
+
photo_compare/file_hash.py,sha256=5IDCQlMK8qIvZF4M_w9Wr53Q7TSfoHg79ENnVjiH1pc,977
|
|
29
|
+
photo_compare/hash_methods.py,sha256=mRzbvPLpnbtXKcrIDyMBb8DWgjwE220ToJUD1ed4zmc,3684
|
|
30
|
+
photo_compare/histogram_methods.py,sha256=G20hOiHJi_LEII5x1_ZgU-FnZ8lVyENQc-LSsZAg-MU,5018
|
|
31
|
+
photo_compare/pixel_methods.py,sha256=3tUZ0ZR6CqKGJg2YOn-wMEoE0LWMJ6113n1q1lyeJZE,2363
|
|
32
|
+
photo_compare/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
photo_compare/structural_methods.py,sha256=nimULDnzb98cShv5-MM8sumFGYnpQv-bxAF_X5NMq1o,4255
|
|
34
|
+
photo_compare/types.py,sha256=PyA-ydJcTJwHMtaFXpJwe73GwTBwb9I9odc90XrbkKU,971
|
|
35
|
+
photo_stack_finder-0.1.8.dist-info/licenses/LICENSE,sha256=bmBV3PYoSFRs600gmAehqmuaIZIVUl8kGQoZWhapVnM,1468
|
|
36
|
+
scripts/orchestrate.py,sha256=HOGDxrMDCtCofj3l_ryuGunJytfKKISUuGC7GcSKs50,9287
|
|
37
|
+
utils/__init__.py,sha256=HBnCM-7JniIigj-NctrXV9UXAlZNRMijf_GdNDzlBkY,5196
|
|
38
|
+
utils/base_pipeline_stage.py,sha256=xPVFt8RjNqI6f80wshICzyShiP0kQbQxNgDUnmDpE6Y,6369
|
|
39
|
+
utils/base_ports.py,sha256=6wHfAdvTz5oB1hLWrs4lMQB-mzsOe6T4v0JR9_aAW3A,5240
|
|
40
|
+
utils/benchmark_utils.py,sha256=RdlCpKdrZXFOUUc5rXQ8yL8KcgS09kZi-bU37nqvzgg,30026
|
|
41
|
+
utils/channel.py,sha256=f2njTAs6bCtYNui31yIrXrtNN1CK0E86M7wUJiTP768,2586
|
|
42
|
+
utils/comparison_gates.py,sha256=nH5UBWbFpLPCLVyON0veiH_KgtBrVBayIgm3abbX_s0,23794
|
|
43
|
+
utils/compute_benchmarks.py,sha256=tienoHmMB2iQgQLZRtmocj1jc40XYqlbrDSVGyxViTA,13868
|
|
44
|
+
utils/compute_identical.py,sha256=OVzs8iDAB2oeV3rH0o6vwHqHbFUU443civu6XAjjxcg,10373
|
|
45
|
+
utils/compute_indices.py,sha256=W3MvToDHgrGtN-_Q-LaWNXQ7VEJCJiODRsfx7sPUWnk,9304
|
|
46
|
+
utils/compute_perceptual_hash.py,sha256=EHwDbnXd5JCk_vMQPU0F59SQEvLpIIz_qO5h7im0HRQ,4915
|
|
47
|
+
utils/compute_perceptual_match.py,sha256=hIZkA7vtFoHvOg3YbEwioEx0XZxxk7VIzFI6nS-I680,10258
|
|
48
|
+
utils/compute_sha_bins.py,sha256=kSAHM-5gtZ6LA8daebUoX1btlJ2o2QVNC0CdIvU-_Xg,7019
|
|
49
|
+
utils/compute_template_similarity.py,sha256=YU4IgQdinp7X7Vv63oz6K7rfGOke3TRTUJ-EBxbxloA,21449
|
|
50
|
+
utils/compute_templates.py,sha256=yVYfmaJlfOgeDJ2tdJ5iLd-n4SnPbjepl7eUBP3N1Js,5079
|
|
51
|
+
utils/compute_versions.py,sha256=GnvuJ4Bv_u3qlyDclRJ9H3dQ62nmfLCs5CAe2bJVyS4,20337
|
|
52
|
+
utils/config.py,sha256=R8xy9RLZAliLY9J8ZKkQABQX6KugfuFum7L5EwagEZA,15660
|
|
53
|
+
utils/data_io.py,sha256=MTkLdWYFTi6qfuUNKJR9JJ2d1gL-Hg7NG8ws-nByepk,2412
|
|
54
|
+
utils/graph_context.py,sha256=5vIiSOkzQ1s1vVliWYyjSy7NPvnMEDvrH9aqsivgneM,1558
|
|
55
|
+
utils/logger.py,sha256=vgKgsAAcqb-0dlOYdRHtsnBTqbuHOVTU-9h35plDgqU,1887
|
|
56
|
+
utils/models.py,sha256=TZkGh7AYHp36yJy5i06iSDdQiJh7Nq5GmGx1vpAnkGA,15300
|
|
57
|
+
utils/photo_file.py,sha256=IHuo_x6VbLeHfl2QDtkDAO1xtaObCMolt2OrBioNtdY,20679
|
|
58
|
+
utils/pipeline_graph.py,sha256=A39fIj647_IIS43tubvNROFZN2K_oj6hgkf3dG9H6lc,13262
|
|
59
|
+
utils/pipeline_stage.py,sha256=vuEUw9CCkIQzqS0k3Uy_Ez2XYCxGOgIXwDCStfgyLXQ,16978
|
|
60
|
+
utils/plot_helpers.py,sha256=Q1M1o8QndLhMXvNaxx-0bnusvQYGkU_2GyC6jsB1iNI,4451
|
|
61
|
+
utils/ports.py,sha256=JlwlLW0ViwhofeYWrQ_clboZkM4x5hnsg5GkefyNJgs,4318
|
|
62
|
+
utils/progress.py,sha256=q7M4xTYowsX8T_fXbDkJDzbcjQ7qhE_ayCZawEnbc3c,14936
|
|
63
|
+
utils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
+
utils/report_builder.py,sha256=okRhMUAbV1540eS8epyZ97Lkalq1a8JFOhM4MXXBpzA,3940
|
|
65
|
+
utils/review_types.py,sha256=PY08IioeOSIYKvxCZlcbyyvbYLPVG8hzXFbUCBp_Me4,1713
|
|
66
|
+
utils/review_utils.py,sha256=-mGRFF7s6fde67BDxnwPJNogw0bQeVAEw6ztPYMr6xU,8520
|
|
67
|
+
utils/sequence.py,sha256=CPPn4CWQb16iAhVVswknwBUl0gTEHzNJHCvSOV6Hevk,34751
|
|
68
|
+
utils/sequence_clustering.py,sha256=MuUSWbQ-7W6PJhbdFKrwPUBHNBe2JNNv2Atvgg0_uEg,5754
|
|
69
|
+
utils/template.py,sha256=zB5JZOc4HRylDrj1u7kV5NcxsbLYy2ZjeLwtVQcM7lA,1704
|
|
70
|
+
utils/template_parsing.py,sha256=Li1kseXrnjPf_KiIyae7iMEZWQg60bQIA8zOta1POfE,2460
|
|
71
|
+
photo_stack_finder-0.1.8.dist-info/METADATA,sha256=THw8oGEoaRtvmxmaQ-qx5tAOLGDBJQ_xxnfymePU_50,17193
|
|
72
|
+
photo_stack_finder-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
73
|
+
photo_stack_finder-0.1.8.dist-info/entry_points.txt,sha256=ZHWJIeylUGZYa_8Rjt3QxbWiCCVZZx3FQShTNg91zNQ,64
|
|
74
|
+
photo_stack_finder-0.1.8.dist-info/top_level.txt,sha256=VmNqTeDiNgCj3LZJICyRF2Uc4buZQ7LxgSob-BoImk0,57
|
|
75
|
+
photo_stack_finder-0.1.8.dist-info/RECORD,,
|
scripts/orchestrate.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""Photo
|
|
2
|
+
"""Photo Stack Finder Orchestrator - Web Interface Entry Point.
|
|
3
3
|
|
|
4
|
-
This provides a web-based interface for the photo
|
|
4
|
+
This provides a web-based interface for the photo stack finding pipeline.
|
|
5
5
|
|
|
6
6
|
Usage:
|
|
7
7
|
python orchestrate.py
|
|
@@ -38,10 +38,10 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
|
|
38
38
|
Configured ArgumentParser instance
|
|
39
39
|
"""
|
|
40
40
|
parser = argparse.ArgumentParser(
|
|
41
|
-
description="Photo
|
|
41
|
+
description="Photo Stack Finder Orchestrator - Web Interface",
|
|
42
42
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
43
43
|
epilog="""
|
|
44
|
-
This is a web interface for the photo
|
|
44
|
+
This is a web interface for the photo stack finding pipeline.
|
|
45
45
|
|
|
46
46
|
For the web interface, just run:
|
|
47
47
|
- python orchestrate.py
|
|
@@ -277,11 +277,13 @@ def main() -> None:
|
|
|
277
277
|
|
|
278
278
|
# Add platform-specific instructions
|
|
279
279
|
if is_linux:
|
|
280
|
-
lines.extend(
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
280
|
+
lines.extend(
|
|
281
|
+
[
|
|
282
|
+
"To access the web interface:",
|
|
283
|
+
f" Open your browser to: {url}",
|
|
284
|
+
"",
|
|
285
|
+
]
|
|
286
|
+
)
|
|
285
287
|
|
|
286
288
|
lines.append("Press Ctrl+C to stop the server")
|
|
287
289
|
print_banner("", lines)
|
|
@@ -302,7 +304,7 @@ def main() -> None:
|
|
|
302
304
|
|
|
303
305
|
# Start server
|
|
304
306
|
try:
|
|
305
|
-
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
|
307
|
+
uvicorn.run(app, host=args.host, port=args.port, log_level="info", access_log=False)
|
|
306
308
|
except KeyboardInterrupt:
|
|
307
309
|
print("\n🛑 Server stopped by user")
|
|
308
310
|
except Exception:
|
utils/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Utilities for photo
|
|
1
|
+
"""Utilities for photo stack finding."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -26,7 +26,6 @@ from .compute_perceptual_hash import ComputePerceptualHash
|
|
|
26
26
|
from .compute_perceptual_match import ComputePerceptualMatch
|
|
27
27
|
from .compute_sha_bins import ComputeShaBins
|
|
28
28
|
from .compute_template_similarity import ComputeTemplateSimilarity
|
|
29
|
-
from .compute_templates import ComputeTemplates
|
|
30
29
|
from .compute_versions import ComputeVersions
|
|
31
30
|
|
|
32
31
|
# Config module exports
|
|
@@ -109,6 +108,9 @@ from .template import (
|
|
|
109
108
|
partial_format,
|
|
110
109
|
)
|
|
111
110
|
|
|
111
|
+
# Template parsing module exports (extract_template not exported - internal use only)
|
|
112
|
+
# INDEX_T exported via sequence module
|
|
113
|
+
|
|
112
114
|
__all__ = [
|
|
113
115
|
# Config
|
|
114
116
|
"CONFIG",
|
|
@@ -136,7 +138,6 @@ __all__ = [
|
|
|
136
138
|
"ComputePerceptualMatch",
|
|
137
139
|
"ComputeShaBins",
|
|
138
140
|
"ComputeTemplateSimilarity",
|
|
139
|
-
"ComputeTemplates",
|
|
140
141
|
"ComputeVersions",
|
|
141
142
|
# Configuration
|
|
142
143
|
"Config",
|
utils/base_pipeline_stage.py
CHANGED
|
@@ -1,171 +1,171 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from collections.abc import Callable
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from .base_ports import StageProtocol
|
|
8
|
-
from .models import IdenticalGroup, ReviewType, SequenceGroup
|
|
9
|
-
from .ports import InputPort
|
|
10
|
-
from .progress import ProgressInfo, ProgressTracker
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class BasePipelineStage(ABC, StageProtocol):
|
|
14
|
-
"""Base class for polymorphic PipelineStage for use in lists of pipeline stages."""
|
|
15
|
-
|
|
16
|
-
path: Path
|
|
17
|
-
stage_name: str
|
|
18
|
-
stage_id: int | None
|
|
19
|
-
description: str
|
|
20
|
-
sequence_review_result: list[SequenceGroup]
|
|
21
|
-
identical_review_result: list[IdenticalGroup]
|
|
22
|
-
_progress_tracker: ProgressTracker | None
|
|
23
|
-
_phase_callback: Callable[[str], None] | None # Called by run() to notify phase changes
|
|
24
|
-
ref_photos_init: int | None
|
|
25
|
-
ref_photos_final: int | None
|
|
26
|
-
ref_seqs_init: int | None
|
|
27
|
-
ref_seqs_final: int | None
|
|
28
|
-
total_photos: int | None # Total photos including duplicates (invariant - should never change)
|
|
29
|
-
|
|
30
|
-
def __init__(
|
|
31
|
-
self,
|
|
32
|
-
path: Path,
|
|
33
|
-
stage_name: str,
|
|
34
|
-
):
|
|
35
|
-
"""Initialize pipeline stage with output path and name.
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
path: Path where stage results will be cached
|
|
39
|
-
stage_name: Human-readable name for progress tracking
|
|
40
|
-
"""
|
|
41
|
-
self.path = path
|
|
42
|
-
self.stage_name = stage_name
|
|
43
|
-
self.stage_id = None # Set by PipelineGraph.compute_execution_order()
|
|
44
|
-
self.description = "" # Override in subclasses for UI tooltips
|
|
45
|
-
self.sequence_review_result = [] # Pre-computed sequence review data (built during run())
|
|
46
|
-
self.identical_review_result = [] # Pre-computed identical review data (built during run())
|
|
47
|
-
self._progress_tracker = None
|
|
48
|
-
self._phase_callback = None # Set by orchestrator before calling run()
|
|
49
|
-
|
|
50
|
-
self.ref_photos_init = None
|
|
51
|
-
self.ref_photos_final = None
|
|
52
|
-
self.ref_seqs_init = None
|
|
53
|
-
self.ref_seqs_final = None
|
|
54
|
-
self.total_photos = None
|
|
55
|
-
|
|
56
|
-
# Performance metrics (set after stage completes)
|
|
57
|
-
self.elapsed_seconds: float | None = None
|
|
58
|
-
self.throughput: float | None = None # items per second
|
|
59
|
-
|
|
60
|
-
def get_ref_photo_count(self) -> int | None:
|
|
61
|
-
"""Get count of reference photos after stage has run.
|
|
62
|
-
|
|
63
|
-
Returns:
|
|
64
|
-
Number of reference photos after stage has run (None if there are none or the stage has not run).
|
|
65
|
-
"""
|
|
66
|
-
return self.ref_photos_final
|
|
67
|
-
|
|
68
|
-
def get_ref_sequence_count(self) -> int | None:
|
|
69
|
-
"""Get count of reference sequences after stage has run.
|
|
70
|
-
|
|
71
|
-
Returns:
|
|
72
|
-
Number of reference sequences after stage has run (None if there are none or the stage has not run).
|
|
73
|
-
"""
|
|
74
|
-
return self.ref_seqs_final
|
|
75
|
-
|
|
76
|
-
@abstractmethod
|
|
77
|
-
def run(self) -> None:
|
|
78
|
-
"""Execute pipeline stage - must be implemented by subclass."""
|
|
79
|
-
...
|
|
80
|
-
|
|
81
|
-
@abstractmethod
|
|
82
|
-
def finalise(self) -> None:
|
|
83
|
-
"""Hook to call at the end of run - must be implemented by subclass."""
|
|
84
|
-
...
|
|
85
|
-
|
|
86
|
-
@abstractmethod
|
|
87
|
-
def needs_review(self) -> ReviewType:
|
|
88
|
-
"""Discover what type of review this stage produces.
|
|
89
|
-
|
|
90
|
-
This allows the orchestrator to dynamically discover which stages
|
|
91
|
-
produce reviewable output without hard-coding stage names.
|
|
92
|
-
|
|
93
|
-
Returns:
|
|
94
|
-
- "none": No reviewable output (default)
|
|
95
|
-
- "photos": Produces photo groups (byte-identical duplicates)
|
|
96
|
-
- "sequences": Produces sequence groups (similar sequences)
|
|
97
|
-
"""
|
|
98
|
-
...
|
|
99
|
-
|
|
100
|
-
@abstractmethod
|
|
101
|
-
def has_review_data(self) -> bool:
|
|
102
|
-
"""Check if review data is ACTUALLY available for this stage.
|
|
103
|
-
|
|
104
|
-
Checks three conditions:
|
|
105
|
-
1. Stage has completed (cache file exists)
|
|
106
|
-
2. Stage is capable of producing review data (needs_review() != "none")
|
|
107
|
-
3. Review data actually exists (review lists not empty)
|
|
108
|
-
|
|
109
|
-
Returns:
|
|
110
|
-
True if stage has completed and has reviewable data available
|
|
111
|
-
"""
|
|
112
|
-
...
|
|
113
|
-
|
|
114
|
-
def get_progress(self) -> ProgressInfo | None:
|
|
115
|
-
"""Get current progress information for UI polling.
|
|
116
|
-
|
|
117
|
-
Returns:
|
|
118
|
-
ProgressInfo with formatted progress data if stage is currently executing,
|
|
119
|
-
None if stage is not running
|
|
120
|
-
|
|
121
|
-
Note:
|
|
122
|
-
This method is called by the orchestrator during execution to poll
|
|
123
|
-
progress for UI updates.
|
|
124
|
-
"""
|
|
125
|
-
# Direct access with combined check for type narrowing
|
|
126
|
-
if self._progress_tracker is None:
|
|
127
|
-
return None
|
|
128
|
-
return self._progress_tracker.get_snapshot()
|
|
129
|
-
|
|
130
|
-
def get_cache_timestamp(self) -> float:
|
|
131
|
-
"""Get the modification time of the cache file.
|
|
132
|
-
|
|
133
|
-
Returns:
|
|
134
|
-
Cache file's mtime (seconds since epoch), or raises RuntimeError if cache doesn't exist
|
|
135
|
-
"""
|
|
136
|
-
if self.path.exists():
|
|
137
|
-
mtime = self.path.stat().st_mtime
|
|
138
|
-
if mtime is None:
|
|
139
|
-
raise RuntimeError("mtime is None for existing file")
|
|
140
|
-
return mtime
|
|
141
|
-
raise RuntimeError(f"{self.stage_name} get_cache_timestamp called before cache file has been created")
|
|
142
|
-
|
|
143
|
-
def _cache_is_valid(self) -> bool:
|
|
144
|
-
"""Check if cache exists and is newer than all input port dependencies.
|
|
145
|
-
|
|
146
|
-
Uses isinstance() check on __dict__ items to avoid triggering property getters
|
|
147
|
-
and causing errors when stages haven't run yet.
|
|
148
|
-
|
|
149
|
-
Returns:
|
|
150
|
-
True if cache is valid and can be used, False otherwise
|
|
151
|
-
"""
|
|
152
|
-
if not self.path.exists():
|
|
153
|
-
return False
|
|
154
|
-
|
|
155
|
-
cache_mtime = self.get_cache_timestamp()
|
|
156
|
-
|
|
157
|
-
# Check if any input port dependency is newer than our cache
|
|
158
|
-
# Use __dict__ to avoid triggering property getters
|
|
159
|
-
for attr_value in self.__dict__.values():
|
|
160
|
-
# Check for InputPort type directly
|
|
161
|
-
if isinstance(attr_value, InputPort) and attr_value.is_bound():
|
|
162
|
-
upstream_timestamp = attr_value.timestamp()
|
|
163
|
-
|
|
164
|
-
if upstream_timestamp is None:
|
|
165
|
-
raise ValueError(f"{self.__class__.__name__} getting None timestamp from upstream")
|
|
166
|
-
|
|
167
|
-
if upstream_timestamp > cache_mtime:
|
|
168
|
-
# Upstream data is newer, our cache is stale
|
|
169
|
-
return False
|
|
170
|
-
|
|
171
|
-
return True
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .base_ports import StageProtocol
|
|
8
|
+
from .models import IdenticalGroup, ReviewType, SequenceGroup
|
|
9
|
+
from .ports import InputPort
|
|
10
|
+
from .progress import ProgressInfo, ProgressTracker
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BasePipelineStage(ABC, StageProtocol):
|
|
14
|
+
"""Base class for polymorphic PipelineStage for use in lists of pipeline stages."""
|
|
15
|
+
|
|
16
|
+
path: Path
|
|
17
|
+
stage_name: str
|
|
18
|
+
stage_id: int | None
|
|
19
|
+
description: str
|
|
20
|
+
sequence_review_result: list[SequenceGroup]
|
|
21
|
+
identical_review_result: list[IdenticalGroup]
|
|
22
|
+
_progress_tracker: ProgressTracker | None
|
|
23
|
+
_phase_callback: Callable[[str], None] | None # Called by run() to notify phase changes
|
|
24
|
+
ref_photos_init: int | None
|
|
25
|
+
ref_photos_final: int | None
|
|
26
|
+
ref_seqs_init: int | None
|
|
27
|
+
ref_seqs_final: int | None
|
|
28
|
+
total_photos: int | None # Total photos including duplicates (invariant - should never change)
|
|
29
|
+
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
path: Path,
|
|
33
|
+
stage_name: str,
|
|
34
|
+
):
|
|
35
|
+
"""Initialize pipeline stage with output path and name.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
path: Path where stage results will be cached
|
|
39
|
+
stage_name: Human-readable name for progress tracking
|
|
40
|
+
"""
|
|
41
|
+
self.path = path
|
|
42
|
+
self.stage_name = stage_name
|
|
43
|
+
self.stage_id = None # Set by PipelineGraph.compute_execution_order()
|
|
44
|
+
self.description = "" # Override in subclasses for UI tooltips
|
|
45
|
+
self.sequence_review_result = [] # Pre-computed sequence review data (built during run())
|
|
46
|
+
self.identical_review_result = [] # Pre-computed identical review data (built during run())
|
|
47
|
+
self._progress_tracker = None
|
|
48
|
+
self._phase_callback = None # Set by orchestrator before calling run()
|
|
49
|
+
|
|
50
|
+
self.ref_photos_init = None
|
|
51
|
+
self.ref_photos_final = None
|
|
52
|
+
self.ref_seqs_init = None
|
|
53
|
+
self.ref_seqs_final = None
|
|
54
|
+
self.total_photos = None
|
|
55
|
+
|
|
56
|
+
# Performance metrics (set after stage completes)
|
|
57
|
+
self.elapsed_seconds: float | None = None
|
|
58
|
+
self.throughput: float | None = None # items per second
|
|
59
|
+
|
|
60
|
+
def get_ref_photo_count(self) -> int | None:
|
|
61
|
+
"""Get count of reference photos after stage has run.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Number of reference photos after stage has run (None if there are none or the stage has not run).
|
|
65
|
+
"""
|
|
66
|
+
return self.ref_photos_final
|
|
67
|
+
|
|
68
|
+
def get_ref_sequence_count(self) -> int | None:
|
|
69
|
+
"""Get count of reference sequences after stage has run.
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
Number of reference sequences after stage has run (None if there are none or the stage has not run).
|
|
73
|
+
"""
|
|
74
|
+
return self.ref_seqs_final
|
|
75
|
+
|
|
76
|
+
@abstractmethod
|
|
77
|
+
def run(self) -> None:
|
|
78
|
+
"""Execute pipeline stage - must be implemented by subclass."""
|
|
79
|
+
...
|
|
80
|
+
|
|
81
|
+
@abstractmethod
|
|
82
|
+
def finalise(self) -> None:
|
|
83
|
+
"""Hook to call at the end of run - must be implemented by subclass."""
|
|
84
|
+
...
|
|
85
|
+
|
|
86
|
+
@abstractmethod
|
|
87
|
+
def needs_review(self) -> ReviewType:
|
|
88
|
+
"""Discover what type of review this stage produces.
|
|
89
|
+
|
|
90
|
+
This allows the orchestrator to dynamically discover which stages
|
|
91
|
+
produce reviewable output without hard-coding stage names.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
- "none": No reviewable output (default)
|
|
95
|
+
- "photos": Produces photo groups (byte-identical duplicates)
|
|
96
|
+
- "sequences": Produces sequence groups (similar sequences)
|
|
97
|
+
"""
|
|
98
|
+
...
|
|
99
|
+
|
|
100
|
+
@abstractmethod
|
|
101
|
+
def has_review_data(self) -> bool:
|
|
102
|
+
"""Check if review data is ACTUALLY available for this stage.
|
|
103
|
+
|
|
104
|
+
Checks three conditions:
|
|
105
|
+
1. Stage has completed (cache file exists)
|
|
106
|
+
2. Stage is capable of producing review data (needs_review() != "none")
|
|
107
|
+
3. Review data actually exists (review lists not empty)
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
True if stage has completed and has reviewable data available
|
|
111
|
+
"""
|
|
112
|
+
...
|
|
113
|
+
|
|
114
|
+
def get_progress(self) -> ProgressInfo | None:
|
|
115
|
+
"""Get current progress information for UI polling.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
ProgressInfo with formatted progress data if stage is currently executing,
|
|
119
|
+
None if stage is not running
|
|
120
|
+
|
|
121
|
+
Note:
|
|
122
|
+
This method is called by the orchestrator during execution to poll
|
|
123
|
+
progress for UI updates.
|
|
124
|
+
"""
|
|
125
|
+
# Direct access with combined check for type narrowing
|
|
126
|
+
if self._progress_tracker is None:
|
|
127
|
+
return None
|
|
128
|
+
return self._progress_tracker.get_snapshot()
|
|
129
|
+
|
|
130
|
+
def get_cache_timestamp(self) -> float:
|
|
131
|
+
"""Get the modification time of the cache file.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
Cache file's mtime (seconds since epoch), or raises RuntimeError if cache doesn't exist
|
|
135
|
+
"""
|
|
136
|
+
if self.path.exists():
|
|
137
|
+
mtime = self.path.stat().st_mtime
|
|
138
|
+
if mtime is None:
|
|
139
|
+
raise RuntimeError("mtime is None for existing file")
|
|
140
|
+
return mtime
|
|
141
|
+
raise RuntimeError(f"{self.stage_name} get_cache_timestamp called before cache file has been created")
|
|
142
|
+
|
|
143
|
+
def _cache_is_valid(self) -> bool:
|
|
144
|
+
"""Check if cache exists and is newer than all input port dependencies.
|
|
145
|
+
|
|
146
|
+
Uses isinstance() check on __dict__ items to avoid triggering property getters
|
|
147
|
+
and causing errors when stages haven't run yet.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
True if cache is valid and can be used, False otherwise
|
|
151
|
+
"""
|
|
152
|
+
if not self.path.exists():
|
|
153
|
+
return False
|
|
154
|
+
|
|
155
|
+
cache_mtime = self.get_cache_timestamp()
|
|
156
|
+
|
|
157
|
+
# Check if any input port dependency is newer than our cache
|
|
158
|
+
# Use __dict__ to avoid triggering property getters
|
|
159
|
+
for attr_value in self.__dict__.values():
|
|
160
|
+
# Check for InputPort type directly
|
|
161
|
+
if isinstance(attr_value, InputPort) and attr_value.is_bound():
|
|
162
|
+
upstream_timestamp = attr_value.timestamp()
|
|
163
|
+
|
|
164
|
+
if upstream_timestamp is None:
|
|
165
|
+
raise ValueError(f"{self.__class__.__name__} getting None timestamp from upstream")
|
|
166
|
+
|
|
167
|
+
if upstream_timestamp > cache_mtime:
|
|
168
|
+
# Upstream data is newer, our cache is stale
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
return True
|