Perception 0.8.5__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {perception-0.8.5 → perception-0.9.0}/.github/workflows/release.yaml +3 -0
- {perception-0.8.5 → perception-0.9.0}/.pre-commit-config.yaml +14 -0
- {perception-0.8.5 → perception-0.9.0}/CHANGELOG.md +12 -1
- {perception-0.8.5/Perception.egg-info → perception-0.9.0}/PKG-INFO +12 -5
- {perception-0.8.5 → perception-0.9.0/Perception.egg-info}/PKG-INFO +12 -5
- {perception-0.8.5 → perception-0.9.0}/Perception.egg-info/SOURCES.txt +1 -0
- {perception-0.8.5 → perception-0.9.0}/Perception.egg-info/requires.txt +8 -4
- {perception-0.8.5 → perception-0.9.0}/README.md +4 -0
- perception-0.9.0/perception/_optional.py +42 -0
- {perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/__init__.py +5 -2
- {perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/_graph_backend.py +4 -6
- {perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/index.py +9 -2
- {perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/serve.py +10 -8
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/common.py +9 -2
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/extensions.c +146 -146
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/image.py +9 -2
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/video.py +3 -1
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/video_transforms.py +3 -1
- {perception-0.8.5 → perception-0.9.0}/perception/extensions.cpp +146 -146
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/hasher.py +0 -1
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/pdq.py +3 -2
- {perception-0.8.5 → perception-0.9.0}/perception/local_descriptor_deduplication.py +8 -2
- {perception-0.8.5 → perception-0.9.0}/perception/testing/__init__.py +3 -1
- {perception-0.8.5 → perception-0.9.0}/perception/tools.py +3 -3
- {perception-0.8.5 → perception-0.9.0}/pyproject.toml +13 -5
- {perception-0.8.5 → perception-0.9.0}/uv.lock +29 -12
- {perception-0.8.5 → perception-0.9.0}/.dockerignore +0 -0
- {perception-0.8.5 → perception-0.9.0}/.git-blame-ignore-revs +0 -0
- {perception-0.8.5 → perception-0.9.0}/.gitattributes +0 -0
- {perception-0.8.5 → perception-0.9.0}/.github/dependabot.yaml +0 -0
- {perception-0.8.5 → perception-0.9.0}/.github/workflows/ci.yaml +0 -0
- {perception-0.8.5 → perception-0.9.0}/.github/workflows/gh-pages.yaml +0 -0
- {perception-0.8.5 → perception-0.9.0}/.gitignore +0 -0
- {perception-0.8.5 → perception-0.9.0}/.readthedocs.yaml +0 -0
- {perception-0.8.5 → perception-0.9.0}/CODE_OF_CONDUCT.md +0 -0
- {perception-0.8.5 → perception-0.9.0}/LICENSE +0 -0
- {perception-0.8.5 → perception-0.9.0}/MANIFEST.in +0 -0
- {perception-0.8.5 → perception-0.9.0}/Makefile +0 -0
- {perception-0.8.5 → perception-0.9.0}/Perception.egg-info/dependency_links.txt +0 -0
- {perception-0.8.5 → perception-0.9.0}/Perception.egg-info/top_level.txt +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/api/benchmarking.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/api/hashers.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/api/index.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/api/tools.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/conf.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/examples/benchmarking.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/examples/deduplication.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/examples/detecting_csam.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/examples/index.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/index.rst +0 -0
- {perception-0.8.5 → perception-0.9.0}/docs/requirements.txt +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/__init__.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/debug.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/__init__.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/extensions.pyx +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/benchmarking/image_transforms.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/extensions.pyx +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/__init__.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/__init__.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/average.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/dhash.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/opencv.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/phash.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/image/wavelet.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/tools.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/video/__init__.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/video/framewise.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/hashers/video/tmk.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/py.typed +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/README.md +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image1.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image10.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image2.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image3.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image4.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image5.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image6.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image7.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image8.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/images/image9.jpg +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/logos/README.md +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/logos/logoipsum.png +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/README.md +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/expected_tmk.json.gz +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/extra_channel_attached_pic.mp4 +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/extra_channel_attached_pic_audio.mp4 +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/rgb.m4v +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/v1.m4v +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/v2.m4v +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/testing/videos/v2s.mov +0 -0
- {perception-0.8.5 → perception-0.9.0}/perception/utils.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/setup.cfg +0 -0
- {perception-0.8.5 → perception-0.9.0}/setup.py +0 -0
- {perception-0.8.5 → perception-0.9.0}/tests/images/chair-square.png +0 -0
- {perception-0.8.5 → perception-0.9.0}/tests/images/chair-tall.png +0 -0
- {perception-0.8.5 → perception-0.9.0}/tests/images/chair.png +0 -0
- {perception-0.8.5 → perception-0.9.0}/tests/images/chair3.png +0 -0
|
@@ -24,3 +24,17 @@ repos:
|
|
|
24
24
|
rev: v1.8.0
|
|
25
25
|
hooks:
|
|
26
26
|
- id: mypy
|
|
27
|
+
# Limit to package sources; `[tool.mypy] exclude` is ignored when
|
|
28
|
+
# pre-commit passes file paths as arguments.
|
|
29
|
+
files: ^perception/
|
|
30
|
+
# mirrors-mypy runs in an isolated env, so re-list any deps mypy
|
|
31
|
+
# needs to follow imports / type-check our code. Keep in sync
|
|
32
|
+
# with the dev dependency group in pyproject.toml.
|
|
33
|
+
additional_dependencies:
|
|
34
|
+
- numpy>=1.26.4,<3.0.0
|
|
35
|
+
- pandas
|
|
36
|
+
- pandas-stubs
|
|
37
|
+
- scipy
|
|
38
|
+
- typing_extensions>=4.0,<5.0
|
|
39
|
+
- types-pillow
|
|
40
|
+
- types-tqdm
|
|
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
+
## [0.9.0] - 2026-05-13
|
|
8
|
+
This release moves heavyweight dependencies behind optional extras so they are not installed for users who only need core hashing functionality, and standardizes the error users see when an extra is missing.
|
|
9
|
+
|
|
10
|
+
### Breaking changes
|
|
11
|
+
- `faiss-cpu`, `networkit`, and `networkx` are no longer core dependencies. They are pulled in by the new `approximate-deduplication` extra (`pip install perception[approximate-deduplication]`), which is required to use `perception.approximate_deduplication` or `perception.local_descriptor_deduplication`.
|
|
12
|
+
- `pandas` is no longer a core dependency. It is pulled in by the `approximate-deduplication` and `benchmarking` extras (the only modules that use it). Code that imports `perception.benchmarking`, `perception.approximate_deduplication`, `perception.local_descriptor_deduplication`, or `perception.testing` should install the appropriate extra.
|
|
13
|
+
|
|
14
|
+
### Enhancements
|
|
15
|
+
- All optional-dependency import sites — across the `approximate-deduplication`, `benchmarking`, `matching`, and `pdq` extras — now raise a uniform, actionable `ImportError` pointing at the correct `pip install perception[<extra>]` command when the relevant extra is not installed. This is implemented via a single helper, `perception._optional.import_optional`.
|
|
16
|
+
- `typing_extensions` is now an explicit core dependency (it was previously transitive via `faiss-cpu` / `pandas`).
|
|
17
|
+
|
|
7
18
|
## [0.4.0] - 2020-10-17
|
|
8
19
|
This release switches from using false positive rates in benchmarking to reporting precision, which is more intuitive.
|
|
9
20
|
|
|
@@ -33,4 +44,4 @@ This release adds significantly more support for video.
|
|
|
33
44
|
This release adds more support for hashing videos (including TMK L1 and TMK L2). As part of that, it also includes a re-factor to separate `benchmarking.BenchmarkDataset` and `benchmarking.BenchmarkTransforms` into image and video variants.
|
|
34
45
|
|
|
35
46
|
## [0.1.0] - 2019-11-04
|
|
36
|
-
Initial release
|
|
47
|
+
Initial release
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: Perception
|
|
3
|
-
Version: 0.8.5
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Perception provides flexible, well-documented, and comprehensively tested tooling for perceptual hashing research, development, and production use.
|
|
5
5
|
Author-email: Thorn <info@wearethorn.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -9,19 +9,22 @@ Description-Content-Type: text/markdown
|
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Dist: numpy<3.0.0,>=1.26.4
|
|
11
11
|
Requires-Dist: opencv-contrib-python-headless<5.0.0,>=4.10.0
|
|
12
|
-
Requires-Dist: faiss-cpu<2.0.0,>=1.8.0
|
|
13
|
-
Requires-Dist: networkit<12.0.0,>=11.1; sys_platform != "darwin"
|
|
14
|
-
Requires-Dist: networkx<4.0,>=3.0; sys_platform == "darwin"
|
|
15
|
-
Requires-Dist: pandas
|
|
16
12
|
Requires-Dist: Pillow
|
|
17
13
|
Requires-Dist: pywavelets<2.0.0,>=1.5.0
|
|
18
14
|
Requires-Dist: validators<1.0.0,>=0.22.0
|
|
19
15
|
Requires-Dist: rich<14.0.0,>=13.7.0
|
|
20
16
|
Requires-Dist: scipy
|
|
21
17
|
Requires-Dist: tqdm<5.0.0,>=4.67.1
|
|
18
|
+
Requires-Dist: typing_extensions<5.0,>=4.0
|
|
19
|
+
Provides-Extra: approximate-deduplication
|
|
20
|
+
Requires-Dist: faiss-cpu<2.0.0,>=1.8.0; extra == "approximate-deduplication"
|
|
21
|
+
Requires-Dist: networkit<12.0.0,>=11.1; sys_platform != "darwin" and extra == "approximate-deduplication"
|
|
22
|
+
Requires-Dist: networkx<4.0,>=3.0; sys_platform == "darwin" and extra == "approximate-deduplication"
|
|
23
|
+
Requires-Dist: pandas; extra == "approximate-deduplication"
|
|
22
24
|
Provides-Extra: benchmarking
|
|
23
25
|
Requires-Dist: matplotlib; extra == "benchmarking"
|
|
24
26
|
Requires-Dist: albumentations<3.0.0,>=2.0.8; extra == "benchmarking"
|
|
27
|
+
Requires-Dist: pandas; extra == "benchmarking"
|
|
25
28
|
Requires-Dist: tabulate; extra == "benchmarking"
|
|
26
29
|
Requires-Dist: scikit-learn; extra == "benchmarking"
|
|
27
30
|
Requires-Dist: ffmpeg-python; extra == "benchmarking"
|
|
@@ -50,6 +53,10 @@ Dynamic: license-file
|
|
|
50
53
|
|
|
51
54
|
`perception` provides optional extras for additional functionality:
|
|
52
55
|
|
|
56
|
+
- `approximate-deduplication` – FAISS-based approximate-nearest-neighbor
|
|
57
|
+
deduplication and graph community/clique detection (used by
|
|
58
|
+
`perception.approximate_deduplication` and
|
|
59
|
+
`perception.local_descriptor_deduplication`)
|
|
53
60
|
- `benchmarking` – tools for benchmarking perceptual hashes
|
|
54
61
|
- `matching` – async matching utilities
|
|
55
62
|
- `pdq` – Facebook's PDQ hash support
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: Perception
|
|
3
|
-
Version: 0.8.5
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Perception provides flexible, well-documented, and comprehensively tested tooling for perceptual hashing research, development, and production use.
|
|
5
5
|
Author-email: Thorn <info@wearethorn.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -9,19 +9,22 @@ Description-Content-Type: text/markdown
|
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Dist: numpy<3.0.0,>=1.26.4
|
|
11
11
|
Requires-Dist: opencv-contrib-python-headless<5.0.0,>=4.10.0
|
|
12
|
-
Requires-Dist: faiss-cpu<2.0.0,>=1.8.0
|
|
13
|
-
Requires-Dist: networkit<12.0.0,>=11.1; sys_platform != "darwin"
|
|
14
|
-
Requires-Dist: networkx<4.0,>=3.0; sys_platform == "darwin"
|
|
15
|
-
Requires-Dist: pandas
|
|
16
12
|
Requires-Dist: Pillow
|
|
17
13
|
Requires-Dist: pywavelets<2.0.0,>=1.5.0
|
|
18
14
|
Requires-Dist: validators<1.0.0,>=0.22.0
|
|
19
15
|
Requires-Dist: rich<14.0.0,>=13.7.0
|
|
20
16
|
Requires-Dist: scipy
|
|
21
17
|
Requires-Dist: tqdm<5.0.0,>=4.67.1
|
|
18
|
+
Requires-Dist: typing_extensions<5.0,>=4.0
|
|
19
|
+
Provides-Extra: approximate-deduplication
|
|
20
|
+
Requires-Dist: faiss-cpu<2.0.0,>=1.8.0; extra == "approximate-deduplication"
|
|
21
|
+
Requires-Dist: networkit<12.0.0,>=11.1; sys_platform != "darwin" and extra == "approximate-deduplication"
|
|
22
|
+
Requires-Dist: networkx<4.0,>=3.0; sys_platform == "darwin" and extra == "approximate-deduplication"
|
|
23
|
+
Requires-Dist: pandas; extra == "approximate-deduplication"
|
|
22
24
|
Provides-Extra: benchmarking
|
|
23
25
|
Requires-Dist: matplotlib; extra == "benchmarking"
|
|
24
26
|
Requires-Dist: albumentations<3.0.0,>=2.0.8; extra == "benchmarking"
|
|
27
|
+
Requires-Dist: pandas; extra == "benchmarking"
|
|
25
28
|
Requires-Dist: tabulate; extra == "benchmarking"
|
|
26
29
|
Requires-Dist: scikit-learn; extra == "benchmarking"
|
|
27
30
|
Requires-Dist: ffmpeg-python; extra == "benchmarking"
|
|
@@ -50,6 +53,10 @@ Dynamic: license-file
|
|
|
50
53
|
|
|
51
54
|
`perception` provides optional extras for additional functionality:
|
|
52
55
|
|
|
56
|
+
- `approximate-deduplication` – FAISS-based approximate-nearest-neighbor
|
|
57
|
+
deduplication and graph community/clique detection (used by
|
|
58
|
+
`perception.approximate_deduplication` and
|
|
59
|
+
`perception.local_descriptor_deduplication`)
|
|
53
60
|
- `benchmarking` – tools for benchmarking perceptual hashes
|
|
54
61
|
- `matching` – async matching utilities
|
|
55
62
|
- `pdq` – Facebook's PDQ hash support
|
|
@@ -1,23 +1,27 @@
|
|
|
1
1
|
numpy<3.0.0,>=1.26.4
|
|
2
2
|
opencv-contrib-python-headless<5.0.0,>=4.10.0
|
|
3
|
-
faiss-cpu<2.0.0,>=1.8.0
|
|
4
|
-
pandas
|
|
5
3
|
Pillow
|
|
6
4
|
pywavelets<2.0.0,>=1.5.0
|
|
7
5
|
validators<1.0.0,>=0.22.0
|
|
8
6
|
rich<14.0.0,>=13.7.0
|
|
9
7
|
scipy
|
|
10
8
|
tqdm<5.0.0,>=4.67.1
|
|
9
|
+
typing_extensions<5.0,>=4.0
|
|
10
|
+
|
|
11
|
+
[approximate-deduplication]
|
|
12
|
+
faiss-cpu<2.0.0,>=1.8.0
|
|
13
|
+
pandas
|
|
11
14
|
|
|
12
|
-
[:sys_platform != "darwin"]
|
|
15
|
+
[approximate-deduplication:sys_platform != "darwin"]
|
|
13
16
|
networkit<12.0.0,>=11.1
|
|
14
17
|
|
|
15
|
-
[:sys_platform == "darwin"]
|
|
18
|
+
[approximate-deduplication:sys_platform == "darwin"]
|
|
16
19
|
networkx<4.0,>=3.0
|
|
17
20
|
|
|
18
21
|
[benchmarking]
|
|
19
22
|
matplotlib
|
|
20
23
|
albumentations<3.0.0,>=2.0.8
|
|
24
|
+
pandas
|
|
21
25
|
tabulate
|
|
22
26
|
scikit-learn
|
|
23
27
|
ffmpeg-python
|
|
@@ -16,6 +16,10 @@
|
|
|
16
16
|
|
|
17
17
|
`perception` provides optional extras for additional functionality:
|
|
18
18
|
|
|
19
|
+
- `approximate-deduplication` – FAISS-based approximate-nearest-neighbor
|
|
20
|
+
deduplication and graph community/clique detection (used by
|
|
21
|
+
`perception.approximate_deduplication` and
|
|
22
|
+
`perception.local_descriptor_deduplication`)
|
|
19
23
|
- `benchmarking` – tools for benchmarking perceptual hashes
|
|
20
24
|
- `matching` – async matching utilities
|
|
21
25
|
- `pdq` – Facebook's PDQ hash support
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Helpers for importing optional dependencies with friendly error messages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def import_optional(package: str, *, extra: str) -> Any:
|
|
10
|
+
"""Import ``package`` or raise ``ImportError`` pointing at ``extra``.
|
|
11
|
+
|
|
12
|
+
Returns the imported module typed as ``Any`` so that attribute access
|
|
13
|
+
(including in type annotations like ``pd.DataFrame``) is not flagged
|
|
14
|
+
by static type checkers when the optional package is not installed in
|
|
15
|
+
the type-checking environment.
|
|
16
|
+
|
|
17
|
+
Parameters
|
|
18
|
+
----------
|
|
19
|
+
package:
|
|
20
|
+
The importable module name (e.g. ``"faiss"``, ``"pandas"``).
|
|
21
|
+
extra:
|
|
22
|
+
The name of the ``perception`` extra that provides ``package``
|
|
23
|
+
(e.g. ``"approximate-deduplication"``).
|
|
24
|
+
"""
|
|
25
|
+
try:
|
|
26
|
+
return importlib.import_module(package)
|
|
27
|
+
except (
|
|
28
|
+
ModuleNotFoundError
|
|
29
|
+
) as exc: # pragma: no cover - exercised only without extra installed
|
|
30
|
+
# Only convert to the friendly "install the extra" hint when the
|
|
31
|
+
# missing module is the optional package itself (or a submodule of
|
|
32
|
+
# it). If the optional package is installed but one of its own
|
|
33
|
+
# imports failed, re-raise the original error so the real cause is
|
|
34
|
+
# not hidden.
|
|
35
|
+
missing = exc.name or ""
|
|
36
|
+
top_level = package.split(".", 1)[0]
|
|
37
|
+
if missing != top_level and not missing.startswith(top_level + "."):
|
|
38
|
+
raise
|
|
39
|
+
raise ImportError(
|
|
40
|
+
f"`{package}` is required for this functionality. Install the "
|
|
41
|
+
f"'{extra}' extra with `pip install perception[{extra}]`."
|
|
42
|
+
) from exc
|
|
@@ -3,12 +3,15 @@ import math
|
|
|
3
3
|
import os.path as op
|
|
4
4
|
import typing
|
|
5
5
|
|
|
6
|
-
import faiss
|
|
7
6
|
import numpy as np
|
|
8
7
|
import tqdm
|
|
9
8
|
import typing_extensions
|
|
10
9
|
|
|
11
|
-
from .
|
|
10
|
+
from perception._optional import import_optional
|
|
11
|
+
|
|
12
|
+
faiss = import_optional("faiss", extra="approximate-deduplication")
|
|
13
|
+
|
|
14
|
+
from ._graph_backend import get_graph_backend # noqa: E402 (must follow faiss check)
|
|
12
15
|
|
|
13
16
|
LOGGER = logging.getLogger(__name__)
|
|
14
17
|
DEFAULT_PCT_PROBE = 0
|
{perception-0.8.5 → perception-0.9.0}/perception/approximate_deduplication/_graph_backend.py
RENAMED
|
@@ -2,6 +2,8 @@ import sys
|
|
|
2
2
|
import typing
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
|
|
5
|
+
from perception._optional import import_optional
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class GraphBackend(ABC):
|
|
7
9
|
@abstractmethod
|
|
@@ -28,9 +30,7 @@ class GraphBackend(ABC):
|
|
|
28
30
|
|
|
29
31
|
class NetworkitGraphBackend(GraphBackend):
|
|
30
32
|
def __init__(self):
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
self.nk = nk
|
|
33
|
+
self.nk = import_optional("networkit", extra="approximate-deduplication")
|
|
34
34
|
|
|
35
35
|
def build_graph(
|
|
36
36
|
self, node_count: int, edges: typing.Iterable[tuple[int, int]]
|
|
@@ -83,9 +83,7 @@ class NetworkitGraphBackend(GraphBackend):
|
|
|
83
83
|
|
|
84
84
|
class NetworkxGraphBackend(GraphBackend):
|
|
85
85
|
def __init__(self):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
self.nx = nx
|
|
86
|
+
self.nx = import_optional("networkx", extra="approximate-deduplication")
|
|
89
87
|
|
|
90
88
|
def build_graph(
|
|
91
89
|
self, node_count: int, edges: typing.Iterable[tuple[int, int]]
|
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
import time
|
|
2
2
|
import typing
|
|
3
3
|
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
|
-
import faiss
|
|
6
6
|
import numpy as np
|
|
7
|
-
import pandas as pd
|
|
8
7
|
import typing_extensions
|
|
9
8
|
|
|
10
9
|
import perception.hashers.tools as pht
|
|
10
|
+
from perception._optional import import_optional
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
import pandas as pd
|
|
14
|
+
else:
|
|
15
|
+
pd = import_optional("pandas", extra="approximate-deduplication")
|
|
16
|
+
|
|
17
|
+
faiss = import_optional("faiss", extra="approximate-deduplication")
|
|
11
18
|
|
|
12
19
|
|
|
13
20
|
class QueryInput(typing_extensions.TypedDict):
|
|
@@ -4,14 +4,16 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import typing
|
|
6
6
|
|
|
7
|
-
import aiohttp.web
|
|
8
7
|
import numpy as np
|
|
9
|
-
from pythonjsonlogger import jsonlogger
|
|
10
8
|
|
|
11
9
|
import perception.hashers.tools as pht
|
|
10
|
+
from perception._optional import import_optional
|
|
12
11
|
|
|
13
12
|
from .index import ApproximateNearestNeighbors
|
|
14
13
|
|
|
14
|
+
aiohttp_web = import_optional("aiohttp.web", extra="matching")
|
|
15
|
+
jsonlogger = import_optional("pythonjsonlogger.jsonlogger", extra="matching")
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
def is_similarity_valid(data, index: ApproximateNearestNeighbors):
|
|
17
19
|
"""Validates input to the similarity endpoint."""
|
|
@@ -54,13 +56,13 @@ async def similarity(request):
|
|
|
54
56
|
try:
|
|
55
57
|
request_data = await request.json()
|
|
56
58
|
except json.JSONDecodeError:
|
|
57
|
-
return
|
|
59
|
+
return aiohttp_web.json_response({"reason": "Malformed JSON"}, status=400)
|
|
58
60
|
|
|
59
61
|
index = request.app["index"]
|
|
60
62
|
try:
|
|
61
63
|
assert is_similarity_valid(request_data, index)
|
|
62
64
|
except Exception:
|
|
63
|
-
return
|
|
65
|
+
return aiohttp_web.json_response({"reason": "Invalid JSON request"}, status=400)
|
|
64
66
|
|
|
65
67
|
async with request.app["query_semaphore"]:
|
|
66
68
|
matches = await asyncio.get_event_loop().run_in_executor(
|
|
@@ -78,7 +80,7 @@ async def similarity(request):
|
|
|
78
80
|
)
|
|
79
81
|
matches = json.loads(json.dumps({"queries": matches}))
|
|
80
82
|
|
|
81
|
-
return
|
|
83
|
+
return aiohttp_web.json_response(matches)
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
def get_logger(name, log_level):
|
|
@@ -133,7 +135,7 @@ async def serve(
|
|
|
133
135
|
"""
|
|
134
136
|
logger = get_logger(name="serve", log_level=log_level)
|
|
135
137
|
logger.info("Initializing web service")
|
|
136
|
-
app =
|
|
138
|
+
app = aiohttp_web.Application()
|
|
137
139
|
app.router.add_post("/v1/similarity", similarity, name="similarity")
|
|
138
140
|
|
|
139
141
|
# Store globals in the application object
|
|
@@ -144,8 +146,8 @@ async def serve(
|
|
|
144
146
|
app["index"] = index
|
|
145
147
|
app["query_semaphore"] = asyncio.Semaphore(concurrency)
|
|
146
148
|
logger.info("Entering web service listener loop.")
|
|
147
|
-
runner =
|
|
149
|
+
runner = aiohttp_web.AppRunner(app, logger=logger)
|
|
148
150
|
await runner.setup()
|
|
149
|
-
site =
|
|
151
|
+
site = aiohttp_web.TCPSite(runner, host, port)
|
|
150
152
|
await site.start()
|
|
151
153
|
return site
|
|
@@ -7,15 +7,22 @@ import uuid
|
|
|
7
7
|
import warnings
|
|
8
8
|
import zipfile
|
|
9
9
|
from abc import ABC
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
10
11
|
|
|
11
|
-
import matplotlib.pyplot as plt
|
|
12
12
|
import numpy as np
|
|
13
|
-
import pandas as pd
|
|
14
13
|
import tqdm
|
|
15
14
|
from scipy import spatial, stats
|
|
16
15
|
|
|
16
|
+
from .._optional import import_optional
|
|
17
17
|
from ..hashers.tools import compute_md5, string_to_vector
|
|
18
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
import matplotlib.pyplot as plt
|
|
21
|
+
import pandas as pd
|
|
22
|
+
else:
|
|
23
|
+
plt = import_optional("matplotlib.pyplot", extra="benchmarking")
|
|
24
|
+
pd = import_optional("pandas", extra="benchmarking")
|
|
25
|
+
|
|
19
26
|
try:
|
|
20
27
|
from . import extensions # type: ignore
|
|
21
28
|
except ImportError:
|