gxhash 0.1.2__cp313-cp313-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gxhash might be problematic. Click here for more details.
- gxhash/__init__.py +5 -0
- gxhash/__init__.pyi +194 -0
- gxhash/gxhash.cpython-313-x86_64-linux-gnu.so +0 -0
- gxhash/py.typed +0 -0
- gxhash-0.1.2.dist-info/METADATA +72 -0
- gxhash-0.1.2.dist-info/RECORD +7 -0
- gxhash-0.1.2.dist-info/WHEEL +4 -0
gxhash/__init__.py
ADDED
gxhash/__init__.pyi
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from typing import Protocol
|
|
2
|
+
|
|
3
|
+
class File(Protocol):
|
|
4
|
+
def fileno(self) -> int:
|
|
5
|
+
"""
|
|
6
|
+
Summary
|
|
7
|
+
-------
|
|
8
|
+
Returns the file descriptor of the file.
|
|
9
|
+
Some file-like objects, such as `io.BytesIO`, do not implement `fileno` and raise `io.UnsupportedOperation` when it is called.
|
|
10
|
+
If you are uncertain whether the file has a valid `fileno` method,
|
|
11
|
+
you should write to a `tempfile.TemporaryFile` and pass that to the hasher.
|
|
12
|
+
|
|
13
|
+
Returns
|
|
14
|
+
-------
|
|
15
|
+
file_descriptor (`int`)
|
|
16
|
+
the file descriptor of the file
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
class Hasher(Protocol):
|
|
20
|
+
def __init__(self, *, seed: int) -> None:
|
|
21
|
+
"""
|
|
22
|
+
Summary
|
|
23
|
+
-------
|
|
24
|
+
Initialise `Hasher` with a `seed`.
|
|
25
|
+
The `seed` should not be exposed as it is used to deterministically generate the hash.
|
|
26
|
+
An exposed `seed` would put your service at a higher risk of a DoS attack.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
seed (`int`)
|
|
31
|
+
a seed for the hasher
|
|
32
|
+
|
|
33
|
+
Example
|
|
34
|
+
-------
|
|
35
|
+
```python
|
|
36
|
+
hasher = GxHash128(seed=1234)
|
|
37
|
+
```
|
|
38
|
+
"""
|
|
39
|
+
def hash(self, bytes: bytes) -> int:
|
|
40
|
+
"""
|
|
41
|
+
Summary
|
|
42
|
+
-------
|
|
43
|
+
Hashes `bytes` to an `int`.
|
|
44
|
+
If your input is in `bytes`, this is the most performant variant of the hasher.
|
|
45
|
+
|
|
46
|
+
Parameters
|
|
47
|
+
----------
|
|
48
|
+
bytes (`bytes`)
|
|
49
|
+
input bytes
|
|
50
|
+
|
|
51
|
+
Returns
|
|
52
|
+
-------
|
|
53
|
+
hash (`int`)
|
|
54
|
+
the hash of the input bytes
|
|
55
|
+
|
|
56
|
+
Example
|
|
57
|
+
-------
|
|
58
|
+
```python
|
|
59
|
+
hasher = GxHash128(seed=1234)
|
|
60
|
+
print(f"Hash is {hasher.hash(bytes([42] * 1000))}!")
|
|
61
|
+
```
|
|
62
|
+
"""
|
|
63
|
+
def hash_nogil(self, bytes: bytes) -> int:
|
|
64
|
+
"""
|
|
65
|
+
Summary
|
|
66
|
+
-------
|
|
67
|
+
Hashes `bytes` to an `int` without the GIL.
|
|
68
|
+
This method allows you to perform multiple hashes with true multi-threaded parallelism.
|
|
69
|
+
If called sequentially, this method is slightly less performant than the default `hash` method.
|
|
70
|
+
Otherwise, this variant offers the best raw multi-threaded performance.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
bytes (`bytes`)
|
|
75
|
+
input bytes
|
|
76
|
+
|
|
77
|
+
Returns
|
|
78
|
+
-------
|
|
79
|
+
hash (`int`)
|
|
80
|
+
the hash of the input bytes
|
|
81
|
+
|
|
82
|
+
Example
|
|
83
|
+
-------
|
|
84
|
+
```python
|
|
85
|
+
hasher = GxHash128(seed=1234)
|
|
86
|
+
input_bytes = bytes([42] * 1000)
|
|
87
|
+
thread_pool = ThreadPoolExecutor()
|
|
88
|
+
future = thread_pool.submit(hasher.hash_nogil, input_bytes)
|
|
89
|
+
hash_result = await wrap_future(future)
|
|
90
|
+
print(f"Hash is {hash_result}!")
|
|
91
|
+
```
|
|
92
|
+
"""
|
|
93
|
+
def hash_file(self, file: File) -> int:
|
|
94
|
+
"""
|
|
95
|
+
Summary
|
|
96
|
+
-------
|
|
97
|
+
Hashes a `File` to an `int`.
|
|
98
|
+
This method duplicates the file descriptor and memory maps the file entirely in Rust.
|
|
99
|
+
This operation is many times faster than reading the file in Python and passing the bytes to the hasher.
|
|
100
|
+
If your input is already in `bytes`, this method may be slightly less performant than `hash` and `hash_nogil`.
|
|
101
|
+
If the `bytes` object is really large, writing it to a `TemporaryFile` and passing that to this method may be more
|
|
102
|
+
performant than passing the `bytes` directly to `hash` or `hash_nogil`.
|
|
103
|
+
|
|
104
|
+
Parameters
|
|
105
|
+
----------
|
|
106
|
+
file (`File`)
|
|
107
|
+
a file object
|
|
108
|
+
|
|
109
|
+
Returns
|
|
110
|
+
-------
|
|
111
|
+
hash (`int`)
|
|
112
|
+
the hash of the input file
|
|
113
|
+
|
|
114
|
+
Example
|
|
115
|
+
-------
|
|
116
|
+
Converting `bytes` to a `TemporaryFile` and hashing.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
hasher = GxHash128(seed=1234)
|
|
120
|
+
file = TemporaryFile()
|
|
121
|
+
file.write(bytes([42] * 1000))
|
|
122
|
+
file.seek(0)
|
|
123
|
+
print(f"Hash is {hasher.hash_file(file)}!")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Hashing a file directly.
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
file = open('really_large_file.img', 'rb')
|
|
130
|
+
hasher = GxHash128(seed=1234)
|
|
131
|
+
print(f"Hash is {hasher.hash_file(file)}!")
|
|
132
|
+
```
|
|
133
|
+
"""
|
|
134
|
+
async def hash_file_async(self, file: File) -> int:
|
|
135
|
+
"""
|
|
136
|
+
Summary
|
|
137
|
+
-------
|
|
138
|
+
Asynchronous variant of `hash_file`.
|
|
139
|
+
This method allows you to perform multiple hashes with true multi-threaded parallelism.
|
|
140
|
+
If called sequentially, this method is slightly less performant than `hash_file`.
|
|
141
|
+
It is only ever faster than a multi-threaded `hash_nogil` when the input is a `File`,
|
|
142
|
+
and that is due to the performance overhead of reading a `File` in Python.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
file (`File`)
|
|
147
|
+
a file object
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
hash (`int`)
|
|
152
|
+
the hash of the input file
|
|
153
|
+
|
|
154
|
+
Example
|
|
155
|
+
-------
|
|
156
|
+
Converting `bytes` to a `TemporaryFile` and hashing.
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
hasher = GxHash128(seed=1234)
|
|
160
|
+
file = TemporaryFile()
|
|
161
|
+
file.write(bytes([42] * 1000))
|
|
162
|
+
file.seek(0)
|
|
163
|
+
print(f"Hash is {await hasher.hash_file_async(file)}!")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Hashing a file directly.
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
file = open('really_large_file.img', 'rb')
|
|
170
|
+
hasher = GxHash128(seed=1234)
|
|
171
|
+
print(f"Hash is {await hasher.hash_file_async(file)}!")
|
|
172
|
+
```
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
class GxHash32(Hasher):
|
|
176
|
+
"""
|
|
177
|
+
Summary
|
|
178
|
+
-------
|
|
179
|
+
This class exposes GxHash's 32-bit hash methods.
|
|
180
|
+
"""
|
|
181
|
+
|
|
182
|
+
class GxHash64(Hasher):
|
|
183
|
+
"""
|
|
184
|
+
Summary
|
|
185
|
+
-------
|
|
186
|
+
This class exposes GxHash's 64-bit hash methods.
|
|
187
|
+
"""
|
|
188
|
+
|
|
189
|
+
class GxHash128(Hasher):
|
|
190
|
+
"""
|
|
191
|
+
Summary
|
|
192
|
+
-------
|
|
193
|
+
This class exposes GxHash's 128-bit hash methods.
|
|
194
|
+
"""
|
|
Binary file
|
gxhash/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gxhash
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Classifier: Programming Language :: Rust
|
|
5
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
|
+
Requires-Python: >=3.7
|
|
8
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
9
|
+
|
|
10
|
+
# gxhash-py
|
|
11
|
+
|
|
12
|
+
Python bindings for [GxHash](https://github.com/ogxd/gxhash), a blazingly fast and robust non-cryptographic hashing algorithm.
|
|
13
|
+
|
|
14
|
+
## Features
|
|
15
|
+
|
|
16
|
+
- **Blazingly Fast**: Minimal-overhead binding to leverage the full speed of GxHash.
|
|
17
|
+
- **Zero Python**: Pure Rust backend with zero additional Python runtime overhead.
|
|
18
|
+
- **Fine-Grained Control**: Build true multi-threaded or async hashing pipelines with GIL-free APIs.
|
|
19
|
+
- **Faster File Hashing**: Hash files using memory-mapped I/O via Rust — 3x faster than Python's sequential I/O.
|
|
20
|
+
- **Async-Ready**: Tokio-powered async hashing for fast and efficient concurrency.
|
|
21
|
+
- **Fully Typesafe**: Predictable, clean API with complete type safety.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install gxhash
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
Hashing bytes.
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from gxhash import GxHash32
|
|
35
|
+
|
|
36
|
+
def main():
|
|
37
|
+
gxhash = GxHash32(seed=0)
|
|
38
|
+
result = gxhash.hash(b"Hello, world!")
|
|
39
|
+
|
|
40
|
+
if __name__ == "__main__":
|
|
41
|
+
main()
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Hashing a file.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from gxhash import GxHash64
|
|
48
|
+
|
|
49
|
+
def main():
|
|
50
|
+
gxhash = GxHash64(seed=0)
|
|
51
|
+
file = open("path/to/file.dmg", "rb")
|
|
52
|
+
result = gxhash.hash_file(file)
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
main()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Hashing a file asynchronously.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from asyncio import run
|
|
62
|
+
from gxhash import GxHash128
|
|
63
|
+
|
|
64
|
+
async def main():
|
|
65
|
+
gxhash = GxHash128(seed=0)
|
|
66
|
+
file = open("path/to/file.dmg", "rb")
|
|
67
|
+
result = await gxhash.hash_file_async(file)
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
|
|
70
|
+
run(main())
|
|
71
|
+
```
|
|
72
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
gxhash-0.1.2.dist-info/METADATA,sha256=4wwfZMVnQKJfZF7tDir7DMyAVDhV2fgKf3uPOi2sxoY,1765
|
|
2
|
+
gxhash-0.1.2.dist-info/WHEEL,sha256=0T3ERfOVy2_NA0spmJ9W283dd80DxGxyjANqE-4dteM,108
|
|
3
|
+
gxhash/__init__.py,sha256=zLP3Jdvo2-JFQKZIocwkNCWkjYJVXauBSG3JSAW8rA0,107
|
|
4
|
+
gxhash/__init__.pyi,sha256=9HtHNSOemmwH6A1Av5GI2HsAiIf-s7JKIpC6uPzxzqM,5539
|
|
5
|
+
gxhash/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
gxhash/gxhash.cpython-313-x86_64-linux-gnu.so,sha256=7z0NvbmCFuoc4nvn6xYYKXOH-WbTxqoTq1c5y3Y0EsM,642424
|
|
7
|
+
gxhash-0.1.2.dist-info/RECORD,,
|