graphembed-rs 0.1.0__cp313-cp313-macosx_11_0_arm64.whl → 0.1.2__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphembed_rs/__init__.py +7 -0
- graphembed_rs/graphembed_rs.cpython-313-darwin.so +0 -0
- graphembed_rs/load_utils.py +71 -0
- {graphembed_rs-0.1.0.dist-info → graphembed_rs-0.1.2.dist-info}/METADATA +54 -7
- graphembed_rs-0.1.2.dist-info/RECORD +7 -0
- graphembed_rs-0.1.2.dist-info/licenses/LICENSE-MIT +25 -0
- graphembed/__init__.py +0 -5
- graphembed/__init__.pyi +0 -79
- graphembed/graphembed.cpython-313-darwin.so +0 -0
- graphembed/py.typed +0 -0
- graphembed_rs-0.1.0.dist-info/RECORD +0 -7
- {graphembed_rs-0.1.0.dist-info → graphembed_rs-0.1.2.dist-info}/WHEEL +0 -0
Binary file
|
@@ -0,0 +1,71 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from pathlib import Path
|
3
|
+
### install pymongo with pip install pymongo
|
4
|
+
# -- the `bson` module ships with pymongo; only its decoder is used here --
|
5
|
+
from bson import decode_file_iter
|
6
|
+
|
7
|
+
|
8
|
+
def _dtype_from_type_name(tname: str):
    """Translate the Rust scalar type name found in the BSON header.

    Returns the NumPy scalar type matching ``tname`` ("f32", "f64" or
    "usize"); raises ``ValueError`` for any other name.
    """
    known_types = (
        ("f32", np.float32),
        ("f64", np.float64),
        ("usize", np.int64),  # stored as i64 in BSON
    )
    for rust_name, numpy_type in known_types:
        if tname == rust_name:
            return numpy_type
    raise ValueError(f"Unknown type_name {tname!r} in BSON header")
|
17
|
+
|
18
|
+
|
19
|
+
def _read_embedding_block(docs, n, d, dtype, expected_tag, dest):
    """Consume ``n`` vector documents from ``docs`` and store them in ``dest``.

    Each document is expected to hold a single ``"<idx>,<tag>": vector``
    entry.  When ``dest`` is None the documents are still consumed and their
    tag checked, but the vectors are dropped.

    Raises
    ------
    ValueError
        If the stream ends early, a document is empty, the tag differs from
        ``expected_tag``, a stored index is out of range or duplicated, or a
        stored vector does not have exactly ``d`` components.
    """
    # Tracks which rows were written: dest comes from np.empty, so a row that
    # is never assigned would otherwise be returned as uninitialised memory.
    seen = np.zeros(n, dtype=bool)
    for _ in range(n):
        try:
            doc = next(docs)
        except StopIteration:
            raise ValueError(
                f"truncated BSON dump: expected {n} vectors with tag "
                f"{expected_tag}"
            ) from None
        if not doc:
            raise ValueError("empty BSON document where a vector was expected")
        key, vec = next(iter(doc.items()))  # only the first (key, val) is used
        idx, tag = map(int, key.split(","))
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        if tag != expected_tag:
            raise ValueError(f"expected tag {expected_tag}, got {tag}")
        if dest is None:
            continue  # caller does not want this block: drop the vector
        # A negative index would silently wrap around to another row.
        if not 0 <= idx < n:
            raise ValueError(f"node index {idx} out of range [0, {n})")
        if seen[idx]:
            raise ValueError(f"duplicate node index {idx} for tag {tag}")
        row = np.asarray(vec, dtype=dtype)
        # Without this check a length-1 vector would be broadcast over the row.
        if row.shape != (d,):
            raise ValueError(
                f"vector {idx} has shape {row.shape}, expected ({d},)"
            )
        dest[idx] = row
        seen[idx] = True


def load_embedding_bson(path: "str | Path", *, want_in: bool = False):
    """
    Load an embedding dumped as BSON into NumPy arrays.

    Parameters
    ----------
    path : str | pathlib.Path
        File written by graphembed::io::bson_dump(...)
    want_in : bool, default False
        • False … return only the OUT/source embedding (shape = (n, d))
        • True  … return the tuple (out_emb, in_emb).  in_emb is the
                  IN/target embedding when the dump is *asymmetric*, and
                  None for symmetric dumps.

    Returns
    -------
    np.ndarray                 (want_in=False)
    OR  (out_emb, in_emb)      (want_in=True; in_emb is None for sym. dumps)

    Raises
    ------
    ValueError
        If the file is empty or truncated, the header names an unknown
        type, or a vector document is malformed (wrong tag, index out of
        range or duplicated, wrong vector length).
    """
    # NOTE: the annotation above is quoted so that the module still imports on
    # Python < 3.10, where evaluating `str | Path` raises TypeError.
    path = Path(path)
    with path.open("rb") as fh:
        docs = decode_file_iter(fh)

        # -- header ----------------------------------------------------
        try:
            first = next(docs)
        except StopIteration:
            raise ValueError(f"{path}: empty file, no BSON header") from None
        header = first["header"]
        n = int(header["nbdata"])
        d = int(header["dimension"])
        sym = bool(header["symetric"])  # key spelling matches the dump format
        dtype = _dtype_from_type_name(header["type_name"])
        out_emb = np.empty((n, d), dtype=dtype)
        in_emb = None if sym or not want_in else np.empty((n, d), dtype=dtype)

        # -- OUT part --------------------------------------------------
        _read_embedding_block(docs, n, d, dtype, 0, out_emb)

        # -- IN part (if any) ------------------------------------------
        if not sym:
            # in_emb is None when want_in is False: vectors are then dropped.
            _read_embedding_block(docs, n, d, dtype, 1, in_emb)

        # -- optional indexation doc – skip for now --------------------
        # (any remaining documents are simply left unread)

    return (out_emb, in_emb) if want_in else out_emb
|
@@ -1,14 +1,17 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: graphembed_rs
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.2
|
4
|
+
Requires-Dist: numpy>=2.2.5
|
5
|
+
Requires-Dist: pymongo>=4.12.1
|
6
|
+
License-File: LICENSE-MIT
|
4
7
|
Summary: Python bindings for the high‑performance Rust graph/network embedding library graphembed
|
5
8
|
Keywords: graph,embedding,hash
|
6
9
|
Author: Jianshu Zhao
|
7
|
-
Author-email: jeanpierre.both@gmail.com
|
10
|
+
Author-email: jeanpierre.both@gmail.com
|
8
11
|
License: MIT OR Apache-2.0
|
9
12
|
Requires-Python: >=3.8
|
10
13
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
11
|
-
Project-URL: Source Code, https://
|
14
|
+
Project-URL: Source Code, https://github.com/jean-pierreBoth/graphembed
|
12
15
|
|
13
16
|
[](http://bioconda.github.io/recipes/graphembed/README.html)
|
14
17
|

|
@@ -17,10 +20,13 @@ Project-URL: Source Code, https://gitlab.com/Jianshu_Zhao/graphembed
|
|
17
20
|

|
18
21
|
[](https://anaconda.org/bioconda/graphembed)
|
19
22
|
|
23
|
+
<div align="center">
|
24
|
+
<img width="35%" src ="GraphEmbed_log.jpg">
|
25
|
+
</div>
|
20
26
|
|
21
27
|
# GraphEmbed: Efficient and Robust Network Embedding via High-Order Proximity Preservation or Recursive Sketching
|
22
28
|
|
23
|
-
This crate provides an executable and a library for embedding of directed or undirected graphs with positively weighted edges. We engineered and optimized current network embedding algorithms for large-scale network embedding, especially biological network. This crate was developed by [Jianshu Zhao](https://gitlab.com/Jianshu_Zhao) and Jean-Pierre Both [jpboth](https://gitlab.com/jpboth).
|
29
|
+
This crate provides an executable and a library for embedding of directed or undirected graphs with positively weighted edges. We engineered and optimized current network embedding algorithms for large-scale network embedding, especially biological networks. This crate was developed by [Jianshu Zhao](https://gitlab.com/Jianshu_Zhao) and Jean-Pierre Both [jpboth](https://gitlab.com/jpboth). A copy is also hosted on [GitHub](https://github.com/jianshu93/graphembed).
|
24
30
|
|
25
31
|
|
26
32
|
- For simple graphs, without data attached to nodes, there are 2 modules **nodesketch** and **atp**. A simple executable with a validation option based on link prediction is also provided.
|
@@ -29,13 +35,13 @@ This crate provides an executable and a library for embedding of directed or und
|
|
29
35
|
|
30
36
|
### Pre-built binaries on Linux
|
31
37
|
```bash
|
32
|
-
wget https://gitlab.com/-/project/64961144/uploads/
|
33
|
-
unzip graphembed_Linux_x86-64_v0.1.
|
38
|
+
wget https://gitlab.com/-/project/64961144/uploads/9d7d0b038140cb67c584f01cd6dafac9/graphembed_Linux_x86-64_v0.1.6.zip
|
39
|
+
unzip graphembed_Linux_x86-64_v0.1.6.zip
|
34
40
|
chmod a+x ./graphembed
|
35
41
|
./graphembed -h
|
36
42
|
```
|
37
43
|
|
38
|
-
### Bioconda on Linux
|
44
|
+
### Bioconda on Linux/MacOS
|
39
45
|
```bash
|
40
46
|
conda install -c conda-forge -c bioconda graphembed
|
41
47
|
```
|
@@ -45,7 +51,48 @@ conda install -c conda-forge -c bioconda graphembed
|
|
45
51
|
brew tap jianshu93/graphembed
|
46
52
|
brew update
|
47
53
|
brew install graphembed
|
54
|
+
```
|
55
|
+
|
56
|
+
|
57
|
+
### In Python (Please install python>=3.9 first)
|
58
|
+
```bash
|
59
|
+
pip install graphembed_rs
|
60
|
+
|
61
|
+
### or you can build from source (Linux) after installing maturin
|
62
|
+
git clone https://gitlab.com/Jianshu_Zhao/graphembed
|
63
|
+
cd graphembed
|
64
|
+
pip install maturin
|
65
|
+
### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyproject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"; you also need to install OpenBLAS via Homebrew and add it to the system library path
|
66
|
+
maturin develop --release
|
67
|
+
|
68
|
+
#### Prepare some data
|
69
|
+
wget https://gitlab.com/-/project/64961144/uploads/4e341383d62d86d1dd66e668e91b2c07/BlogCatalog.txt
|
70
|
+
```
|
48
71
|
|
72
|
+
```python
|
73
|
+
import os
|
74
|
+
os.environ["RUST_LOG"] = "info"
|
75
|
+
import graphembed_rs.graphembed_rs as ge
|
76
|
+
import graphembed_rs.load_utils as ge_utils
|
77
|
+
help(ge)
|
78
|
+
help(ge_utils)
|
79
|
+
### HOPE
|
80
|
+
ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4,output="embedding_output")
|
81
|
+
out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
|
82
|
+
print("OUT embedding shape :", out_vectors.shape)
|
83
|
+
print("first OUT vector :", out_vectors[0])
|
84
|
+
|
85
|
+
### Sketching
|
86
|
+
### sketching only
|
87
|
+
ge.embed_sketching("BlogCatalog.txt", decay=0.3, dim=128, nbiter=5, symetric=True, output="embedding_output")
|
88
|
+
out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
|
89
|
+
print("OUT embedding shape :", out_vectors.shape)
|
90
|
+
print("first OUT vector :", out_vectors[0])
|
91
|
+
|
92
|
+
|
93
|
+
### validate accuracy
|
94
|
+
auc_scores = ge.validate_sketching("BlogCatalog.txt",decay=0.3, dim=128, nbiter=3, nbpass=1, skip_frac=0.2,symetric=True, centric=True)
|
95
|
+
print("Standard AUC per pass:", auc_scores)
|
49
96
|
```
|
50
97
|
|
51
98
|
## Methods
|
@@ -0,0 +1,7 @@
|
|
1
|
+
graphembed_rs-0.1.2.dist-info/METADATA,sha256=WEvYJP6qVNPFlFxYzzq-n3XX6L__a2ukxgYkVt_WnYo,11532
|
2
|
+
graphembed_rs-0.1.2.dist-info/WHEEL,sha256=_czbP61TsBkf9T201RekHMHlqESnWn7yJwXBJC9P-w0,104
|
3
|
+
graphembed_rs-0.1.2.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
|
4
|
+
graphembed_rs/load_utils.py,sha256=fXd4-5OxdymSkb4kx39XeW_P3nEDdAuQ7Oe1D6PKDWE,2719
|
5
|
+
graphembed_rs/__init__.py,sha256=s8WorNcMnn15CfjctNaLnXGnwQ9RlPyCR0WJQxepTTc,179
|
6
|
+
graphembed_rs/graphembed_rs.cpython-313-darwin.so,sha256=9fLyn8FN0gZOCRj2Zv8_Wx8lN81dkjFRHcRowLNlv7o,5263216
|
7
|
+
graphembed_rs-0.1.2.dist-info/RECORD,,
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Copyright (c) 2022 jean-pierre.both and Jianshu Zhao
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any
|
4
|
+
person obtaining a copy of this software and associated
|
5
|
+
documentation files (the "Software"), to deal in the
|
6
|
+
Software without restriction, including without
|
7
|
+
limitation the rights to use, copy, modify, merge,
|
8
|
+
publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software
|
10
|
+
is furnished to do so, subject to the following
|
11
|
+
conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice
|
14
|
+
shall be included in all copies or substantial portions
|
15
|
+
of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
18
|
+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
19
|
+
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
20
|
+
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
21
|
+
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
22
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
23
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
24
|
+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
25
|
+
DEALINGS IN THE SOFTWARE.
|
graphembed/__init__.py
DELETED
graphembed/__init__.pyi
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
# ---------- Embedding ----------
|
4
|
-
def embed_hope_rank(
|
5
|
-
csv: str,
|
6
|
-
symetric: bool,
|
7
|
-
target_rank: int,
|
8
|
-
nbiter: int,
|
9
|
-
output: Optional[str] = None,
|
10
|
-
) -> None: ...
|
11
|
-
def embed_hope_precision(
|
12
|
-
csv: str,
|
13
|
-
symetric: bool,
|
14
|
-
epsil: float,
|
15
|
-
maxrank: int,
|
16
|
-
blockiter: int,
|
17
|
-
output: Optional[str] = None,
|
18
|
-
) -> None: ...
|
19
|
-
def embed_sketching(
|
20
|
-
csv: str,
|
21
|
-
symetric: bool,
|
22
|
-
decay: float,
|
23
|
-
dim: int,
|
24
|
-
nbiter: int,
|
25
|
-
output: Optional[str] = None,
|
26
|
-
) -> None: ...
|
27
|
-
|
28
|
-
# ---------- Validation (returns mean AUC) ----------
|
29
|
-
def validate_hope_rank(
|
30
|
-
csv: str,
|
31
|
-
symetric: bool,
|
32
|
-
target_rank: int,
|
33
|
-
nbiter: int,
|
34
|
-
nbpass: int = 10,
|
35
|
-
skip_frac: float = 0.1,
|
36
|
-
centric: bool = False,
|
37
|
-
) -> float: ...
|
38
|
-
def validate_hope_precision(
|
39
|
-
csv: str,
|
40
|
-
symetric: bool,
|
41
|
-
epsil: float,
|
42
|
-
maxrank: int,
|
43
|
-
blockiter: int,
|
44
|
-
nbpass: int = 10,
|
45
|
-
skip_frac: float = 0.1,
|
46
|
-
centric: bool = False,
|
47
|
-
) -> float: ...
|
48
|
-
def validate_sketching(
|
49
|
-
csv: str,
|
50
|
-
symetric: bool,
|
51
|
-
decay: float,
|
52
|
-
dim: int,
|
53
|
-
nbiter: int,
|
54
|
-
nbpass: int = 10,
|
55
|
-
skip_frac: float = 0.1,
|
56
|
-
centric: bool = False,
|
57
|
-
) -> float: ...
|
58
|
-
|
59
|
-
# ---------- VCMPR (precision/recall curves) ----------
|
60
|
-
def estimate_vcmpr_hope_rank(
|
61
|
-
csv: str,
|
62
|
-
symetric: bool,
|
63
|
-
target_rank: int,
|
64
|
-
nbiter: int,
|
65
|
-
nbpass: int = 2,
|
66
|
-
topk: int = 10,
|
67
|
-
skip_frac: float = 0.1,
|
68
|
-
) -> None: ...
|
69
|
-
def estimate_vcmpr_sketching(
|
70
|
-
csv: str,
|
71
|
-
symetric: bool,
|
72
|
-
decay: float,
|
73
|
-
dim: int,
|
74
|
-
nbiter: int,
|
75
|
-
nbpass: int = 2,
|
76
|
-
topk: int = 10,
|
77
|
-
skip_frac: float = 0.1,
|
78
|
-
) -> None: ...
|
79
|
-
|
Binary file
|
graphembed/py.typed
DELETED
File without changes
|
@@ -1,7 +0,0 @@
|
|
1
|
-
graphembed_rs-0.1.0.dist-info/METADATA,sha256=dxYkTamZDsIFcEqterZ0LEeXQDItlSW2aZjmL6u1vuI,9694
|
2
|
-
graphembed_rs-0.1.0.dist-info/WHEEL,sha256=_czbP61TsBkf9T201RekHMHlqESnWn7yJwXBJC9P-w0,104
|
3
|
-
graphembed/__init__.py,sha256=RCcLraveWf-myTsDQGePMYq-scNNfz-3Mv1baSbgAmM,123
|
4
|
-
graphembed/__init__.pyi,sha256=3_KBFG4g9akylo32CHlm9bZStcLwxIY2X4si21ilD3w,1626
|
5
|
-
graphembed/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
graphembed/graphembed.cpython-313-darwin.so,sha256=6MnpRaFtxd9VQ4aK_yHbA-C9eqiVELPJGAIs6KJwt8I,5126848
|
7
|
-
graphembed_rs-0.1.0.dist-info/RECORD,,
|
File without changes
|