graphembed-rs 0.1.1__cp39-cp39-macosx_11_0_arm64.whl → 0.1.2__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphembed_rs/__init__.py +6 -4
- graphembed_rs/graphembed_rs.cpython-39-darwin.so +0 -0
- graphembed_rs/load_utils.py +71 -0
- {graphembed_rs-0.1.1.dist-info → graphembed_rs-0.1.2.dist-info}/METADATA +24 -9
- graphembed_rs-0.1.2.dist-info/RECORD +7 -0
- graphembed_rs/__init__.pyi +0 -79
- graphembed_rs/py.typed +0 -0
- graphembed_rs-0.1.1.dist-info/RECORD +0 -8
- {graphembed_rs-0.1.1.dist-info → graphembed_rs-0.1.2.dist-info}/WHEEL +0 -0
- {graphembed_rs-0.1.1.dist-info → graphembed_rs-0.1.2.dist-info}/licenses/LICENSE-MIT +0 -0
graphembed_rs/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
from
|
1
|
+
from importlib import import_module as _imp
|
2
|
+
from graphembed_rs import *
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
# add the helper into the same namespace
|
5
|
+
from .load_utils import *
|
6
|
+
|
7
|
+
del _imp
|
Binary file
|
@@ -0,0 +1,71 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from pathlib import Path
|
3
|
+
# Requires the `bson` package that ships with pymongo: pip install pymongo
|
4
|
+
# -- only the BSON decoder from pymongo's bson package is used here (nothing is encoded) --
|
5
|
+
from bson import decode_file_iter
|
6
|
+
|
7
|
+
|
8
|
+
def _dtype_from_type_name(tname: str):
|
9
|
+
"""Map the Rust type-name string to a NumPy dtype."""
|
10
|
+
if tname == "f32":
|
11
|
+
return np.float32
|
12
|
+
if tname == "f64":
|
13
|
+
return np.float64
|
14
|
+
if tname == "usize": # stored as i64 in BSON
|
15
|
+
return np.int64
|
16
|
+
raise ValueError(f"Unknown type_name {tname!r} in BSON header")
|
17
|
+
|
18
|
+
|
19
|
+
def load_embedding_bson(path: str | Path, *, want_in: bool = False):
|
20
|
+
"""
|
21
|
+
Parameters
|
22
|
+
----------
|
23
|
+
path : str | pathlib.Path
|
24
|
+
File written by graphembed::io::bson_dump(...)
|
25
|
+
want_in : bool, default False
|
26
|
+
• False … always return the OUT/source embedding (shape = (n, d))
|
27
|
+
• True … additionally return the IN/target embedding when
|
28
|
+
the dump is *asymmetric* (tuple(out, in_)). For symmetric
|
29
|
+
dumps the second item is None.
|
30
|
+
|
31
|
+
Returns
|
32
|
+
-------
|
33
|
+
np.ndarray (want_in=False)
|
34
|
+
OR (out_emb, in_emb) (want_in=True; in_emb is None for a symmetric dump)
|
35
|
+
"""
|
36
|
+
path = Path(path)
|
37
|
+
with path.open("rb") as fh:
|
38
|
+
docs = decode_file_iter(fh)
|
39
|
+
|
40
|
+
# -- header ----------------------------------------------------
|
41
|
+
header = next(docs)["header"]
|
42
|
+
n = int(header["nbdata"])
|
43
|
+
d = int(header["dimension"])
|
44
|
+
sym = bool(header["symetric"])
|
45
|
+
dtype = _dtype_from_type_name(header["type_name"])
|
46
|
+
out_emb = np.empty((n, d), dtype=dtype)
|
47
|
+
in_emb = None if sym or not want_in else np.empty((n, d), dtype=dtype)
|
48
|
+
|
49
|
+
# --OUT part --------------------------------------------------
|
50
|
+
for _ in range(n):
|
51
|
+
doc = next(docs)
|
52
|
+
key, vec = next(iter(doc.items())) # only 1 (key,val)
|
53
|
+
idx, tag = map(int, key.split(","))
|
54
|
+
assert tag == 0, f"expected tag 0, got {tag}"
|
55
|
+
out_emb[idx] = np.asarray(vec, dtype=dtype)
|
56
|
+
|
57
|
+
# -- IN part (if any) -----------------------------------------
|
58
|
+
if not sym:
|
59
|
+
for _ in range(n):
|
60
|
+
doc = next(docs)
|
61
|
+
key, vec = next(iter(doc.items()))
|
62
|
+
idx, tag = map(int, key.split(","))
|
63
|
+
assert tag == 1, f"expected tag 1, got {tag}"
|
64
|
+
if in_emb is not None: # want_in == True
|
65
|
+
in_emb[idx] = np.asarray(vec, dtype=dtype)
|
66
|
+
# else: silently drop it
|
67
|
+
|
68
|
+
# -- optional indexation doc – skip for now --------------------
|
69
|
+
# (decode_file_iter stops automatically at EOF)
|
70
|
+
|
71
|
+
return (out_emb, in_emb) if want_in else out_emb
|
@@ -1,6 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: graphembed_rs
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.2
|
4
|
+
Requires-Dist: numpy>=2.2.5
|
5
|
+
Requires-Dist: pymongo>=4.12.1
|
4
6
|
License-File: LICENSE-MIT
|
5
7
|
Summary: Python bindings for the high‑performance Rust graph/network embedding library graphembed
|
6
8
|
Keywords: graph,embedding,hash
|
@@ -18,6 +20,9 @@ Project-URL: Source Code, https://github.com/jean-pierreBoth/graphembed
|
|
18
20
|

|
19
21
|
[](https://anaconda.org/bioconda/graphembed)
|
20
22
|
|
23
|
+
<div align="center">
|
24
|
+
<img width="35%" src ="GraphEmbed_log.jpg">
|
25
|
+
</div>
|
21
26
|
|
22
27
|
# GraphEmbed: Efficient and Robust Network Embedding via High-Order Proximity Preservation or Recursive Sketching
|
23
28
|
|
@@ -30,13 +35,13 @@ This crate provides an executable and a library for embedding of directed or und
|
|
30
35
|
|
31
36
|
### Pre-built binaries on Linux
|
32
37
|
```bash
|
33
|
-
wget https://gitlab.com/-/project/64961144/uploads/
|
34
|
-
unzip graphembed_Linux_x86-64_v0.1.
|
38
|
+
wget https://gitlab.com/-/project/64961144/uploads/9d7d0b038140cb67c584f01cd6dafac9/graphembed_Linux_x86-64_v0.1.6.zip
|
39
|
+
unzip graphembed_Linux_x86-64_v0.1.6.zip
|
35
40
|
chmod a+x ./graphembed
|
36
41
|
./graphembed -h
|
37
42
|
```
|
38
43
|
|
39
|
-
### Bioconda on Linux
|
44
|
+
### Bioconda on Linux/MacOS
|
40
45
|
```bash
|
41
46
|
conda install -c conda-forge -c bioconda graphembed
|
42
47
|
```
|
@@ -49,7 +54,7 @@ brew install graphembed
|
|
49
54
|
```
|
50
55
|
|
51
56
|
|
52
|
-
### In Python (Please install python first)
|
57
|
+
### In Python (Please install python>=3.9 first)
|
53
58
|
```bash
|
54
59
|
pip install graphembed_rs
|
55
60
|
|
@@ -57,7 +62,7 @@ pip install graphembed_rs
|
|
57
62
|
git clone https://gitlab.com/Jianshu_Zhao/graphembed
|
58
63
|
cd graphembed
|
59
64
|
pip install maturin
|
60
|
-
### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyporject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"
|
65
|
+
### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyproject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"; you also need to install OpenBLAS via Homebrew and add it to the system library path
|
61
66
|
maturin develop --release
|
62
67
|
|
63
68
|
#### Prepare some data
|
@@ -66,15 +71,25 @@ wget https://gitlab.com/-/project/64961144/uploads/4e341383d62d86d1dd66e668e91b2
|
|
66
71
|
|
67
72
|
```python
|
68
73
|
import os
|
69
|
-
os.environ["RUST_LOG"] = "
|
70
|
-
import
|
74
|
+
os.environ["RUST_LOG"] = "info"
|
75
|
+
import graphembed_rs.graphembed_rs as ge
|
76
|
+
import graphembed_rs.load_utils as ge_utils
|
71
77
|
help(ge)
|
78
|
+
help(ge_utils)
|
72
79
|
### HOPE
|
73
|
-
ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4)
|
80
|
+
ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4,output="embedding_output")
|
81
|
+
out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
|
82
|
+
print("OUT embedding shape :", out_vectors.shape)
|
83
|
+
print("first OUT vector :", out_vectors[0])
|
74
84
|
|
75
85
|
### Sketching
|
76
86
|
### sketching only
|
77
87
|
ge.embed_sketching("BlogCatalog.txt", decay=0.3, dim=128, nbiter=5, symetric=True, output="embedding_output")
|
88
|
+
out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
|
89
|
+
print("OUT embedding shape :", out_vectors.shape)
|
90
|
+
print("first OUT vector :", out_vectors[0])
|
91
|
+
|
92
|
+
|
78
93
|
### validate accuracy
|
79
94
|
auc_scores = ge.validate_sketching("BlogCatalog.txt",decay=0.3, dim=128, nbiter=3, nbpass=1, skip_frac=0.2,symetric=True, centric=True)
|
80
95
|
print("Standard AUC per pass:", auc_scores)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
graphembed_rs-0.1.2.dist-info/METADATA,sha256=WEvYJP6qVNPFlFxYzzq-n3XX6L__a2ukxgYkVt_WnYo,11532
|
2
|
+
graphembed_rs-0.1.2.dist-info/WHEEL,sha256=OWa2DoOWNAtKx8o8X2SfzZNCVYa4xySMUeG2PSXKaJ4,102
|
3
|
+
graphembed_rs-0.1.2.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
|
4
|
+
graphembed_rs/load_utils.py,sha256=fXd4-5OxdymSkb4kx39XeW_P3nEDdAuQ7Oe1D6PKDWE,2719
|
5
|
+
graphembed_rs/__init__.py,sha256=s8WorNcMnn15CfjctNaLnXGnwQ9RlPyCR0WJQxepTTc,179
|
6
|
+
graphembed_rs/graphembed_rs.cpython-39-darwin.so,sha256=bnNJQ9TeEI6GVmt9GP_FJUtfH9enrlcYdvjbXfY5nEw,5265888
|
7
|
+
graphembed_rs-0.1.2.dist-info/RECORD,,
|
graphembed_rs/__init__.pyi
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
# ---------- Embedding ----------
|
4
|
-
def embed_hope_rank(
|
5
|
-
csv: str,
|
6
|
-
symetric: bool,
|
7
|
-
target_rank: int,
|
8
|
-
nbiter: int,
|
9
|
-
output: Optional[str] = None,
|
10
|
-
) -> None: ...
|
11
|
-
def embed_hope_precision(
|
12
|
-
csv: str,
|
13
|
-
symetric: bool,
|
14
|
-
epsil: float,
|
15
|
-
maxrank: int,
|
16
|
-
blockiter: int,
|
17
|
-
output: Optional[str] = None,
|
18
|
-
) -> None: ...
|
19
|
-
def embed_sketching(
|
20
|
-
csv: str,
|
21
|
-
symetric: bool,
|
22
|
-
decay: float,
|
23
|
-
dim: int,
|
24
|
-
nbiter: int,
|
25
|
-
output: Optional[str] = None,
|
26
|
-
) -> None: ...
|
27
|
-
|
28
|
-
# ---------- Validation (returns mean AUC) ----------
|
29
|
-
def validate_hope_rank(
|
30
|
-
csv: str,
|
31
|
-
symetric: bool,
|
32
|
-
target_rank: int,
|
33
|
-
nbiter: int,
|
34
|
-
nbpass: int = 10,
|
35
|
-
skip_frac: float = 0.1,
|
36
|
-
centric: bool = False,
|
37
|
-
) -> float: ...
|
38
|
-
def validate_hope_precision(
|
39
|
-
csv: str,
|
40
|
-
symetric: bool,
|
41
|
-
epsil: float,
|
42
|
-
maxrank: int,
|
43
|
-
blockiter: int,
|
44
|
-
nbpass: int = 10,
|
45
|
-
skip_frac: float = 0.1,
|
46
|
-
centric: bool = False,
|
47
|
-
) -> float: ...
|
48
|
-
def validate_sketching(
|
49
|
-
csv: str,
|
50
|
-
symetric: bool,
|
51
|
-
decay: float,
|
52
|
-
dim: int,
|
53
|
-
nbiter: int,
|
54
|
-
nbpass: int = 10,
|
55
|
-
skip_frac: float = 0.1,
|
56
|
-
centric: bool = False,
|
57
|
-
) -> float: ...
|
58
|
-
|
59
|
-
# ---------- VCMPR (precision/recall curves) ----------
|
60
|
-
def estimate_vcmpr_hope_rank(
|
61
|
-
csv: str,
|
62
|
-
symetric: bool,
|
63
|
-
target_rank: int,
|
64
|
-
nbiter: int,
|
65
|
-
nbpass: int = 2,
|
66
|
-
topk: int = 10,
|
67
|
-
skip_frac: float = 0.1,
|
68
|
-
) -> None: ...
|
69
|
-
def estimate_vcmpr_sketching(
|
70
|
-
csv: str,
|
71
|
-
symetric: bool,
|
72
|
-
decay: float,
|
73
|
-
dim: int,
|
74
|
-
nbiter: int,
|
75
|
-
nbpass: int = 2,
|
76
|
-
topk: int = 10,
|
77
|
-
skip_frac: float = 0.1,
|
78
|
-
) -> None: ...
|
79
|
-
|
graphembed_rs/py.typed
DELETED
File without changes
|
@@ -1,8 +0,0 @@
|
|
1
|
-
graphembed_rs-0.1.1.dist-info/METADATA,sha256=uW8W9UwifiE6FViIKpp1I7d5e6ygVuo6AA2O72MogLU,10890
|
2
|
-
graphembed_rs-0.1.1.dist-info/WHEEL,sha256=OWa2DoOWNAtKx8o8X2SfzZNCVYa4xySMUeG2PSXKaJ4,102
|
3
|
-
graphembed_rs-0.1.1.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
|
4
|
-
graphembed_rs/__init__.py,sha256=R2D0If_-sN__21LBYNod0CNgVo2dCd2RqM11AStM3X0,135
|
5
|
-
graphembed_rs/__init__.pyi,sha256=3_KBFG4g9akylo32CHlm9bZStcLwxIY2X4si21ilD3w,1626
|
6
|
-
graphembed_rs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
graphembed_rs/graphembed_rs.cpython-39-darwin.so,sha256=uHq1hQqyDyoMhlm2Uz2XN0oDXK8omVehmCunu9ABg9w,5106880
|
8
|
-
graphembed_rs-0.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|