graphembed-rs 0.1.0__cp313-cp313-macosx_11_0_arm64.whl → 0.1.2__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
from importlib import import_module as _imp

# Re-export the public API of the compiled extension module.
# The extension ships as the submodule ``graphembed_rs.graphembed_rs``; an
# absolute ``from graphembed_rs import *`` written inside this ``__init__``
# would only re-import this (still partially initialised) package and would
# therefore export nothing, so the import has to be relative.
from .graphembed_rs import *

# add the helper into the same namespace
from .load_utils import *

# Keep the private alias out of the package namespace.
del _imp
@@ -0,0 +1,71 @@
1
+ import numpy as np
2
+ from pathlib import Path
3
+ ### install pymongo with pip install pymongo
4
+ # -- pymongo is a BSON decoder, not a BSON encoder --
5
+ from bson import decode_file_iter
6
+
7
+
8
def _dtype_from_type_name(tname: str):
    """Translate the Rust type name found in the BSON header to a NumPy dtype.

    Recognised names are ``"f32"``, ``"f64"`` and ``"usize"``; any other
    value raises ``ValueError``.
    """
    known_types = (
        ("f32", np.float32),
        ("f64", np.float64),
        ("usize", np.int64),  # stored as i64 in BSON
    )
    for rust_name, numpy_dtype in known_types:
        if tname == rust_name:
            return numpy_dtype
    raise ValueError(f"Unknown type_name {tname!r} in BSON header")
17
+
18
+
19
def load_embedding_bson(path: "str | Path", *, want_in: bool = False):
    """
    Load an embedding dump into NumPy arrays.

    Parameters
    ----------
    path : str | pathlib.Path
        File written by graphembed::io::bson_dump(...)
    want_in : bool, default False
        • False … return only the OUT/source embedding (shape = (n, d))
        • True  … return the tuple (out_emb, in_emb). ``in_emb`` is the
                  IN/target embedding when the dump is *asymmetric* and
                  None for symmetric dumps.

    Returns
    -------
    np.ndarray                      (want_in=False)
    OR (out_emb, in_emb)            (want_in=True)

    Raises
    ------
    ValueError
        If the file is empty or ends before all announced vectors were
        read, if the header's ``type_name`` is unknown, if a vector document
        carries an unexpected tag, or if a row index lies outside
        ``[0, nbdata)``.
    """
    # NOTE: the annotation on ``path`` is quoted on purpose: an unquoted
    # ``str | Path`` is evaluated at definition time and fails on
    # Python < 3.10.
    path = Path(path)
    with path.open("rb") as fh:
        docs = decode_file_iter(fh)

        # -- header ----------------------------------------------------
        first_doc = next(docs, None)
        if first_doc is None:
            raise ValueError(f"{path}: empty BSON file, no header document")
        header = first_doc["header"]
        n = int(header["nbdata"])
        d = int(header["dimension"])
        sym = bool(header["symetric"])
        dtype = _dtype_from_type_name(header["type_name"])
        out_emb = np.empty((n, d), dtype=dtype)
        in_emb = None if sym or not want_in else np.empty((n, d), dtype=dtype)

        # -- OUT part --------------------------------------------------
        _fill_embedding_rows(docs, n, 0, out_emb, dtype)

        # -- IN part (if any) ------------------------------------------
        # The IN documents are always consumed and tag-checked; they are
        # only stored when the caller asked for them (in_emb is not None).
        if not sym:
            _fill_embedding_rows(docs, n, 1, in_emb, dtype)

        # -- optional indexation doc – skip for now --------------------
        # (decode_file_iter stops automatically at EOF)

    return (out_emb, in_emb) if want_in else out_emb


def _fill_embedding_rows(docs, count, expected_tag, dest, dtype):
    """Read ``count`` vector documents from ``docs`` into the rows of ``dest``.

    Each document is expected to hold one ``"idx,tag" -> vector`` entry; only
    the first entry of a document is used.  When ``dest`` is None the
    documents are still consumed and tag-checked, but the vectors are dropped.
    """
    for seen in range(count):
        doc = next(docs, None)
        if doc is None:
            # A bare StopIteration must not leak out of the loader.
            raise ValueError(
                f"truncated BSON dump: got {seen} of {count} vectors "
                f"with tag {expected_tag}"
            )
        key, vec = next(iter(doc.items()))  # only 1 (key,val)
        idx, tag = map(int, key.split(","))
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if tag != expected_tag:
            raise ValueError(f"expected tag {expected_tag}, got {tag}")
        if dest is None:
            continue
        # Negative indices would silently wrap around in NumPy.
        if not 0 <= idx < count:
            raise ValueError(f"row index {idx} out of range for {count} rows")
        dest[idx] = np.asarray(vec, dtype=dtype)
@@ -1,14 +1,17 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphembed_rs
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
+ Requires-Dist: numpy>=2.2.5
5
+ Requires-Dist: pymongo>=4.12.1
6
+ License-File: LICENSE-MIT
4
7
  Summary: Python bindings for the high‑performance Rust graph/network embedding library graphembed
5
8
  Keywords: graph,embedding,hash
6
9
  Author: Jianshu Zhao
7
- Author-email: jeanpierre.both@gmail.com, jianshuzhao@yahoo.com
10
+ Author-email: jeanpierre.both@gmail.com
8
11
  License: MIT OR Apache-2.0
9
12
  Requires-Python: >=3.8
10
13
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
11
- Project-URL: Source Code, https://gitlab.com/Jianshu_Zhao/graphembed
14
+ Project-URL: Source Code, https://github.com/jean-pierreBoth/graphembed
12
15
 
13
16
  [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/graphembed/README.html)
14
17
  ![](https://anaconda.org/bioconda/graphembed/badges/license.svg)
@@ -17,10 +20,13 @@ Project-URL: Source Code, https://gitlab.com/Jianshu_Zhao/graphembed
17
20
  ![](https://anaconda.org/bioconda/graphembed/badges/platforms.svg)
18
21
  [![install with conda](https://anaconda.org/bioconda/graphembed/badges/downloads.svg)](https://anaconda.org/bioconda/graphembed)
19
22
 
23
+ <div align="center">
24
+ <img width="35%" src ="GraphEmbed_log.jpg">
25
+ </div>
20
26
 
21
27
  # GraphEmbed: Efficient and Robust Network Embedding via High-Order Proximity Preservation or Recursive Sketching
22
28
 
23
- This crate provides an executable and a library for embedding of directed or undirected graphs with positively weighted edges. We engineered and optimized current network embedding algorithms for large-scale network embedding, especially biological network. This crate was developed by [Jianshu Zhao](https://gitlab.com/Jianshu_Zhao) and Jean-Pierre Both [jpboth](https://gitlab.com/jpboth).
29
+ This crate provides an executable and a library for embedding of directed or undirected graphs with positively weighted edges. We engineered and optimized current network embedding algorithms for large-scale network embedding, especially biological networks. This crate was developed by [Jianshu Zhao](https://gitlab.com/Jianshu_Zhao) and Jean-Pierre Both [jpboth](https://gitlab.com/jpboth). A copy is also available on [GitHub](https://github.com/jianshu93/graphembed).
24
30
 
25
31
 
26
32
  - For simple graphs, without data attached to nodes, there are 2 modules **nodesketch** and **atp**. A simple executable with a validation option based on link prediction is also provided.
@@ -29,13 +35,13 @@ This crate provides an executable and a library for embedding of directed or und
29
35
 
30
36
  ### Pre-built binaries on Linux
31
37
  ```bash
32
- wget https://gitlab.com/-/project/64961144/uploads/ea72ca007e9e4899e0c830e708f52939/graphembed_Linux_x86-64_v0.1.4.zip
33
- unzip graphembed_Linux_x86-64_v0.1.4.zip
38
+ wget https://gitlab.com/-/project/64961144/uploads/9d7d0b038140cb67c584f01cd6dafac9/graphembed_Linux_x86-64_v0.1.6.zip
39
+ unzip graphembed_Linux_x86-64_v0.1.6.zip
34
40
  chmod a+x ./graphembed
35
41
  ./graphembed -h
36
42
  ```
37
43
 
38
- ### Bioconda on Linux
44
+ ### Bioconda on Linux/MacOS
39
45
  ```bash
40
46
  conda install -c conda-forge -c bioconda graphembed
41
47
  ```
@@ -45,7 +51,48 @@ conda install -c conda-forge -c bioconda graphembed
45
51
  brew tap jianshu93/graphembed
46
52
  brew update
47
53
  brew install graphembed
54
+ ```
55
+
56
+
57
+ ### In Python (Please install python>=3.9 first)
58
+ ```bash
59
+ pip install graphembed_rs
60
+
61
+ ### or you can build from source (Linux) after installing maturin
62
+ git clone https://gitlab.com/Jianshu_Zhao/graphembed
63
+ cd graphembed
64
+ pip install maturin
65
+ ### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyproject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"; you also need to install OpenBLAS via Homebrew and add it to the system library path
66
+ maturin develop --release
67
+
68
+ #### Prepare some data
69
+ wget https://gitlab.com/-/project/64961144/uploads/4e341383d62d86d1dd66e668e91b2c07/BlogCatalog.txt
70
+ ```
48
71
 
72
+ ```python
73
+ import os
74
+ os.environ["RUST_LOG"] = "info"
75
+ import graphembed_rs.graphembed_rs as ge
76
+ import graphembed_rs.load_utils as ge_utils
77
+ help(ge)
78
+ help(ge_utils)
79
+ ### HOPE
80
+ ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4,output="embedding_output")
81
+ out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
82
+ print("OUT embedding shape :", out_vectors.shape)
83
+ print("first OUT vector :", out_vectors[0])
84
+
85
+ ### Sketching
86
+ ### sketching only
87
+ ge.embed_sketching("BlogCatalog.txt", decay=0.3, dim=128, nbiter=5, symetric=True, output="embedding_output")
88
+ out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
89
+ print("OUT embedding shape :", out_vectors.shape)
90
+ print("first OUT vector :", out_vectors[0])
91
+
92
+
93
+ ### validate accuracy
94
+ auc_scores = ge.validate_sketching("BlogCatalog.txt",decay=0.3, dim=128, nbiter=3, nbpass=1, skip_frac=0.2,symetric=True, centric=True)
95
+ print("Standard AUC per pass:", auc_scores)
49
96
  ```
50
97
 
51
98
  ## Methods
@@ -0,0 +1,7 @@
1
+ graphembed_rs-0.1.2.dist-info/METADATA,sha256=WEvYJP6qVNPFlFxYzzq-n3XX6L__a2ukxgYkVt_WnYo,11532
2
+ graphembed_rs-0.1.2.dist-info/WHEEL,sha256=_czbP61TsBkf9T201RekHMHlqESnWn7yJwXBJC9P-w0,104
3
+ graphembed_rs-0.1.2.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
4
+ graphembed_rs/load_utils.py,sha256=fXd4-5OxdymSkb4kx39XeW_P3nEDdAuQ7Oe1D6PKDWE,2719
5
+ graphembed_rs/__init__.py,sha256=s8WorNcMnn15CfjctNaLnXGnwQ9RlPyCR0WJQxepTTc,179
6
+ graphembed_rs/graphembed_rs.cpython-313-darwin.so,sha256=9fLyn8FN0gZOCRj2Zv8_Wx8lN81dkjFRHcRowLNlv7o,5263216
7
+ graphembed_rs-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2022 jean-pierre.both and Jianshu Zhao
2
+
3
+ Permission is hereby granted, free of charge, to any
4
+ person obtaining a copy of this software and associated
5
+ documentation files (the "Software"), to deal in the
6
+ Software without restriction, including without
7
+ limitation the rights to use, copy, modify, merge,
8
+ publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software
10
+ is furnished to do so, subject to the following
11
+ conditions:
12
+
13
+ The above copyright notice and this permission notice
14
+ shall be included in all copies or substantial portions
15
+ of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25
+ DEALINGS IN THE SOFTWARE.
graphembed/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- from .graphembed import *
2
-
3
- __doc__ = graphembed.__doc__
4
- if hasattr(graphembed, "__all__"):
5
- __all__ = graphembed.__all__
graphembed/__init__.pyi DELETED
@@ -1,79 +0,0 @@
1
- from typing import Optional
2
-
3
- # ---------- Embedding ----------
4
- def embed_hope_rank(
5
- csv: str,
6
- symetric: bool,
7
- target_rank: int,
8
- nbiter: int,
9
- output: Optional[str] = None,
10
- ) -> None: ...
11
- def embed_hope_precision(
12
- csv: str,
13
- symetric: bool,
14
- epsil: float,
15
- maxrank: int,
16
- blockiter: int,
17
- output: Optional[str] = None,
18
- ) -> None: ...
19
- def embed_sketching(
20
- csv: str,
21
- symetric: bool,
22
- decay: float,
23
- dim: int,
24
- nbiter: int,
25
- output: Optional[str] = None,
26
- ) -> None: ...
27
-
28
- # ---------- Validation (returns mean AUC) ----------
29
- def validate_hope_rank(
30
- csv: str,
31
- symetric: bool,
32
- target_rank: int,
33
- nbiter: int,
34
- nbpass: int = 10,
35
- skip_frac: float = 0.1,
36
- centric: bool = False,
37
- ) -> float: ...
38
- def validate_hope_precision(
39
- csv: str,
40
- symetric: bool,
41
- epsil: float,
42
- maxrank: int,
43
- blockiter: int,
44
- nbpass: int = 10,
45
- skip_frac: float = 0.1,
46
- centric: bool = False,
47
- ) -> float: ...
48
- def validate_sketching(
49
- csv: str,
50
- symetric: bool,
51
- decay: float,
52
- dim: int,
53
- nbiter: int,
54
- nbpass: int = 10,
55
- skip_frac: float = 0.1,
56
- centric: bool = False,
57
- ) -> float: ...
58
-
59
- # ---------- VCMPR (precision/recall curves) ----------
60
- def estimate_vcmpr_hope_rank(
61
- csv: str,
62
- symetric: bool,
63
- target_rank: int,
64
- nbiter: int,
65
- nbpass: int = 2,
66
- topk: int = 10,
67
- skip_frac: float = 0.1,
68
- ) -> None: ...
69
- def estimate_vcmpr_sketching(
70
- csv: str,
71
- symetric: bool,
72
- decay: float,
73
- dim: int,
74
- nbiter: int,
75
- nbpass: int = 2,
76
- topk: int = 10,
77
- skip_frac: float = 0.1,
78
- ) -> None: ...
79
-
Binary file
graphembed/py.typed DELETED
File without changes
@@ -1,7 +0,0 @@
1
- graphembed_rs-0.1.0.dist-info/METADATA,sha256=dxYkTamZDsIFcEqterZ0LEeXQDItlSW2aZjmL6u1vuI,9694
2
- graphembed_rs-0.1.0.dist-info/WHEEL,sha256=_czbP61TsBkf9T201RekHMHlqESnWn7yJwXBJC9P-w0,104
3
- graphembed/__init__.py,sha256=RCcLraveWf-myTsDQGePMYq-scNNfz-3Mv1baSbgAmM,123
4
- graphembed/__init__.pyi,sha256=3_KBFG4g9akylo32CHlm9bZStcLwxIY2X4si21ilD3w,1626
5
- graphembed/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- graphembed/graphembed.cpython-313-darwin.so,sha256=6MnpRaFtxd9VQ4aK_yHbA-C9eqiVELPJGAIs6KJwt8I,5126848
7
- graphembed_rs-0.1.0.dist-info/RECORD,,