graphembed-rs 0.1.1__cp310-cp310-macosx_11_0_arm64.whl → 0.1.2__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphembed_rs/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
- from .graphembed_rs import *
1
+ from importlib import import_module as _imp
2
# Re-export the public API of the compiled extension module that lives inside
# this package. The import has to be relative: an absolute
# `from graphembed_rs import *` executed from this file resolves to this very
# package while it is still being initialised, so it re-exports nothing.
from .graphembed_rs import *
2
3
 
3
- __doc__ = graphembed_rs.__doc__
4
- if hasattr(graphembed_rs, "__all__"):
5
- __all__ = graphembed_rs.__all__
4
+ # add the helper into the same namespace
5
+ from .load_utils import *
6
+
7
+ del _imp
@@ -0,0 +1,71 @@
1
+ import numpy as np
2
+ from pathlib import Path
3
+ # `bson` ships with PyMongo: install it with `pip install pymongo`
4
+ # -- only PyMongo's BSON decoder is used here; nothing is encoded --
5
+ from bson import decode_file_iter
6
+
7
+
8
def _dtype_from_type_name(tname: str):
    """Return the NumPy scalar type matching a Rust type name from the header.

    Recognised names are ``"f32"``, ``"f64"`` and ``"usize"``; any other
    value raises ``ValueError``.
    """
    rust_to_numpy = (
        ("f32", np.float32),
        ("f64", np.float64),
        ("usize", np.int64),  # stored as i64 in BSON
    )
    for rust_name, numpy_type in rust_to_numpy:
        if tname == rust_name:
            return numpy_type
    raise ValueError(f"Unknown type_name {tname!r} in BSON header")
17
+
18
+
19
def _read_tagged_rows(docs, n, d, dtype, expected_tag):
    """Fill a fresh ``(n, d)`` array from the next ``n`` documents of ``docs``.

    Every document must hold a single ``"<row>,<tag>": vector`` entry; the
    vector is stored at row ``<row>`` of the result.

    Raises
    ------
    ValueError
        If the stream ends early, a document is empty, the tag differs from
        ``expected_tag``, the row index is outside ``[0, n)`` or the vector
        does not have exactly ``d`` components.
    """
    emb = np.empty((n, d), dtype=dtype)
    for pos in range(n):
        # A default avoids leaking a bare StopIteration out of this function.
        doc = next(docs, None)
        if not doc:
            raise ValueError(
                f"dump ended or held an empty document after {pos} of {n} "
                f"vectors with tag {expected_tag}"
            )
        key, vec = next(iter(doc.items()))  # only 1 (key, val) per document
        idx, tag = map(int, key.split(","))
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        if tag != expected_tag:
            raise ValueError(f"expected tag {expected_tag}, got {tag}")
        # A negative index would silently wrap around to the end of the array.
        if not 0 <= idx < n:
            raise ValueError(f"row index {idx} outside [0, {n})")
        row = np.asarray(vec, dtype=dtype)
        # A scalar or length-1 vector would otherwise broadcast over the row.
        if row.shape != (d,):
            raise ValueError(
                f"row {idx}: expected {d} components, got shape {row.shape}"
            )
        emb[idx] = row
    return emb


def load_embedding_bson(path: "str | Path", *, want_in: bool = False):
    """
    Load an embedding from a BSON dump into NumPy arrays.

    The file is read as a header document (keys ``nbdata``, ``dimension``,
    ``symetric``, ``type_name``) followed by ``nbdata`` OUT vectors (tag 0)
    and, for asymmetric dumps, ``nbdata`` IN vectors (tag 1). Anything after
    that (e.g. an optional indexation document) is ignored.

    Parameters
    ----------
    path : str | pathlib.Path
        File written by graphembed::io::bson_dump(...)
    want_in : bool, default False
        • False … return only the OUT/source embedding (shape = (n, d));
          the IN part of an asymmetric dump is not read at all.
        • True  … return the tuple ``(out, in_)``. ``in_`` is the IN/target
          embedding for an *asymmetric* dump and None for a symmetric one.

    Returns
    -------
    np.ndarray                      (want_in=False)
    OR (out_emb, in_emb_or_None)    (want_in=True)

    Raises
    ------
    ValueError
        If the dump is empty, truncated or otherwise malformed, or if the
        header names an unknown scalar type.
    """
    # The annotation above is quoted so that it is not evaluated when the
    # function is defined: `str | Path` only works at runtime on Python 3.10+.
    path = Path(path)
    with path.open("rb") as fh:
        docs = decode_file_iter(fh)

        # -- header ----------------------------------------------------
        first = next(docs, None)
        if first is None:
            raise ValueError(f"{path}: empty BSON dump, no header document")
        header = first["header"]
        n = int(header["nbdata"])
        d = int(header["dimension"])
        sym = bool(header["symetric"])
        dtype = _dtype_from_type_name(header["type_name"])

        # -- OUT part --------------------------------------------------
        out_emb = _read_tagged_rows(docs, n, d, dtype, 0)

        # -- IN part (only decoded when present and requested) ---------
        in_emb = None
        if want_in and not sym:
            in_emb = _read_tagged_rows(docs, n, d, dtype, 1)

    return (out_emb, in_emb) if want_in else out_emb
@@ -1,6 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphembed_rs
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
+ Requires-Dist: numpy>=2.2.5
5
+ Requires-Dist: pymongo>=4.12.1
4
6
  License-File: LICENSE-MIT
5
7
  Summary: Python bindings for the high‑performance Rust graph/network embedding library graphembed
6
8
  Keywords: graph,embedding,hash
@@ -18,6 +20,9 @@ Project-URL: Source Code, https://github.com/jean-pierreBoth/graphembed
18
20
  ![](https://anaconda.org/bioconda/graphembed/badges/platforms.svg)
19
21
  [![install with conda](https://anaconda.org/bioconda/graphembed/badges/downloads.svg)](https://anaconda.org/bioconda/graphembed)
20
22
 
23
+ <div align="center">
24
+ <img width="35%" src ="GraphEmbed_log.jpg">
25
+ </div>
21
26
 
22
27
  # GraphEmbed: Efficient and Robust Network Embedding via High-Order Proximity Preservation or Recursive Sketching
23
28
 
@@ -30,13 +35,13 @@ This crate provides an executable and a library for embedding of directed or und
30
35
 
31
36
  ### Pre-built binaries on Linux
32
37
  ```bash
33
- wget https://gitlab.com/-/project/64961144/uploads/ea72ca007e9e4899e0c830e708f52939/graphembed_Linux_x86-64_v0.1.4.zip
34
- unzip graphembed_Linux_x86-64_v0.1.4.zip
38
+ wget https://gitlab.com/-/project/64961144/uploads/9d7d0b038140cb67c584f01cd6dafac9/graphembed_Linux_x86-64_v0.1.6.zip
39
+ unzip graphembed_Linux_x86-64_v0.1.6.zip
35
40
  chmod a+x ./graphembed
36
41
  ./graphembed -h
37
42
  ```
38
43
 
39
- ### Bioconda on Linux
44
+ ### Bioconda on Linux/MacOS
40
45
  ```bash
41
46
  conda install -c conda-forge -c bioconda graphembed
42
47
  ```
@@ -49,7 +54,7 @@ brew install graphembed
49
54
  ```
50
55
 
51
56
 
52
- ### In Python (Please install python first)
57
+ ### In Python (Please install python>=3.9 first)
53
58
  ```bash
54
59
  pip install graphembed_rs
55
60
 
@@ -57,7 +62,7 @@ pip install graphembed_rs
57
62
  git clone https://gitlab.com/Jianshu_Zhao/graphembed
58
63
  cd graphembed
59
64
  pip install maturin
60
- ### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyporject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"
65
+ ### note: for macOS, you need to change the line "features = ["pyo3/extension-module", "intel-mkl-static", "simdeez_f"]" in pyproject.toml to "features = ["pyo3/extension-module","openblas-system","stdsimd"]"; you also need to install OpenBLAS via Homebrew and add it to the system library path
61
66
  maturin develop --release
62
67
 
63
68
  #### Prepare some data
@@ -66,15 +71,25 @@ wget https://gitlab.com/-/project/64961144/uploads/4e341383d62d86d1dd66e668e91b2
66
71
 
67
72
  ```python
68
73
  import os
69
- os.environ["RUST_LOG"] = "graphembed=info"
70
- import graphembed as ge
74
+ os.environ["RUST_LOG"] = "info"
75
+ import graphembed_rs.graphembed_rs as ge
76
+ import graphembed_rs.load_utils as ge_utils
71
77
  help(ge)
78
+ help(ge_utils)
72
79
  ### HOPE
73
- ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4)
80
+ ge.embed_hope_rank("BlogCatalog.txt", target_rank=128, nbiter=4,output="embedding_output")
81
+ out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
82
+ print("OUT embedding shape :", out_vectors.shape)
83
+ print("first OUT vector :", out_vectors[0])
74
84
 
75
85
  ### Sketching
76
86
  ### sketching only
77
87
  ge.embed_sketching("BlogCatalog.txt", decay=0.3, dim=128, nbiter=5, symetric=True, output="embedding_output")
88
+ out_vectors=ge_utils.load_embedding_bson("embedding_output.bson")
89
+ print("OUT embedding shape :", out_vectors.shape)
90
+ print("first OUT vector :", out_vectors[0])
91
+
92
+
78
93
  ### validate accuracy
79
94
  auc_scores = ge.validate_sketching("BlogCatalog.txt",decay=0.3, dim=128, nbiter=3, nbpass=1, skip_frac=0.2,symetric=True, centric=True)
80
95
  print("Standard AUC per pass:", auc_scores)
@@ -0,0 +1,7 @@
1
+ graphembed_rs-0.1.2.dist-info/METADATA,sha256=WEvYJP6qVNPFlFxYzzq-n3XX6L__a2ukxgYkVt_WnYo,11532
2
+ graphembed_rs-0.1.2.dist-info/WHEEL,sha256=pKyTkFbTEakJa6xy_GKYIZO6TnrTnUxcBEc6JAhr_7o,104
3
+ graphembed_rs-0.1.2.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
4
+ graphembed_rs/load_utils.py,sha256=fXd4-5OxdymSkb4kx39XeW_P3nEDdAuQ7Oe1D6PKDWE,2719
5
+ graphembed_rs/__init__.py,sha256=s8WorNcMnn15CfjctNaLnXGnwQ9RlPyCR0WJQxepTTc,179
6
+ graphembed_rs/graphembed_rs.cpython-310-darwin.so,sha256=tsLRSTexzC6Y6ZXPPOr640KrEbGEAt3O_2nznzQ1R-k,5266032
7
+ graphembed_rs-0.1.2.dist-info/RECORD,,
@@ -1,79 +0,0 @@
1
- from typing import Optional
2
-
3
- # ---------- Embedding ----------
4
- def embed_hope_rank(
5
- csv: str,
6
- symetric: bool,
7
- target_rank: int,
8
- nbiter: int,
9
- output: Optional[str] = None,
10
- ) -> None: ...
11
- def embed_hope_precision(
12
- csv: str,
13
- symetric: bool,
14
- epsil: float,
15
- maxrank: int,
16
- blockiter: int,
17
- output: Optional[str] = None,
18
- ) -> None: ...
19
- def embed_sketching(
20
- csv: str,
21
- symetric: bool,
22
- decay: float,
23
- dim: int,
24
- nbiter: int,
25
- output: Optional[str] = None,
26
- ) -> None: ...
27
-
28
- # ---------- Validation (returns mean AUC) ----------
29
- def validate_hope_rank(
30
- csv: str,
31
- symetric: bool,
32
- target_rank: int,
33
- nbiter: int,
34
- nbpass: int = 10,
35
- skip_frac: float = 0.1,
36
- centric: bool = False,
37
- ) -> float: ...
38
- def validate_hope_precision(
39
- csv: str,
40
- symetric: bool,
41
- epsil: float,
42
- maxrank: int,
43
- blockiter: int,
44
- nbpass: int = 10,
45
- skip_frac: float = 0.1,
46
- centric: bool = False,
47
- ) -> float: ...
48
- def validate_sketching(
49
- csv: str,
50
- symetric: bool,
51
- decay: float,
52
- dim: int,
53
- nbiter: int,
54
- nbpass: int = 10,
55
- skip_frac: float = 0.1,
56
- centric: bool = False,
57
- ) -> float: ...
58
-
59
- # ---------- VCMPR (precision/recall curves) ----------
60
- def estimate_vcmpr_hope_rank(
61
- csv: str,
62
- symetric: bool,
63
- target_rank: int,
64
- nbiter: int,
65
- nbpass: int = 2,
66
- topk: int = 10,
67
- skip_frac: float = 0.1,
68
- ) -> None: ...
69
- def estimate_vcmpr_sketching(
70
- csv: str,
71
- symetric: bool,
72
- decay: float,
73
- dim: int,
74
- nbiter: int,
75
- nbpass: int = 2,
76
- topk: int = 10,
77
- skip_frac: float = 0.1,
78
- ) -> None: ...
79
-
graphembed_rs/py.typed DELETED
File without changes
@@ -1,8 +0,0 @@
1
- graphembed_rs-0.1.1.dist-info/METADATA,sha256=uW8W9UwifiE6FViIKpp1I7d5e6ygVuo6AA2O72MogLU,10890
2
- graphembed_rs-0.1.1.dist-info/WHEEL,sha256=pKyTkFbTEakJa6xy_GKYIZO6TnrTnUxcBEc6JAhr_7o,104
3
- graphembed_rs-0.1.1.dist-info/licenses/LICENSE-MIT,sha256=ndZ12D28O4UkfOeoa6HP9E7IKyYG4iH79iQ6WiLs9bc,1077
4
- graphembed_rs/__init__.py,sha256=R2D0If_-sN__21LBYNod0CNgVo2dCd2RqM11AStM3X0,135
5
- graphembed_rs/__init__.pyi,sha256=3_KBFG4g9akylo32CHlm9bZStcLwxIY2X4si21ilD3w,1626
6
- graphembed_rs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- graphembed_rs/graphembed_rs.cpython-310-darwin.so,sha256=y_x9gQrzR40a3wGjdDfyIml9E_SwcgLixnyuW767B3g,5106976
8
- graphembed_rs-0.1.1.dist-info/RECORD,,