phenolrs 0.5.9__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of phenolrs might be problematic. Click here for more details.
- phenolrs/__init__.py +5 -0
- phenolrs/networkx/__init__.py +1 -0
- phenolrs/networkx/loader.py +144 -0
- phenolrs/networkx/typings.py +17 -0
- phenolrs/numpy/__init__.py +1 -0
- phenolrs/numpy/loader.py +114 -0
- phenolrs/numpy/typings.py +12 -0
- phenolrs/phenolrs.cp310-win_amd64.pyd +0 -0
- phenolrs/phenolrs.pyi +43 -0
- phenolrs/pyg/__init__.py +1 -0
- phenolrs/pyg/loader.py +173 -0
- phenolrs/pyg/typings.py +2 -0
- phenolrs-0.5.9.dist-info/METADATA +70 -0
- phenolrs-0.5.9.dist-info/RECORD +15 -0
- phenolrs-0.5.9.dist-info/WHEEL +4 -0
phenolrs/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .loader import NetworkXLoader # noqa: F401
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from typing import Any, Set, Tuple
|
|
2
|
+
|
|
3
|
+
from phenolrs import PhenolError, graph_to_networkx_format
|
|
4
|
+
|
|
5
|
+
from .typings import (
|
|
6
|
+
ArangoIDtoIndex,
|
|
7
|
+
DiGraphAdjDict,
|
|
8
|
+
DstIndices,
|
|
9
|
+
EdgeIndices,
|
|
10
|
+
EdgeValuesDict,
|
|
11
|
+
GraphAdjDict,
|
|
12
|
+
MultiDiGraphAdjDict,
|
|
13
|
+
MultiGraphAdjDict,
|
|
14
|
+
NodeDict,
|
|
15
|
+
SrcIndices,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class NetworkXLoader:
    """Entry point for loading an ArangoDB graph via ``graph_to_networkx_format``."""

    @staticmethod
    def load_into_networkx(
        database: str,
        metagraph: dict[str, dict[str, Set[str]]],
        hosts: list[str],
        user_jwt: str | None = None,
        username: str | None = None,
        password: str | None = None,
        tls_cert: Any | None = None,
        parallelism: int | None = None,
        batch_size: int | None = None,
        load_adj_dict: bool = True,
        load_coo: bool = True,
        load_all_vertex_attributes: bool = True,
        load_all_edge_attributes: bool = True,
        is_directed: bool = True,
        is_multigraph: bool = True,
        symmetrize_edges_if_directed: bool = False,
    ) -> Tuple[
        NodeDict,
        GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict,
        SrcIndices,
        DstIndices,
        EdgeIndices,
        ArangoIDtoIndex,
        EdgeValuesDict,
    ]:
        """Validate the arguments, build the request and run the loader.

        Args:
            database: Sent as ``database_config["database"]``.
            metagraph: Must contain ``"vertexCollections"`` and
                ``"edgeCollections"``; each maps a collection name to the
                attribute names to load for it.
            hosts: Sent as ``database_config["endpoints"]``.
            user_jwt: Sent as ``database_config["jwt_token"]`` when truthy.
            username: Sent as ``database_config["username"]`` when truthy.
            password: Sent as ``database_config["password"]`` when truthy.
            tls_cert: Sent as ``database_config["tls_cert"]`` when truthy.
            parallelism: ``load_config["parallelism"]``; 8 when ``None``.
            batch_size: ``load_config["batch_size"]``; 100000 when ``None``.
            load_adj_dict: Forwarded unchanged in ``graph_config``.
            load_coo: Forwarded unchanged in ``graph_config``.
            load_all_vertex_attributes: Forwarded in ``load_config``; must be
                False when any vertex collection lists attributes.
            load_all_edge_attributes: Forwarded in ``load_config``; must be
                False when any edge collection lists attributes.
            is_directed: Forwarded unchanged in ``graph_config``.
            is_multigraph: Forwarded unchanged in ``graph_config``.
            symmetrize_edges_if_directed: Forwarded unchanged in
                ``graph_config``.

        Returns:
            The seven values produced by ``graph_to_networkx_format``, in the
            order it returns them.

        Raises:
            PhenolError: If a metagraph section is missing, both sections are
                empty, or the flags contradict the metagraph contents.
        """
        if "vertexCollections" not in metagraph:
            raise PhenolError("vertexCollections not found in metagraph")
        if "edgeCollections" not in metagraph:
            raise PhenolError("edgeCollections not found in metagraph")

        vertex_spec = metagraph["vertexCollections"]
        edge_spec = metagraph["edgeCollections"]
        vertex_count = len(vertex_spec)
        edge_count = len(edge_spec)

        if vertex_count + edge_count == 0:
            raise PhenolError(
                "vertexCollections and edgeCollections cannot both be empty"
            )

        # Edge-derived outputs need at least one edge collection.
        if edge_count == 0:
            if load_adj_dict or load_coo:
                raise PhenolError(
                    "edgeCollections must be non-empty if **load_adj_dict** or **load_coo** is True"  # noqa
                )

        # "Load everything" and an explicit attribute list are mutually exclusive.
        if load_all_vertex_attributes:
            vertex_has_fields = [len(fields) > 0 for fields in vertex_spec.values()]
            if any(vertex_has_fields):
                raise PhenolError(
                    "load_all_vertex_attributes is True, but vertexCollections contain attributes"  # noqa
                )

        if load_all_edge_attributes:
            edge_has_fields = [len(fields) > 0 for fields in edge_spec.values()]
            if any(edge_has_fields):
                raise PhenolError(
                    "load_all_edge_attributes is True, but edgeCollections contain attributes"  # noqa
                )

        # Edge collections were requested but no edge output format is enabled.
        if edge_count != 0 and not load_coo and not load_adj_dict:
            raise PhenolError(
                "load_coo and load_adj_dict cannot both be False if edgeCollections is non-empty"  # noqa
            )

        # TODO: replace with pydantic validation
        database_config: dict[str, Any] = {"endpoints": hosts, "database": database}
        optional_settings = (
            ("username", username),
            ("password", password),
            ("jwt_token", user_jwt),
            ("tls_cert", tls_cert),
        )
        for option_name, option_value in optional_settings:
            # Falsy values (None, empty string) are not sent at all.
            if option_value:
                database_config[option_name] = option_value

        load_config: dict[str, Any] = {
            "parallelism": 8 if parallelism is None else parallelism,
            "batch_size": 100000 if batch_size is None else batch_size,
            "prefetch_count": 5,
            "load_all_vertex_attributes": load_all_vertex_attributes,
            "load_all_edge_attributes": load_all_edge_attributes,
        }

        graph_config = {
            "load_adj_dict": load_adj_dict,
            "load_coo": load_coo,
            "is_directed": is_directed,
            "is_multigraph": is_multigraph,
            "symmetrize_edges_if_directed": symmetrize_edges_if_directed,
        }

        vertex_collections = []
        for collection_name, fields in vertex_spec.items():
            vertex_collections.append(
                {"name": collection_name, "fields": list(fields)}
            )

        edge_collections = []
        for collection_name, fields in edge_spec.items():
            edge_collections.append({"name": collection_name, "fields": list(fields)})

        request = {
            "vertex_collections": vertex_collections,
            "edge_collections": edge_collections,
            "database_config": database_config,
            "load_config": load_config,
        }

        (
            nodes,
            adjacency,
            sources,
            destinations,
            edge_ids,
            id_to_index,
            edge_values,
        ) = graph_to_networkx_format(
            request=request,
            graph_config=graph_config,  # TODO Anthony: Move into request
        )

        return (
            nodes,
            adjacency,
            sources,
            destinations,
            edge_ids,
            id_to_index,
            edge_values,
        )
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import numpy.typing as npt
|
|
5
|
+
|
|
6
|
+
# Generic string-keyed mapping with arbitrary values.
Json = dict[str, Any]
# One Json document per string key (presumably the ArangoDB vertex id - TODO confirm).
NodeDict = dict[str, Json]
# Two levels of string keys leading to a Json document.
GraphAdjDict = dict[str, dict[str, Json]]
# A GraphAdjDict wrapped in one additional string-keyed level.
DiGraphAdjDict = dict[str, GraphAdjDict]
# Like GraphAdjDict, with an extra integer-keyed level before the Json
# document (presumably the parallel-edge index - TODO confirm).
MultiGraphAdjDict = dict[str, dict[str, dict[int, Json]]]
# A MultiGraphAdjDict wrapped in one additional string-keyed level.
MultiDiGraphAdjDict = dict[str, MultiGraphAdjDict]
# Lists of numeric values stored under string keys.
EdgeValuesDict = dict[str, list[int | float]]

# int64 index arrays, returned in this order by the NetworkX loader.
SrcIndices = npt.NDArray[np.int64]
DstIndices = npt.NDArray[np.int64]
EdgeIndices = npt.NDArray[np.int64]
# String id -> integer index lookup.
ArangoIDtoIndex = dict[str, int]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .loader import NumpyLoader # noqa: F401
|
phenolrs/numpy/loader.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from typing import Any, Tuple
|
|
2
|
+
|
|
3
|
+
from phenolrs import PhenolError, graph_to_numpy_format
|
|
4
|
+
|
|
5
|
+
from .typings import (
|
|
6
|
+
ArangoCollectionSourceToOutput,
|
|
7
|
+
ArangoCollectionToArangoKeyToIndex,
|
|
8
|
+
ArangoCollectionToIndexToArangoKey,
|
|
9
|
+
ArangoCollectionToNodeFeatures,
|
|
10
|
+
COOByEdgeType,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NumpyLoader:
    """Entry point for loading an ArangoDB graph via ``graph_to_numpy_format``."""

    @staticmethod
    def load_graph_to_numpy(
        database: str,
        metagraph: dict[str, Any],
        hosts: list[str],
        user_jwt: str | None = None,
        username: str | None = None,
        password: str | None = None,
        tls_cert: Any | None = None,
        parallelism: int | None = None,
        batch_size: int | None = None,
    ) -> Tuple[
        ArangoCollectionToNodeFeatures,
        COOByEdgeType,
        ArangoCollectionToArangoKeyToIndex,
        ArangoCollectionToIndexToArangoKey,
        ArangoCollectionSourceToOutput,
    ]:
        """Build the request from *metagraph* and run the numpy loader.

        Note:
            ``metagraph["vertexCollections"]`` is normalised **in place**: an
            entry such as ``{"x": {"features": None}}`` becomes
            ``{"x": "features"}``. ``PygLoader.load_into_pyg_data`` reads the
            normalised mapping after calling this method, so the mutation must
            be kept.

        Args:
            database: Sent as ``database_config["database"]``.
            metagraph: Must contain ``"vertexCollections"``;
                ``"edgeCollections"`` is optional. Each collection maps an
                output name to a source field name.
            hosts: Sent as ``database_config["endpoints"]``.
            user_jwt: Sent as ``database_config["jwt_token"]`` when truthy.
            username: Sent as ``database_config["username"]`` when truthy.
            password: Sent as ``database_config["password"]`` when truthy.
            tls_cert: Sent as ``database_config["tls_cert"]`` when truthy.
            parallelism: ``load_config["parallelism"]``; 8 when ``None``.
            batch_size: ``load_config["batch_size"]``; 100000 when ``None``.

        Returns:
            The four values produced by ``graph_to_numpy_format`` followed by
            a per-collection ``{source field: output name}`` mapping.

        Raises:
            PhenolError: If ``"vertexCollections"`` is missing, or a nested
                feature spec is not exactly ``{field: None}``.
        """
        # TODO: replace with pydantic validation
        database_config: dict[str, Any] = {"endpoints": hosts, "database": database}
        load_config: dict[str, Any] = {
            "parallelism": 8 if parallelism is None else parallelism,
            "batch_size": 100000 if batch_size is None else batch_size,
            "prefetch_count": 5,
            "load_all_vertex_attributes": False,
            "load_all_edge_attributes": False,
        }
        optional_settings = (
            ("username", username),
            ("password", password),
            ("jwt_token", user_jwt),
            ("tls_cert", tls_cert),
        )
        for option_name, option_value in optional_settings:
            # Falsy values (None, empty string) are not sent at all.
            if option_value:
                database_config[option_name] = option_value

        if "vertexCollections" not in metagraph:
            raise PhenolError("vertexCollections not found in metagraph")

        # Flatten nested specs: "USER": {"x": {"features": None}} is rewritten
        # to "USER": {"x": "features"} directly inside the caller's metagraph.
        for v_col_name, spec in metagraph["vertexCollections"].items():
            for source_name, value in spec.items():
                if not isinstance(value, dict):
                    continue

                if len(value) != 1:
                    m = f"Only one feature field should be specified per attribute. Found {value}"  # noqa: E501
                    raise PhenolError(m)

                (value_key,) = value  # the single key of the nested dict
                if value[value_key] is not None:
                    m = f"Invalid value for feature {source_name}: {value_key}. Found {value[value_key]}"  # noqa: E501
                    raise PhenolError(m)

                # Only the value of an existing key changes, so the dict can be
                # mutated safely while it is being iterated.
                metagraph["vertexCollections"][v_col_name][source_name] = value_key

        vertex_collections = []
        vertex_cols_source_to_output = {}
        for v_col_name, spec in metagraph["vertexCollections"].items():
            vertex_collections.append(
                {"name": v_col_name, "fields": list(spec.values())}
            )
            # Invert {output name: source field} into {source field: output name}.
            inverted = {}
            for output_name, source_field in spec.items():
                inverted[source_field] = output_name
            vertex_cols_source_to_output[v_col_name] = inverted

        edge_collections = []
        if "edgeCollections" in metagraph:
            for e_col_name, spec in metagraph["edgeCollections"].items():
                edge_collections.append(
                    {"name": e_col_name, "fields": list(spec.values())}
                )

        request = {
            "vertex_collections": vertex_collections,
            "edge_collections": edge_collections,
            "database_config": database_config,
            "load_config": load_config,
        }
        (
            features_by_col,
            coo_map,
            col_to_adb_key_to_ind,
            col_to_ind_to_adb_key,
        ) = graph_to_numpy_format(request)

        return (
            features_by_col,
            coo_map,
            col_to_adb_key_to_ind,
            col_to_ind_to_adb_key,
            vertex_cols_source_to_output,
        )
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import numpy.typing as npt
|
|
5
|
+
|
|
6
|
+
# Key of the COO map; the PyG loader unpacks it as
# (edge collection name, from name, to name).
EdgeType = Tuple[str, str, str]

# collection name -> feature name -> feature array.
ArangoCollectionToNodeFeatures = dict[str, dict[str, npt.NDArray[np.float64]]]
# One array per edge type; annotated float64 here, although the PyG loader
# casts it to int64 before use.
COOByEdgeType = dict[EdgeType, npt.NDArray[np.float64]]
# collection name -> string key -> integer index.
ArangoCollectionToArangoKeyToIndex = dict[str, dict[str, int]]
# collection name -> integer index -> string key (reverse of the above).
ArangoCollectionToIndexToArangoKey = dict[str, dict[int, str]]
# collection name -> source field name -> output name.
ArangoCollectionSourceToOutput = dict[str, dict[str, str]]
|
|
Binary file
|
phenolrs/phenolrs.pyi
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import numpy.typing as npt
|
|
5
|
+
|
|
6
|
+
from .networkx.typings import (
|
|
7
|
+
ArangoIDtoIndex,
|
|
8
|
+
DiGraphAdjDict,
|
|
9
|
+
DstIndices,
|
|
10
|
+
EdgeIndices,
|
|
11
|
+
EdgeValuesDict,
|
|
12
|
+
GraphAdjDict,
|
|
13
|
+
MultiDiGraphAdjDict,
|
|
14
|
+
MultiGraphAdjDict,
|
|
15
|
+
NodeDict,
|
|
16
|
+
SrcIndices,
|
|
17
|
+
)
|
|
18
|
+
from .numpy.typings import (
|
|
19
|
+
ArangoCollectionToArangoKeyToIndex,
|
|
20
|
+
ArangoCollectionToIndexToArangoKey,
|
|
21
|
+
ArangoCollectionToNodeFeatures,
|
|
22
|
+
COOByEdgeType,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Type stub for the loader function provided by the compiled `phenolrs` module.
# NumpyLoader calls it with a request dict holding the keys
# "vertex_collections", "edge_collections", "database_config" and "load_config".
# It returns, in order: node features per collection, the COO map keyed by edge
# type, the key -> index map and the index -> key map.
def graph_to_numpy_format(request: dict[str, typing.Any]) -> typing.Tuple[
    ArangoCollectionToNodeFeatures,
    COOByEdgeType,
    ArangoCollectionToArangoKeyToIndex,
    ArangoCollectionToIndexToArangoKey,
]: ...
|
|
31
|
+
# Type stub for the NetworkX-oriented loader function of the compiled module.
# NetworkXLoader passes `request` with the same four keys as the numpy variant,
# and `graph_config` with the keys "load_adj_dict", "load_coo", "is_directed",
# "is_multigraph" and "symmetrize_edges_if_directed".
# It returns a 7-tuple in the order listed in the annotation below.
def graph_to_networkx_format(
    request: dict[str, typing.Any], graph_config: dict[str, typing.Any]
) -> typing.Tuple[
    NodeDict,
    GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict,
    SrcIndices,
    DstIndices,
    EdgeIndices,
    ArangoIDtoIndex,
    EdgeValuesDict,
]: ...
|
|
42
|
+
|
|
43
|
+
# Exception type raised by the Python loaders when a metagraph or flag
# combination is invalid, or when expected data could not be loaded.
class PhenolError(Exception): ...
|
phenolrs/pyg/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .loader import PygLoader # noqa: F401
|
phenolrs/pyg/loader.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from typing import Any, Tuple
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from phenolrs import PhenolError
|
|
6
|
+
from phenolrs.numpy import NumpyLoader
|
|
7
|
+
|
|
8
|
+
from .typings import (
|
|
9
|
+
ArangoCollectionToArangoKeyToIndex,
|
|
10
|
+
ArangoCollectionToIndexToArangoKey,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import torch
|
|
15
|
+
from torch_geometric.data import Data, HeteroData
|
|
16
|
+
|
|
17
|
+
TORCH_AVAILABLE = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
TORCH_AVAILABLE = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PygLoader:
    """Builds PyTorch Geometric ``Data`` / ``HeteroData`` objects from ArangoDB.

    Both public loaders delegate the actual fetch to
    ``NumpyLoader.load_graph_to_numpy`` and then wrap the returned numpy
    arrays in torch tensors.
    """

    @staticmethod
    def _require_torch() -> None:
        """Raise ``ImportError`` when the optional torch dependencies are missing."""
        if not TORCH_AVAILABLE:
            m = "Missing required dependencies. Install with `pip install phenolrs[torch]`"  # noqa: E501
            raise ImportError(m)

    @staticmethod
    def _validate_metagraph(metagraph: dict[str, Any]) -> None:
        """Check that both metagraph sections are present and non-empty.

        Raises:
            PhenolError: If ``"vertexCollections"`` or ``"edgeCollections"``
                is missing or maps to an empty dictionary.
        """
        if "vertexCollections" not in metagraph:
            raise PhenolError("vertexCollections not found in metagraph")
        if "edgeCollections" not in metagraph:
            raise PhenolError("edgeCollections not found in metagraph")

        if len(metagraph["vertexCollections"]) == 0:
            raise PhenolError("vertexCollections must map to non-empty dictionary")
        if len(metagraph["edgeCollections"]) == 0:
            raise PhenolError("edgeCollections must map to non-empty dictionary")

    @staticmethod
    def load_into_pyg_data(
        database: str,
        metagraph: dict[str, Any],
        hosts: list[str],
        user_jwt: str | None = None,
        username: str | None = None,
        password: str | None = None,
        tls_cert: Any | None = None,
        parallelism: int | None = None,
        batch_size: int | None = None,
    ) -> Tuple[
        "Data", ArangoCollectionToArangoKeyToIndex, ArangoCollectionToIndexToArangoKey
    ]:
        """Load a homogeneous graph (one vertex and one edge collection).

        Args:
            database: Forwarded to ``NumpyLoader.load_graph_to_numpy``.
            metagraph: Must contain exactly one entry in each of
                ``"vertexCollections"`` and ``"edgeCollections"``. The vertex
                entry maps each output attribute name to its source field.
            hosts: Forwarded to the numpy loader.
            user_jwt: Forwarded to the numpy loader.
            username: Forwarded to the numpy loader.
            password: Forwarded to the numpy loader.
            tls_cert: Forwarded to the numpy loader.
            parallelism: Forwarded to the numpy loader.
            batch_size: Forwarded to the numpy loader.

        Returns:
            ``(data, col_to_adb_key_to_ind, col_to_ind_to_adb_key)``, where
            ``data`` carries one float64 tensor per non-empty feature and an
            int64 ``edge_index`` tensor when the edge array is non-empty.

        Raises:
            ImportError: If torch / torch_geometric are not installed.
            PhenolError: If the metagraph is invalid or the requested
                collection or feature is absent from the loaded data.
        """
        PygLoader._require_torch()
        PygLoader._validate_metagraph(metagraph)

        if len(metagraph["vertexCollections"]) > 1:
            m = "More than one vertex collection specified for homogeneous dataset"
            raise PhenolError(m)
        if len(metagraph["edgeCollections"]) > 1:
            m = "More than one edge collection specified for homogeneous dataset"
            raise PhenolError(m)

        # Exactly one entry exists in each section at this point. The numpy
        # loader normalises the vertex spec in place, so `v_col_spec` (the same
        # dict object) holds plain source field names once the call returns.
        v_col_spec_name, v_col_spec = next(
            iter(metagraph["vertexCollections"].items())
        )
        edge_col_name = next(iter(metagraph["edgeCollections"]))

        (
            features_by_col,
            coo_map,
            col_to_adb_key_to_ind,
            col_to_ind_to_adb_key,
            _,  # source -> output mapping is not needed for a single collection
        ) = NumpyLoader.load_graph_to_numpy(
            database,
            metagraph,
            hosts,
            user_jwt,
            username,
            password,
            tls_cert,
            parallelism,
            batch_size,
        )

        data = Data()

        # add the features
        if v_col_spec_name not in features_by_col:
            raise PhenolError(f"Unable to load data for collection {v_col_spec_name}")
        col_features = features_by_col[v_col_spec_name]
        for feature, feature_source_key in v_col_spec.items():
            if feature_source_key not in col_features:
                raise PhenolError(
                    f"Unable to load features {feature_source_key} for collection {v_col_spec_name}"  # noqa: E501
                )
            result = torch.from_numpy(col_features[feature_source_key].astype(np.float64))
            # Empty tensors are skipped instead of being stored on the object.
            if result.numel() > 0:
                data[feature] = result

        # finally add the edges
        for (e_name, _from_name, _to_name), coo in coo_map.items():
            if e_name != edge_col_name:
                continue
            result = torch.from_numpy(coo.astype(np.int64))
            if result.numel() > 0:
                data["edge_index"] = result

        return data, col_to_adb_key_to_ind, col_to_ind_to_adb_key

    @staticmethod
    def load_into_pyg_heterodata(
        database: str,
        metagraph: dict[str, Any],
        hosts: list[str],
        user_jwt: str | None = None,
        username: str | None = None,
        password: str | None = None,
        tls_cert: Any | None = None,
        parallelism: int | None = None,
        batch_size: int | None = None,
    ) -> Tuple[
        "HeteroData",
        ArangoCollectionToArangoKeyToIndex,
        ArangoCollectionToIndexToArangoKey,
    ]:
        """Load a heterogeneous graph (any number of collections).

        Args:
            database: Forwarded to ``NumpyLoader.load_graph_to_numpy``.
            metagraph: Must contain non-empty ``"vertexCollections"`` and
                ``"edgeCollections"`` sections.
            hosts: Forwarded to the numpy loader.
            user_jwt: Forwarded to the numpy loader.
            username: Forwarded to the numpy loader.
            password: Forwarded to the numpy loader.
            tls_cert: Forwarded to the numpy loader.
            parallelism: Forwarded to the numpy loader.
            batch_size: Forwarded to the numpy loader.

        Returns:
            ``(data, col_to_adb_key_to_ind, col_to_ind_to_adb_key)``. Node
            features are stored as ``data[collection][output_name]`` and
            edges as ``data[(from, edge_collection, to)].edge_index``.

        Raises:
            ImportError: If torch / torch_geometric are not installed.
            PhenolError: If the metagraph is invalid.
            KeyError: If a loaded collection or feature has no entry in the
                source -> output mapping derived from the metagraph.
        """
        PygLoader._require_torch()
        PygLoader._validate_metagraph(metagraph)

        (
            features_by_col,
            coo_map,
            col_to_adb_key_to_ind,
            col_to_ind_to_adb_key,
            vertex_cols_source_to_output,
        ) = NumpyLoader.load_graph_to_numpy(
            database,
            metagraph,
            hosts,
            user_jwt,
            username,
            password,
            tls_cert,
            parallelism,
            batch_size,
        )

        data = HeteroData()
        for col, col_features in features_by_col.items():
            col_mapping = vertex_cols_source_to_output[col]
            for feature, values in col_features.items():
                # This entry is bookkeeping from the loader, not a feature.
                if feature == "@collection_name":
                    continue

                target_name = col_mapping[feature]
                result = torch.from_numpy(values.astype(np.float64))
                if result.numel() > 0:
                    data[col][target_name] = result

        for (edge_col_name, from_name, to_name), coo in coo_map.items():
            result = torch.from_numpy(coo.astype(np.int64))
            if result.numel() > 0:
                # The loader keys edges as (edge, from, to); the store is
                # addressed as (from, edge, to).
                data[(from_name, edge_col_name, to_name)].edge_index = result

        return data, col_to_adb_key_to_ind, col_to_ind_to_adb_key
|
phenolrs/pyg/typings.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phenolrs
|
|
3
|
+
Version: 0.5.9
|
|
4
|
+
Classifier: Programming Language :: Rust
|
|
5
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Typing :: Typed
|
|
11
|
+
Requires-Dist: numpy ~=1.26
|
|
12
|
+
Requires-Dist: python-arango
|
|
13
|
+
Requires-Dist: pytest ; extra == 'tests'
|
|
14
|
+
Requires-Dist: arango-datasets ; extra == 'tests'
|
|
15
|
+
Requires-Dist: adbnx-adapter ; extra == 'tests'
|
|
16
|
+
Requires-Dist: torch ; extra == 'torch'
|
|
17
|
+
Requires-Dist: torch-geometric ; extra == 'torch'
|
|
18
|
+
Requires-Dist: version ; extra == 'dynamic'
|
|
19
|
+
Provides-Extra: tests
|
|
20
|
+
Provides-Extra: torch
|
|
21
|
+
Provides-Extra: dynamic
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
24
|
+
|
|
25
|
+
# Phenolrs
|
|
26
|
+
|
|
27
|
+
## Running/building
|
|
28
|
+
|
|
29
|
+
### Required:
|
|
30
|
+
- `python`
|
|
31
|
+
- `pytest`
|
|
32
|
+
- `rust/cargo`
|
|
33
|
+
|
|
34
|
+
### Build
|
|
35
|
+
Build using the `maturin` python package - can install using `pip install maturin`.
|
|
36
|
+
|
|
37
|
+
Dev build
|
|
38
|
+
`maturin develop`
|
|
39
|
+
|
|
40
|
+
Release build
|
|
41
|
+
`maturin develop -r`
|
|
42
|
+
|
|
43
|
+
### Tests
|
|
44
|
+
|
|
45
|
+
#### Rust
|
|
46
|
+
`cargo test --no-default-features`
|
|
47
|
+
|
|
48
|
+
#### Python
|
|
49
|
+
**WIP: assumes the dataset is already loaded in a local ArangoDB instance.**
|
|
50
|
+
|
|
51
|
+
##### Requirements
|
|
52
|
+
`pip install pytest arango-datasets`
|
|
53
|
+
|
|
54
|
+
##### Python db setup
|
|
55
|
+
```python
|
|
56
|
+
import arango
|
|
57
|
+
from arango_datasets import Datasets
|
|
58
|
+
|
|
59
|
+
client = arango.ArangoClient("http://localhost:8529")
|
|
60
|
+
sys = client.db("_system", password="test")
|
|
61
|
+
sys.create_database("abide")
|
|
62
|
+
|
|
63
|
+
abide_db = client.db("abide", password="test")
|
|
64
|
+
dsets = Datasets(abide_db)
|
|
65
|
+
dsets.load("ABIDE")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
##### Run python tests:
|
|
69
|
+
`maturin develop && pytest`
|
|
70
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
phenolrs-0.5.9.dist-info/METADATA,sha256=PG7Nvsxl83OYRTNQencyHIZWk9Kjnmr7Hf34GCMkVZQ,1750
|
|
2
|
+
phenolrs-0.5.9.dist-info/WHEEL,sha256=nWeKMpvecbFEmBYQM44p2FChzkAyOaqMiofeuspsYro,96
|
|
3
|
+
phenolrs/networkx/loader.py,sha256=HTC1fuIKWgPZATnHsnQlmJcoQ973Gke0G12pS-A1w6g,5053
|
|
4
|
+
phenolrs/networkx/typings.py,sha256=-_Bdpwpxj9eKAjnNKoK0XMRs3rT6z16kw5mH53k7kd8,515
|
|
5
|
+
phenolrs/networkx/__init__.py,sha256=IkJSMrsv-Ky3uviyX5uIBJXlALnpz_PKE-OlQpxTJko,50
|
|
6
|
+
phenolrs/numpy/loader.py,sha256=V5rKbhrCsPx_Ynapo8-ROQlNVBZhniGf_g0IPqvQ5ZA,4203
|
|
7
|
+
phenolrs/numpy/typings.py,sha256=JcKbwXwNkgOUIAOBan9SRNeHH3QkvPqDBzFGHhaYLWs,438
|
|
8
|
+
phenolrs/numpy/__init__.py,sha256=gx3JCZ0u9RPDUthbaa2n4GnyklRwwgMyCOWjKlWT43M,47
|
|
9
|
+
phenolrs/phenolrs.pyi,sha256=6LRUFRdXtL_L_7xZ0xYS83l8Vq07fpT37PAeOvQE1v4,1057
|
|
10
|
+
phenolrs/pyg/loader.py,sha256=STEKl8Wc4TOLh_D0j8CV_BIYjyI-WpBt8xxulhblQ9M,6332
|
|
11
|
+
phenolrs/pyg/typings.py,sha256=e52peJZkW9_QrGpn879hvvNsprBJK4U7aPVsnRUJSf8,128
|
|
12
|
+
phenolrs/pyg/__init__.py,sha256=FyMHHo7XUn48UzGbtcuGo7mLEXOxxQdu-f16hicc6HY,45
|
|
13
|
+
phenolrs/__init__.py,sha256=VFJvgvyWg3kAovAU7Qww4-AjcV2P-BS8vQVBxjYkLjE,267
|
|
14
|
+
phenolrs/phenolrs.cp310-win_amd64.pyd,sha256=MP4MHPVlB7OKOTwNkiU3u9nu9SVF873hsgyLzfgfNq8,5606400
|
|
15
|
+
phenolrs-0.5.9.dist-info/RECORD,,
|