graph-seeder 1.0.0.dev0__tar.gz → 1.0.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/PKG-INFO +1 -1
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/pyproject.toml +1 -1
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/SubgraphExtractor.py +3 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/densification/GraphConnector.py +2 -1
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/BFS/BFS.py +2 -2
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/ExtractionStrategy.py +6 -1
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/Hop/HopExpansion.py +37 -14
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/Factory.py +13 -2
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/.gitignore +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/README.md +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/requirements.txt +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/GraphSeeder.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/dbpedia_default.json +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/default.json +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/europeana_default.json +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/pgxlod_default.json +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/wikidata_default.json +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/ConsoleUI.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/GraphExporter.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/GraphStatistics.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/URIManager.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/utils.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/NeighborhoodWrapper.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/hashmap/HashMapWrapper.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/BaseClient.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/GraphWrapper.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/SparqlQueryBuilder.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/client/SparqlClient.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/client/TurtleClient.py +0 -0
- {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graph-seeder
|
|
3
|
-
Version: 1.0.0.dev0
|
|
3
|
+
Version: 1.0.0.dev1
|
|
4
4
|
Summary: A powerful tool to extract and densify subgraphs from Knowledge Graphs via SPARQL or LMDB, with different extraction strategies.
|
|
5
5
|
Requires-Python: >=3.9
|
|
6
6
|
Requires-Dist: lmdb>=2.2.0
|
|
@@ -7,7 +7,7 @@ packages = ["src/graph_seeder"]
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "graph-seeder"
|
|
10
|
-
version = "1.0.0.dev0"
|
|
10
|
+
version = "1.0.0.dev1"
|
|
11
11
|
description = "A powerful tool to extract and densify subgraphs from Knowledge Graphs via SPARQL or LMDB, with different extraction strategies."
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
requires-python = ">=3.9"
|
|
@@ -281,10 +281,13 @@ class SubgraphExtractor:
|
|
|
281
281
|
def densify_graph(
|
|
282
282
|
self, triplets: list[tuple[str, str, str]], seeds_found: set[str]
|
|
283
283
|
):
|
|
284
|
+
explored_nodes = self.extractor_strategy.explored_nodes
|
|
285
|
+
|
|
284
286
|
graph_connector: GraphConnector = GraphConnector(
|
|
285
287
|
self.wrapper,
|
|
286
288
|
self.uri_manager,
|
|
287
289
|
self.extractor_strategy.graph,
|
|
290
|
+
explored_nodes,
|
|
288
291
|
self.ui,
|
|
289
292
|
self.cfg,
|
|
290
293
|
)
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/densification/GraphConnector.py
RENAMED
|
@@ -18,6 +18,7 @@ class GraphConnector:
|
|
|
18
18
|
wrapper: NeighborhoodWrapper,
|
|
19
19
|
uri_manager: URIManager,
|
|
20
20
|
graph: nx.MultiGraph,
|
|
21
|
+
explored_nodes: set[str],
|
|
21
22
|
ui: ConsoleUI,
|
|
22
23
|
config: dict,
|
|
23
24
|
):
|
|
@@ -33,7 +34,7 @@ class GraphConnector:
|
|
|
33
34
|
config.get("densification", {}).get("mode", "most_connected").lower()
|
|
34
35
|
)
|
|
35
36
|
self.bfs: BidirectionalBFS = ComponentFactory.create_strategy(
|
|
36
|
-
wrapper, uri_manager, self.bfs_config
|
|
37
|
+
wrapper, uri_manager, self.bfs_config, explored_nodes=explored_nodes
|
|
37
38
|
)
|
|
38
39
|
self.bfs.load_graph(graph)
|
|
39
40
|
|
|
@@ -20,12 +20,12 @@ class BidirectionalBFS(ExtractionStrategy):
|
|
|
20
20
|
wrapper: NeighborhoodWrapper,
|
|
21
21
|
uri_manager: URIManager,
|
|
22
22
|
cfg: dict,
|
|
23
|
+
explored_nodes: set[str] = None,
|
|
23
24
|
) -> None:
|
|
24
|
-
super().__init__(wrapper, uri_manager, cfg)
|
|
25
|
+
super().__init__(wrapper, uri_manager, cfg, explored_nodes)
|
|
25
26
|
self.uri_manager = uri_manager
|
|
26
27
|
self.cfg = cfg
|
|
27
28
|
|
|
28
|
-
self.explored_nodes: set[str] = set()
|
|
29
29
|
self._has_path: bool = False
|
|
30
30
|
|
|
31
31
|
self._excluded_nodes = set(cfg["graph_filters"]["exclude_nodes"])
|
|
@@ -14,13 +14,18 @@ logger = logging.getLogger("subgraph")
|
|
|
14
14
|
|
|
15
15
|
class ExtractionStrategy(ABC):
|
|
16
16
|
def __init__(
|
|
17
|
-
self,
|
|
17
|
+
self,
|
|
18
|
+
wrapper: NeighborhoodWrapper,
|
|
19
|
+
uri_manager: URIManager,
|
|
20
|
+
config: dict,
|
|
21
|
+
explored_nodes: set[str] = None,
|
|
18
22
|
):
|
|
19
23
|
"""Base class for extraction strategies that define how to extract a subgraph given a set of seed nodes."""
|
|
20
24
|
self.wrapper = wrapper
|
|
21
25
|
self.uri_manager = uri_manager
|
|
22
26
|
self.config = config
|
|
23
27
|
self.graph: MultiGraph = MultiGraph()
|
|
28
|
+
self.explored_nodes = set() if explored_nodes is None else explored_nodes
|
|
24
29
|
|
|
25
30
|
@abstractmethod
|
|
26
31
|
def extract(self, nodes: list[str]) -> tuple[list[tuple[str, str, str]], str]:
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/Hop/HopExpansion.py
RENAMED
|
@@ -11,9 +11,13 @@ class HopExpansion(ExtractionStrategy):
|
|
|
11
11
|
"""Simple expansion strategy that expands each node level by level up to max_hops."""
|
|
12
12
|
|
|
13
13
|
def __init__(
|
|
14
|
-
self,
|
|
14
|
+
self,
|
|
15
|
+
wrapper: NeighborhoodWrapper,
|
|
16
|
+
uri_manager: URIManager,
|
|
17
|
+
config: dict,
|
|
18
|
+
explored_nodes: set[str] = None,
|
|
15
19
|
):
|
|
16
|
-
super().__init__(wrapper, uri_manager, config)
|
|
20
|
+
super().__init__(wrapper, uri_manager, config, explored_nodes)
|
|
17
21
|
|
|
18
22
|
self.max_hops = config["extraction"]["max_hops"]
|
|
19
23
|
self.excluded_nodes = set(
|
|
@@ -65,18 +69,37 @@ class HopExpansion(ExtractionStrategy):
|
|
|
65
69
|
)
|
|
66
70
|
next_level_nodes: set[str] = set()
|
|
67
71
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
72
|
+
nodes_to_query = []
|
|
73
|
+
|
|
74
|
+
for node in current_level_nodes:
|
|
75
|
+
if node in self.explored_nodes:
|
|
76
|
+
if node in self.graph:
|
|
77
|
+
for u, v, data in self.graph.edges(node, data=True):
|
|
78
|
+
pred = data.get("key")
|
|
79
|
+
neighbor = v if u == node else u
|
|
80
|
+
|
|
81
|
+
if neighbor not in self.excluded_nodes:
|
|
82
|
+
all_triplets.add((u, pred, v))
|
|
83
|
+
if neighbor not in visited_nodes:
|
|
84
|
+
next_level_nodes.add(neighbor)
|
|
85
|
+
else:
|
|
86
|
+
nodes_to_query.append(node)
|
|
87
|
+
|
|
88
|
+
if nodes_to_query:
|
|
89
|
+
for triplets in self.wrapper.get_neighborhood(nodes_to_query):
|
|
90
|
+
for subj, pred, obj in triplets:
|
|
91
|
+
self.graph.add_edge(subj, obj, key=pred)
|
|
92
|
+
if subj in self.excluded_nodes or obj in self.excluded_nodes:
|
|
93
|
+
continue
|
|
94
|
+
|
|
95
|
+
all_triplets.add((subj, pred, obj))
|
|
96
|
+
|
|
97
|
+
if subj not in visited_nodes:
|
|
98
|
+
next_level_nodes.add(subj)
|
|
99
|
+
if obj not in visited_nodes:
|
|
100
|
+
next_level_nodes.add(obj)
|
|
101
|
+
|
|
102
|
+
self.explored_nodes.update(nodes_to_query)
|
|
80
103
|
|
|
81
104
|
if not next_level_nodes:
|
|
82
105
|
logger.info("No more nodes to expand. Graph is fully explored.")
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from graph_seeder.utils.URIManager import URIManager
|
|
1
2
|
from graph_seeder.wrapper.NeighborhoodWrapper import NeighborhoodWrapper
|
|
2
3
|
from graph_seeder.wrapper.sparql.GraphWrapper import GraphWrapper
|
|
3
4
|
from graph_seeder.wrapper.sparql.client.SparqlClient import SparqlClient
|
|
@@ -47,7 +48,12 @@ class ComponentFactory:
|
|
|
47
48
|
return wrapper_class(uri_manager, config)
|
|
48
49
|
|
|
49
50
|
@staticmethod
|
|
50
|
-
def create_strategy(
|
|
51
|
+
def create_strategy(
|
|
52
|
+
wrapper: NeighborhoodWrapper,
|
|
53
|
+
uri_manager: URIManager,
|
|
54
|
+
config: dict,
|
|
55
|
+
explored_nodes=None,
|
|
56
|
+
):
|
|
51
57
|
"""Create an extraction strategy based on the configuration."""
|
|
52
58
|
strategy_type = config.get("extraction", {}).get("strategy", "bfs").lower()
|
|
53
59
|
|
|
@@ -61,4 +67,9 @@ class ComponentFactory:
|
|
|
61
67
|
f"Unknown strategy type: '{strategy_type}'. Valid options: {list(registered_strategies.keys())}"
|
|
62
68
|
)
|
|
63
69
|
|
|
64
|
-
return registered_strategies[strategy_type](
|
|
70
|
+
return registered_strategies[strategy_type](
|
|
71
|
+
wrapper,
|
|
72
|
+
uri_manager,
|
|
73
|
+
config,
|
|
74
|
+
explored_nodes,
|
|
75
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/dbpedia_default.json
RENAMED
|
File without changes
|
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/europeana_default.json
RENAMED
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/pgxlod_default.json
RENAMED
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/wikidata_default.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/GraphStatistics.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/NeighborhoodWrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/BaseClient.py
RENAMED
|
File without changes
|
{graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/GraphWrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|