graph-seeder 1.0.0.dev0__tar.gz → 1.0.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/PKG-INFO +1 -1
  2. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/pyproject.toml +1 -1
  3. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/SubgraphExtractor.py +3 -0
  4. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/densification/GraphConnector.py +2 -1
  5. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/BFS/BFS.py +2 -2
  6. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/ExtractionStrategy.py +6 -1
  7. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/extraction/Hop/HopExpansion.py +37 -14
  8. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/Factory.py +13 -2
  9. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/.gitignore +0 -0
  10. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/README.md +0 -0
  11. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/requirements.txt +0 -0
  12. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/GraphSeeder.py +0 -0
  13. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/dbpedia_default.json +0 -0
  14. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/default.json +0 -0
  15. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/europeana_default.json +0 -0
  16. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/pgxlod_default.json +0 -0
  17. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/configs/wikidata_default.json +0 -0
  18. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/ConsoleUI.py +0 -0
  19. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/GraphExporter.py +0 -0
  20. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/GraphStatistics.py +0 -0
  21. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/URIManager.py +0 -0
  22. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/utils/utils.py +0 -0
  23. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/NeighborhoodWrapper.py +0 -0
  24. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/hashmap/HashMapWrapper.py +0 -0
  25. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/BaseClient.py +0 -0
  26. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/GraphWrapper.py +0 -0
  27. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/SparqlQueryBuilder.py +0 -0
  28. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/client/SparqlClient.py +0 -0
  29. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/src/graph_seeder/wrapper/sparql/client/TurtleClient.py +0 -0
  30. {graph_seeder-1.0.0.dev0 → graph_seeder-1.0.0.dev1}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graph-seeder
3
- Version: 1.0.0.dev0
3
+ Version: 1.0.0.dev1
4
4
  Summary: A powerful tool to extract and densify subgraphs from Knowledge Graphs via SPARQL or LMDB, with different extraction strategies.
5
5
  Requires-Python: >=3.9
6
6
  Requires-Dist: lmdb>=2.2.0
@@ -7,7 +7,7 @@ packages = ["src/graph_seeder"]
7
7
 
8
8
  [project]
9
9
  name = "graph-seeder"
10
- version = "1.0.0.dev0"
10
+ version = "1.0.0.dev1"
11
11
  description = "A powerful tool to extract and densify subgraphs from Knowledge Graphs via SPARQL or LMDB, with different extraction strategies."
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.9"
@@ -281,10 +281,13 @@ class SubgraphExtractor:
281
281
  def densify_graph(
282
282
  self, triplets: list[tuple[str, str, str]], seeds_found: set[str]
283
283
  ):
284
+ explored_nodes = self.extractor_strategy.explored_nodes
285
+
284
286
  graph_connector: GraphConnector = GraphConnector(
285
287
  self.wrapper,
286
288
  self.uri_manager,
287
289
  self.extractor_strategy.graph,
290
+ explored_nodes,
288
291
  self.ui,
289
292
  self.cfg,
290
293
  )
@@ -18,6 +18,7 @@ class GraphConnector:
18
18
  wrapper: NeighborhoodWrapper,
19
19
  uri_manager: URIManager,
20
20
  graph: nx.MultiGraph,
21
+ explored_nodes: set[str],
21
22
  ui: ConsoleUI,
22
23
  config: dict,
23
24
  ):
@@ -33,7 +34,7 @@ class GraphConnector:
33
34
  config.get("densification", {}).get("mode", "most_connected").lower()
34
35
  )
35
36
  self.bfs: BidirectionalBFS = ComponentFactory.create_strategy(
36
- wrapper, uri_manager, self.bfs_config
37
+ wrapper, uri_manager, self.bfs_config, explored_nodes=explored_nodes
37
38
  )
38
39
  self.bfs.load_graph(graph)
39
40
 
@@ -20,12 +20,12 @@ class BidirectionalBFS(ExtractionStrategy):
20
20
  wrapper: NeighborhoodWrapper,
21
21
  uri_manager: URIManager,
22
22
  cfg: dict,
23
+ explored_nodes: set[str] = None,
23
24
  ) -> None:
24
- super().__init__(wrapper, uri_manager, cfg)
25
+ super().__init__(wrapper, uri_manager, cfg, explored_nodes)
25
26
  self.uri_manager = uri_manager
26
27
  self.cfg = cfg
27
28
 
28
- self.explored_nodes: set[str] = set()
29
29
  self._has_path: bool = False
30
30
 
31
31
  self._excluded_nodes = set(cfg["graph_filters"]["exclude_nodes"])
@@ -14,13 +14,18 @@ logger = logging.getLogger("subgraph")
14
14
 
15
15
  class ExtractionStrategy(ABC):
16
16
  def __init__(
17
- self, wrapper: NeighborhoodWrapper, uri_manager: URIManager, config: dict
17
+ self,
18
+ wrapper: NeighborhoodWrapper,
19
+ uri_manager: URIManager,
20
+ config: dict,
21
+ explored_nodes: set[str] = None,
18
22
  ):
19
23
  """Base class for extraction strategies that define how to extract a subgraph given a set of seed nodes."""
20
24
  self.wrapper = wrapper
21
25
  self.uri_manager = uri_manager
22
26
  self.config = config
23
27
  self.graph: MultiGraph = MultiGraph()
28
+ self.explored_nodes = set() if explored_nodes is None else explored_nodes
24
29
 
25
30
  @abstractmethod
26
31
  def extract(self, nodes: list[str]) -> tuple[list[tuple[str, str, str]], str]:
@@ -11,9 +11,13 @@ class HopExpansion(ExtractionStrategy):
11
11
  """Simple expansion strategy that expands each node level by level up to max_hops."""
12
12
 
13
13
  def __init__(
14
- self, wrapper: NeighborhoodWrapper, uri_manager: URIManager, config: dict
14
+ self,
15
+ wrapper: NeighborhoodWrapper,
16
+ uri_manager: URIManager,
17
+ config: dict,
18
+ explored_nodes: set[str] = None,
15
19
  ):
16
- super().__init__(wrapper, uri_manager, config)
20
+ super().__init__(wrapper, uri_manager, config, explored_nodes)
17
21
 
18
22
  self.max_hops = config["extraction"]["max_hops"]
19
23
  self.excluded_nodes = set(
@@ -65,18 +69,37 @@ class HopExpansion(ExtractionStrategy):
65
69
  )
66
70
  next_level_nodes: set[str] = set()
67
71
 
68
- for triplets in self.wrapper.get_neighborhood(list(current_level_nodes)):
69
- for subj, pred, obj in triplets:
70
- self.graph.add_edge(subj, obj, key=pred)
71
- if subj in self.excluded_nodes or obj in self.excluded_nodes:
72
- continue
73
-
74
- all_triplets.add((subj, pred, obj))
75
-
76
- if subj not in visited_nodes:
77
- next_level_nodes.add(subj)
78
- if obj not in visited_nodes:
79
- next_level_nodes.add(obj)
72
+ nodes_to_query = []
73
+
74
+ for node in current_level_nodes:
75
+ if node in self.explored_nodes:
76
+ if node in self.graph:
77
+ for u, v, data in self.graph.edges(node, data=True):
78
+ pred = data.get("key")
79
+ neighbor = v if u == node else u
80
+
81
+ if neighbor not in self.excluded_nodes:
82
+ all_triplets.add((u, pred, v))
83
+ if neighbor not in visited_nodes:
84
+ next_level_nodes.add(neighbor)
85
+ else:
86
+ nodes_to_query.append(node)
87
+
88
+ if nodes_to_query:
89
+ for triplets in self.wrapper.get_neighborhood(nodes_to_query):
90
+ for subj, pred, obj in triplets:
91
+ self.graph.add_edge(subj, obj, key=pred)
92
+ if subj in self.excluded_nodes or obj in self.excluded_nodes:
93
+ continue
94
+
95
+ all_triplets.add((subj, pred, obj))
96
+
97
+ if subj not in visited_nodes:
98
+ next_level_nodes.add(subj)
99
+ if obj not in visited_nodes:
100
+ next_level_nodes.add(obj)
101
+
102
+ self.explored_nodes.update(nodes_to_query)
80
103
 
81
104
  if not next_level_nodes:
82
105
  logger.info("No more nodes to expand. Graph is fully explored.")
@@ -1,3 +1,4 @@
1
+ from graph_seeder.utils.URIManager import URIManager
1
2
  from graph_seeder.wrapper.NeighborhoodWrapper import NeighborhoodWrapper
2
3
  from graph_seeder.wrapper.sparql.GraphWrapper import GraphWrapper
3
4
  from graph_seeder.wrapper.sparql.client.SparqlClient import SparqlClient
@@ -47,7 +48,12 @@ class ComponentFactory:
47
48
  return wrapper_class(uri_manager, config)
48
49
 
49
50
  @staticmethod
50
- def create_strategy(wrapper, uri_manager, config: dict):
51
+ def create_strategy(
52
+ wrapper: NeighborhoodWrapper,
53
+ uri_manager: URIManager,
54
+ config: dict,
55
+ explored_nodes=None,
56
+ ):
51
57
  """Create an extraction strategy based on the configuration."""
52
58
  strategy_type = config.get("extraction", {}).get("strategy", "bfs").lower()
53
59
 
@@ -61,4 +67,9 @@ class ComponentFactory:
61
67
  f"Unknown strategy type: '{strategy_type}'. Valid options: {list(registered_strategies.keys())}"
62
68
  )
63
69
 
64
- return registered_strategies[strategy_type](wrapper, uri_manager, config)
70
+ return registered_strategies[strategy_type](
71
+ wrapper,
72
+ uri_manager,
73
+ config,
74
+ explored_nodes,
75
+ )