kgnode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kgnode/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """
2
+ kgnode - Knowledge Graph Agnostic Node for Knowledge-Aware LLM Applications.
3
+
4
+ Public API for knowledge graph retrieval and answer generation.
5
+ """
6
+
7
+ # Main Pipeline APIs
8
+ from kgnode.seed_finder import citable, get_seed_nodes
9
+ from kgnode.subgraph_extraction import get_subgraphs
10
+ from kgnode.generator import (
11
+ generate_sparql,
12
+ kg_retrieve,
13
+ generate_answer,
14
+ generate_answer_using_subgraph,
15
+ )
16
+
17
+ # Validation
18
+ from kgnode.validator import validate_subgraph
19
+
20
+ # Search Operations
21
+ from kgnode.keyword_search import search_entities_by_keywords
22
+
23
+ # VectorDB Operations
24
+ from kgnode.chroma_db import (
25
+ compile_chromadb,
26
+ compile_chromadb_from_csv,
27
+ semantic_search_entities,
28
+ get_or_create_chromadb,
29
+ add_or_update_entities,
30
+ delete_entities,
31
+ )
32
+
33
+ # Core Configuration
34
+ from kgnode.core.kg_config import KGConfig
35
+ from kgnode.core.sparql_query import execute_sparql_query
36
+
37
+ __all__ = [
38
+ # Main Pipeline APIs
39
+ "citable",
40
+ "get_seed_nodes",
41
+ "get_subgraphs",
42
+ "generate_sparql",
43
+ "kg_retrieve",
44
+ "generate_answer",
45
+ "generate_answer_using_subgraph",
46
+ # Validation
47
+ "validate_subgraph",
48
+ # Search Operations
49
+ "search_entities_by_keywords",
50
+ # VectorDB Operations
51
+ "compile_chromadb",
52
+ "compile_chromadb_from_csv",
53
+ "semantic_search_entities",
54
+ "get_or_create_chromadb",
55
+ "add_or_update_entities",
56
+ "delete_entities",
57
+ # Core Configuration
58
+ "KGConfig",
59
+ "execute_sparql_query",
60
+ ]
@@ -0,0 +1,474 @@
1
+ from kgnode.core.sparql_query import execute_sparql_query
2
+ from typing import List, Dict, Callable, Optional, Any
3
+ from kgnode.core.kg_config import KGConfig
4
+
5
+
6
def _default_entity_descriptor_logic(entity_uri: str, triples: List[Dict[str, Any]]) -> str:
    """Default DBLP-oriented entity descriptor logic.

    Classifies each triple by keyword-matching the human-readable form of its
    predicate (as produced by ``_uri_to_label``) and assembles a short,
    sentence-like description. It can be replaced by a user-provided function
    for other knowledge graphs.

    Args:
        entity_uri: URI of the entity (cleaned, no angle brackets).
        triples: List of dicts with 'predicate' and 'object' keys.

    Returns:
        Natural language description optimized for search. When ``triples`` is
        empty, only the label derived from ``entity_uri`` is returned.
    """
    entity_label = _uri_to_label(entity_uri)
    if not triples:
        return entity_label

    # Fields collected from the triples
    entity_type = None
    title = None
    authors = []
    venue = None
    year = None
    affiliation = None
    coauthors = []

    # Co-author predicates must be recognised BEFORE the generic author check:
    # 'coauthor' contains 'author', and camelCase predicates such as
    # ``coAuthorWith`` / ``coCreatorWith`` are rendered by _uri_to_label as
    # "co author with" / "co creator with". Tested in the old order, those
    # triples were all counted as authored publications.
    coauthor_markers = ('coauthor', 'co author', 'cocreator', 'co creator', 'collaborate')

    for triple in triples:
        pred_label = _uri_to_label(triple['predicate']).lower()
        obj_label = _uri_to_label(triple['object'])

        if 'type' in pred_label:
            entity_type = obj_label.lower()
        elif 'title' in pred_label:
            title = obj_label
        elif any(marker in pred_label for marker in coauthor_markers):
            coauthors.append(obj_label)
        elif 'authored by' in pred_label or 'author' in pred_label or 'creator' in pred_label:
            authors.append(obj_label)
        elif 'published in' in pred_label or 'venue' in pred_label or 'journal' in pred_label:
            venue = obj_label
        elif 'year' in pred_label:
            year = obj_label
        elif 'affiliation' in pred_label or 'organization' in pred_label:
            affiliation = obj_label

    description_parts = []

    if entity_type == 'person' or entity_type == 'creator':
        description_parts.append(f"Person: {entity_label}")
        if affiliation:
            description_parts.append(f"affiliated with {affiliation}")
        if authors:  # for a person, these objects are the papers they authored
            description_parts.append(f"author of {len(authors)} publications")
        if coauthors:
            coauthor_names = ", ".join(coauthors[:5])
            description_parts.append(f"collaborates with {coauthor_names}")

    elif entity_type in ['article', 'publication', 'inproceedings', 'informal']:
        description_parts.append(f"Publication: {entity_label}")
        if title:
            description_parts.append(f"titled '{title}'")
        if authors:
            # Limit to the first few authors for readability
            author_names = ", ".join(authors[:10])
            if len(authors) > 10:
                author_names += f" and {len(authors) - 10} more"
            description_parts.append(f"authored by {author_names}")
        if venue:
            description_parts.append(f"published in {venue}")
        if year:
            description_parts.append(f"in year {year}")

    else:
        # Generic fallback for other (or unknown) entity types
        description_parts.append(f"{entity_type or 'Entity'}: {entity_label}")
        if title:
            description_parts.append(f"titled '{title}'")
        if authors:
            description_parts.append(f"associated with authors: {', '.join(authors[:5])}")
        if venue:
            description_parts.append(f"venue: {venue}")
        if year:
            description_parts.append(f"year: {year}")

    # Join the fragments into one sentence-like string
    return ". ".join(description_parts) + "."
101
+
102
+
103
def _default_relation_descriptor_logic(relation_uri: str) -> str:
    """Describe a relation by turning its URI into a readable label.

    This is the default relation descriptor; it simply delegates to
    ``_uri_to_label``.

    Args:
        relation_uri: URI of the relation/predicate.

    Returns:
        The label ``_uri_to_label`` derives from ``relation_uri``.
    """
    readable_label = _uri_to_label(relation_uri)
    return readable_label
113
+
114
+
115
def create_entity_description(entity_uri: str, config: Optional[KGConfig] = None) -> str:
    """
    Create a focused natural language description optimized for semantic search.

    Fetches every ``?predicate ?object`` pair of the entity with one SPARQL
    query and passes the result to ``_default_entity_descriptor_logic``.

    Note: This function uses default DBLP logic. For custom logic, use
    EntityDescriptorWrapper with KGConfig.

    Args:
        entity_uri: URI of the entity (with or without angle brackets).
        config: Optional KGConfig instance for configuration.
            If None, ``KGConfig.default()`` is used.

    Returns:
        Natural language description optimized for search. If the URI is empty
        or cannot be written as a SPARQL IRI, no query is sent and the plain
        label of the URI is returned.
    """
    # Remove surrounding whitespace and angle brackets if present
    entity_uri = entity_uri.strip()
    if entity_uri.startswith('<') and entity_uri.endswith('>'):
        entity_uri = entity_uri[1:-1]

    # The URI is interpolated into ``<...>`` below. SPARQL's IRIREF production
    # forbids control characters, space and <>"{}|^`\ inside the brackets, so
    # such a value would yield a malformed (or altered) query. It cannot name
    # an entity, hence it has no triples: answer with the label straight away.
    if not entity_uri or any(ch <= ' ' or ch in '<>"{}|^`\\' for ch in entity_uri):
        return _uri_to_label(entity_uri)

    # Initialize config if not provided
    if config is None:
        config = KGConfig.default()

    query = f"""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dblp: <https://dblp.org/rdf/schema#>
    SELECT ?predicate ?object
    WHERE {{
        <{entity_uri}> ?predicate ?object .
    }}
    """

    triples = execute_sparql_query(query, config=config)

    return _default_entity_descriptor_logic(entity_uri, triples)
153
+
154
def create_entity_descriptions_batch(entity_uris: List[str], config: Optional[KGConfig] = None) -> Dict[str, str]:
    """
    Create focused descriptions for multiple entities in batch.

    All triples are fetched with a single SPARQL query using a VALUES clause.
    Each entity is then described by ``_default_entity_descriptor_logic``, the
    same logic ``create_entity_description`` uses, so single and batch results
    agree.

    Args:
        entity_uris: List of entity URIs to describe (with or without angle
            brackets). Blank entries are ignored.
        config: Optional KGConfig instance for configuration.
            If None, ``KGConfig.default()`` is used.

    Returns:
        Dictionary mapping each non-blank input URI (exactly as passed in) to
        its description. Entities without triples, and URIs that cannot be
        written as a SPARQL IRI, map to the plain label of the URI.
    """
    if not entity_uris:
        return {}

    # Keep every original URI paired with its cleaned form. Zipping the input
    # list against a separately filtered list shifts all later entries as soon
    # as one blank URI is skipped.
    uri_pairs = []
    for original_uri in entity_uris:
        cleaned_uri = original_uri.strip()
        if not cleaned_uri:
            continue
        if cleaned_uri.startswith('<') and cleaned_uri.endswith('>'):
            cleaned_uri = cleaned_uri[1:-1]
        uri_pairs.append((original_uri, cleaned_uri))

    if not uri_pairs:
        return {}

    def _is_queryable(uri: str) -> bool:
        # SPARQL's IRIREF production forbids control characters, space and
        # <>"{}|^`\ between the angle brackets; interpolating such a value
        # would corrupt the whole batch query.
        return bool(uri) and not any(ch <= ' ' or ch in '<>"{}|^`\\' for ch in uri)

    # Unique, well-formed URIs in first-seen order
    queryable_uris = list(dict.fromkeys(
        cleaned for _, cleaned in uri_pairs if _is_queryable(cleaned)
    ))

    entity_triples = {}
    if queryable_uris:
        # Initialize config if not provided
        if config is None:
            config = KGConfig.default()

        values_clause = " ".join(f"<{uri}>" for uri in queryable_uris)

        query = f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dblp: <https://dblp.org/rdf/schema#>

        SELECT ?entity ?predicate ?object
        WHERE {{
            VALUES ?entity {{ {values_clause} }}
            ?entity ?predicate ?object .
        }}
        """

        # Group triples by entity
        for triple in execute_sparql_query(query, config=config):
            entity_triples.setdefault(triple['entity'], []).append(triple)

    descriptions = {}
    for original_uri, cleaned_uri in uri_pairs:
        triples = entity_triples.get(cleaned_uri)
        if triples:
            descriptions[original_uri] = _default_entity_descriptor_logic(cleaned_uri, triples)
        else:
            descriptions[original_uri] = _uri_to_label(cleaned_uri)

    return descriptions
272
+
273
def create_relation_description(relation_uri: str) -> str:
    """
    Produce a natural language description for a relation.

    No knowledge-graph specific handling is involved: the URI is handed to
    ``_uri_to_label`` and the resulting text is returned unchanged.

    Args:
        relation_uri: URI of the relation/predicate

    Returns:
        Natural language description
    """
    description = _uri_to_label(relation_uri)
    return description
285
+
286
+ def _uri_to_label(uri: str) -> str:
287
+ """
288
+ Convert URI to human-readable label.
289
+ Works for any knowledge graph by extracting and formatting the URI fragment.
290
+
291
+ Args:
292
+ uri: Full URI (e.g., "http://example.org/ontology#hasName" or "<http://example.org/ontology#hasName>")
293
+
294
+ Returns:
295
+ Human-readable label (e.g., "has name")
296
+ """
297
+ # Remove SPARQL brackets if present
298
+ uri = uri.strip('<>')
299
+
300
+ # Extract the fragment/local name from URI
301
+ if '#' in uri:
302
+ label = uri.split('#')[-1]
303
+ elif '/' in uri:
304
+ label = uri.split('/')[-1]
305
+ else:
306
+ label = uri
307
+
308
+ # Convert camelCase or PascalCase to spaces
309
+ # hasName -> has Name -> has name
310
+ import re
311
+ label = re.sub(r'([a-z])([A-Z])', r'\1 \2', label)
312
+
313
+ # Convert snake_case or kebab-case to spaces
314
+ label = label.replace('_', ' ').replace('-', ' ')
315
+
316
+ # Lowercase and clean up
317
+ label = label.lower().strip()
318
+
319
+ return label
320
+
321
+
322
class EntityDescriptorWrapper:
    """Wrapper class that handles SPARQL queries and applies descriptor logic.

    Provides single and batch entity description, using either a user-provided
    descriptor function or ``_default_entity_descriptor_logic``.
    """

    # Maximum number of URIs per VALUES clause (keeps one query within the
    # 8KB SPARQL query size limit this module was written against).
    _MAX_BATCH_SIZE = 80

    # Characters that SPARQL's IRIREF production forbids between '<' and '>'
    # (in addition to every character up to and including the space).
    _IRI_FORBIDDEN = '<>"{}|^`\\'

    def __init__(
        self,
        descriptor_function: Optional[Callable[[str, List[Dict[str, Any]]], str]] = None,
        config: Optional[KGConfig] = None
    ):
        """Initialize the wrapper with descriptor function.

        Args:
            descriptor_function: Function that takes (entity_uri, triples) and returns
                a description string. If None, uses the default DBLP logic.
            config: Optional KGConfig instance for configuration.
                If None, ``KGConfig.default()`` is used.
        """
        if config is None:
            config = KGConfig.default()

        self.descriptor_function = descriptor_function or _default_entity_descriptor_logic
        self.config = config

    @staticmethod
    def _clean_uri(uri: str) -> str:
        """Strip surrounding whitespace and one pair of enclosing angle brackets."""
        uri = uri.strip()
        if uri.startswith('<') and uri.endswith('>'):
            uri = uri[1:-1]
        return uri

    @classmethod
    def _is_queryable(cls, uri: str) -> bool:
        """Return True if ``uri`` can be safely written as ``<uri>`` in a query."""
        return bool(uri) and not any(ch <= ' ' or ch in cls._IRI_FORBIDDEN for ch in uri)

    def describe_single(self, entity_uri: str) -> str:
        """Create description for a single entity.

        Args:
            entity_uri: URI of the entity (with or without angle brackets).

        Returns:
            Natural language description of the entity. Falls back to the
            plain URI label when the URI cannot be queried or when the query
            or the descriptor function raises.
        """
        entity_uri = self._clean_uri(entity_uri)

        # Never interpolate a value that would break out of the <...> term.
        if not self._is_queryable(entity_uri):
            return _uri_to_label(entity_uri)

        # Query triples for this entity
        query = f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dblp: <https://dblp.org/rdf/schema#>
        SELECT ?predicate ?object
        WHERE {{
            <{entity_uri}> ?predicate ?object .
        }}
        """

        try:
            triples = execute_sparql_query(query, config=self.config)
            return self.descriptor_function(entity_uri, triples)
        except Exception as e:
            # Deliberate best-effort: a description must always be returned
            print(f"Warning: Error describing entity {entity_uri}: {e}")
            return _uri_to_label(entity_uri)

    def describe_batch(self, entity_uris: List[str]) -> Dict[str, str]:
        """Create descriptions for multiple entities in batch (optimized).

        Fetches triples with VALUES-clause queries of at most 80 URIs each and
        applies the descriptor function to every entity. Inputs longer than 80
        URIs are processed in several queries instead of being truncated, so
        every non-blank input URI appears in the result.

        Args:
            entity_uris: List of entity URIs (with or without angle brackets).
                Blank entries are ignored.

        Returns:
            Dictionary mapping original URIs to their descriptions. An entity
            maps to its plain URI label when it has no triples, when its URI
            cannot be queried, when its chunk's query failed, or when the
            descriptor function raised.
        """
        if not entity_uris:
            return {}

        # Keep each original URI paired with its cleaned form; zipping against
        # a separately filtered list misaligns once a blank URI is skipped.
        uri_pairs = []
        for original_uri in entity_uris:
            if not original_uri.strip():
                continue
            uri_pairs.append((original_uri, self._clean_uri(original_uri)))

        if not uri_pairs:
            return {}

        # Unique, well-formed URIs in first-seen order
        queryable_uris = list(dict.fromkeys(
            cleaned for _, cleaned in uri_pairs if self._is_queryable(cleaned)
        ))

        # Fetch and group triples by entity, one chunk at a time
        entity_triples = {}
        for start in range(0, len(queryable_uris), self._MAX_BATCH_SIZE):
            chunk = queryable_uris[start:start + self._MAX_BATCH_SIZE]
            values_clause = " ".join(f"<{uri}>" for uri in chunk)

            query = f"""
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dblp: <https://dblp.org/rdf/schema#>

            SELECT ?entity ?predicate ?object
            WHERE {{
                VALUES ?entity {{ {values_clause} }}
                ?entity ?predicate ?object .
            }}
            """

            try:
                triples = execute_sparql_query(query, config=self.config)
            except Exception as e:
                # Entities of this chunk fall back to URI labels below
                print(f"Warning: Error fetching triples for batch: {e}")
                continue

            for triple in triples:
                entity_triples.setdefault(triple['entity'], []).append(triple)

        # Create descriptions using descriptor function
        descriptions = {}
        for original_uri, cleaned_uri in uri_pairs:
            triples = entity_triples.get(cleaned_uri)
            if not triples:
                descriptions[original_uri] = _uri_to_label(cleaned_uri)
                continue
            try:
                descriptions[original_uri] = self.descriptor_function(cleaned_uri, triples)
            except Exception as e:
                print(f"Warning: Error applying descriptor function to {cleaned_uri}: {e}")
                descriptions[original_uri] = _uri_to_label(cleaned_uri)

        return descriptions
462
+
463
+
464
if __name__ == "__main__":
    # Manual smoke test: describe one DBLP record and one schema class in a
    # single batch call and print the resulting mapping.
    print(create_entity_descriptions_batch([
        "<https://dblp.org/rec/journals/nature/RheinbayNAWSTHH20>",
        "https://dblp.org/rdf/schema#Person",
    ]))

    # Other calls that can be tried from here:
    #   print(create_entity_description("https://dblp.org/rdf/schema#Publication"))
    #   print(create_relation_description("http://www.w3.org/2000/01/rdf-schema#comment"))
    #
    # Example usage:
    #   entities = ["http://example.org/entity1", "http://example.org/entity2", "http://example.org/entity3"]
    #   descriptions = create_entity_descriptions_batch(entities)
    #
    #   for entity, desc in descriptions.items():
    #       print(f"{entity}: {desc}")
kgnode/_node_ranker.py ADDED
@@ -0,0 +1,138 @@
1
+ import csv
2
+ import os
3
+ from functools import lru_cache
4
+ from typing import List, Dict, Optional
5
+ import time
6
+ import threading
7
+ from kgnode.core.kg_config import KGConfig
8
+
9
+ from kgnode.core.sparql_query import execute_sparql_query
10
+
11
# NOTE(review): because of lru_cache, a repeated call with the same arguments
# returns the cached list without re-running the query or rewriting the CSV,
# all callers share that one list object, and every argument (including
# ``config``) must be hashable. Confirm this is intended.
@lru_cache(maxsize=10)
def get_top_entities_by_degree(
    limit: int = 1_000_000,
    output_file: Optional[str] = None,
    config: Optional[KGConfig] = None
) -> List[Dict[str, str]]:
    """
    Get the top N entities of the knowledge graph ranked by out-degree.

    The degree is the number of triples in which the entity is the subject
    (``?entity ?p ?o``); incoming edges are not counted. Results are written
    to a CSV file with the columns ``entity`` and ``degree`` and also returned.
    A progress spinner is printed while the query runs.

    Args:
        limit (int): Number of top entities to retrieve. Default is 1,000,000.
        output_file (str): Path to the output CSV file. If None, ``config.csv_path`` is used.
        config: Optional KGConfig instance for configuration.
            If None, ``KGConfig.default()`` is used.

    Returns:
        List[Dict]: The rows returned by ``execute_sparql_query``; each row is
            read with the keys 'entity' and 'degree'.

    Raises:
        Whatever ``execute_sparql_query`` or the file operations raise; the
        spinner thread is stopped before the exception propagates.
    """
    # Initialize config if not provided
    if config is None:
        config = KGConfig.default()

    # Use default path if not provided
    if output_file is None:
        output_file = config.csv_path

    # Ensure the parent directory exists. A bare filename has an empty
    # dirname, and os.makedirs("") raises FileNotFoundError.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print(f"Querying top {limit:,} entities by degree, For 1 million nodes KG it takes 7 seconds to run.")

    # Out-degree ranking. For in-degree, count ?s over ``?s ?p ?entity``; for
    # total degree, count over the UNION of both patterns.
    sparql_query = f"""
    SELECT ?entity (COUNT(?o) as ?degree)
    WHERE {{
        ?entity ?p ?o .
    }}
    GROUP BY ?entity
    ORDER BY DESC(?degree)
    LIMIT {limit}
    """

    # Spinner setup
    spinner_chars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
    stop_spinner = threading.Event()

    def spin():
        i = 0
        start = time.time()
        while not stop_spinner.is_set():
            elapsed = int(time.time() - start)
            mins, secs = divmod(elapsed, 60)
            print(f'\r{spinner_chars[i % len(spinner_chars)]} Querying... {mins:02d}:{secs:02d}', end='', flush=True)
            i += 1
            # Wakes up immediately once the event is set
            stop_spinner.wait(0.1)

    # Start spinner
    spinner_thread = threading.Thread(target=spin)
    spinner_thread.start()

    start_time = time.time()
    try:
        results = execute_sparql_query(sparql_query, config=config)
        query_time = time.time() - start_time
    finally:
        # Always stop the spinner: without this, a failing query would leave
        # the non-daemon thread spinning and keep the process alive.
        stop_spinner.set()
        spinner_thread.join()

    print(f"\r✓ Query completed in {query_time:.1f} seconds ({query_time / 60:.1f} minutes)")
    print(f"✓ Retrieved {len(results):,} entities")
    print(f"Saving to {output_file}...")

    # Save to CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['entity', 'degree']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for idx, row in enumerate(results):
            writer.writerow({
                'entity': row['entity'],
                'degree': row['degree']
            })

            if (idx + 1) % 100_000 == 0:
                print(f" Written {idx + 1:,} rows...")

    total_time = time.time() - start_time
    print(f"✓ Done! Saved {len(results):,} entities to {output_file}")
    print(f"Total time: {total_time:.1f} seconds")

    return results
134
+
135
+
136
if __name__ == "__main__":
    # Manual run: rank the 10,000 best-connected entities, write them to the
    # default CSV location and dump the returned rows to stdout.
    entities = get_top_entities_by_degree(limit=10_000)
    print(entities)