scitex 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. scitex/__version__.py +1 -1
  2. scitex/browser/__init__.py +53 -0
  3. scitex/browser/auth/__init__.py +35 -0
  4. scitex/browser/auth/google.py +381 -0
  5. scitex/browser/collaboration/__init__.py +5 -0
  6. scitex/browser/debugging/__init__.py +56 -0
  7. scitex/browser/debugging/_failure_capture.py +372 -0
  8. scitex/browser/debugging/_sync_session.py +259 -0
  9. scitex/browser/debugging/_test_monitor.py +284 -0
  10. scitex/browser/debugging/_visual_cursor.py +432 -0
  11. scitex/scholar/citation_graph/README.md +117 -0
  12. scitex/scholar/citation_graph/__init__.py +29 -0
  13. scitex/scholar/citation_graph/builder.py +214 -0
  14. scitex/scholar/citation_graph/database.py +246 -0
  15. scitex/scholar/citation_graph/example.py +96 -0
  16. scitex/scholar/citation_graph/models.py +80 -0
  17. scitex/scholar/config/ScholarConfig.py +23 -3
  18. scitex/scholar/config/default.yaml +56 -0
  19. scitex/scholar/core/Paper.py +102 -0
  20. scitex/scholar/core/__init__.py +44 -0
  21. scitex/scholar/core/journal_normalizer.py +524 -0
  22. scitex/scholar/core/oa_cache.py +285 -0
  23. scitex/scholar/core/open_access.py +457 -0
  24. scitex/scholar/metadata_engines/ScholarEngine.py +9 -1
  25. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +82 -21
  26. scitex/scholar/pdf_download/ScholarPDFDownloader.py +137 -0
  27. scitex/scholar/pdf_download/strategies/__init__.py +6 -0
  28. scitex/scholar/pdf_download/strategies/open_access_download.py +186 -0
  29. scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +27 -9
  30. scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +24 -8
  31. scitex/scholar/search_engines/ScholarSearchEngine.py +6 -1
  32. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/METADATA +1 -1
  33. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/RECORD +36 -20
  34. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/WHEEL +0 -0
  35. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/entry_points.txt +0 -0
  36. {scitex-2.4.1.dist-info → scitex-2.4.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,117 @@
1
+ # Citation Graph Module
2
+
3
+ Build and analyze citation networks for academic papers using CrossRef data.
4
+
5
+ ## Features
6
+
7
+ - **Citation extraction**: Forward (papers a work cites) and reverse (papers that cite it) citation lookups
8
+ - **Similarity metrics**: Co-citation and bibliographic coupling analysis
9
+ - **Network building**: Construct graphs of related papers
10
+ - **Export formats**: JSON for D3.js, vis.js, Cytoscape
11
+
12
+ ## Quick Start
13
+
14
+ ```python
15
+ from scitex.scholar.citation_graph import CitationGraphBuilder
16
+
17
+ # Initialize with CrossRef database
18
+ builder = CitationGraphBuilder("/path/to/crossref.db")
19
+
20
+ # Build citation network for a paper
21
+ graph = builder.build("10.1038/s41586-020-2008-3", top_n=20)
22
+
23
+ # Export for visualization
24
+ builder.export_json(graph, "network.json")
25
+
26
+ # Get paper summary
27
+ summary = builder.get_paper_summary("10.1038/s41586-020-2008-3")
28
+ ```
29
+
30
+ ## Architecture
31
+
32
+ ```
33
+ citation_graph/
34
+ ├── __init__.py # Package exports
35
+ ├── builder.py # CitationGraphBuilder (main interface)
36
+ ├── database.py # Database queries and connection management
37
+ ├── models.py # Data models (PaperNode, CitationEdge, CitationGraph)
38
+ └── README.md # This file
39
+ ```
40
+
41
+ ## Similarity Metrics
42
+
43
+ ### 1. Co-citation
44
+ Papers are related if they are frequently cited together.
45
+ - **Algorithm**: Find papers that appear together in reference lists
46
+ - **Weight**: 2.0 (default)
47
+ - **Use case**: Find foundational/seminal works in the same field
48
+
49
+ ### 2. Bibliographic Coupling
50
+ Papers are related if they cite similar references.
51
+ - **Algorithm**: Count shared references between papers
52
+ - **Weight**: 2.0 (default)
53
+ - **Use case**: Find papers addressing similar problems/methods
54
+
55
+ ### 3. Direct Citations
56
+ Papers directly citing or cited by the seed paper.
57
+ - **Weight**: 1.0 (default)
58
+ - **Use case**: Find immediately related work
59
+
60
+ ## Performance
61
+
62
+ Based on experiments with 47M+ citations:
63
+
64
+ | Operation | Time | Status |
65
+ |-----------|------|--------|
66
+ | Forward citations | 0.1ms | ⚡ Excellent |
67
+ | Reverse citations | 3.3s | ✓ Good |
68
+ | Co-citation | 3.2s | ✓ Good |
69
+ | Bibliographic coupling | 25s | ⚠️ Needs optimization |
70
+ | **Full network build** | **~30s** | ✓ Acceptable |
71
+
72
+ ## Database Schema
73
+
74
+ Requires CrossRef database with:
75
+ - `works` table: Paper metadata (`doi` column plus a `metadata` column holding the CrossRef record as JSON)
76
+ - `citations` table: Citation relationships (citing_doi, cited_doi, citing_year)
77
+
78
+ ## Example Output
79
+
80
+ ```json
81
+ {
82
+ "seed": "10.1038/s41586-020-2008-3",
83
+ "nodes": [
84
+ {
85
+ "id": "10.1038/s41586-020-2008-3",
86
+ "title": "A Randomized Controlled Trial...",
87
+ "year": 2020,
88
+ "authors": ["Smith J", "Jones A"],
89
+ "journal": "Nature",
90
+ "similarity_score": 100.0
91
+ },
92
+ ...
93
+ ],
94
+ "edges": [
95
+ {
96
+ "source": "10.1038/s41586-020-2008-3",
97
+ "target": "10.1016/j.cell.2019.11.025",
98
+ "type": "cites"
99
+ },
100
+ ...
101
+ ]
102
+ }
103
+ ```
104
+
105
+ ## Future Enhancements
106
+
107
+ - [ ] Redis caching for popular papers
108
+ - [ ] Async/parallel query execution
109
+ - [ ] Additional similarity metrics (topic modeling, author networks)
110
+ - [ ] GraphQL API
111
+ - [ ] Real-time updates
112
+
113
+ ## References
114
+
115
+ - Co-citation: Small, H. (1973). Co-citation in the scientific literature. *J. Am. Soc. Inf. Sci.*
116
+ - Bibliographic coupling: Kessler, M. M. (1963). Bibliographic coupling. *American Documentation*
117
+ - Connected Papers (inspiration): https://www.connectedpapers.com/
@@ -0,0 +1,29 @@
1
+ """
2
+ Citation Graph Module
3
+
4
+ Build and analyze citation networks for academic papers using CrossRef data.
5
+
6
+ This module provides tools to:
7
+ - Extract citation relationships
8
+ - Calculate paper similarity (co-citation, bibliographic coupling)
9
+ - Build citation network graphs
10
+ - Export for visualization (D3.js, vis.js, Cytoscape)
11
+
12
+ Example:
13
+ >>> from scitex.scholar.citation_graph import CitationGraphBuilder
14
+ >>>
15
+ >>> builder = CitationGraphBuilder(db_path="/path/to/crossref.db")
16
+ >>> graph = builder.build("10.1038/s41586-020-2008-3", top_n=20)
17
+ >>> builder.export_json(graph, "network.json")
18
+ """
19
+
20
+ from .builder import CitationGraphBuilder
21
+ from .models import PaperNode, CitationEdge, CitationGraph
22
+
23
+ __version__ = "0.1.0"
24
+ __all__ = [
25
+ "CitationGraphBuilder",
26
+ "PaperNode",
27
+ "CitationEdge",
28
+ "CitationGraph",
29
+ ]
@@ -0,0 +1,214 @@
1
+ """
2
+ Citation Graph Builder
3
+
4
+ Main interface for building citation networks from CrossRef data.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Optional, List
10
+ from collections import Counter
11
+
12
+ from .database import CitationDatabase
13
+ from .models import PaperNode, CitationEdge, CitationGraph
14
+
15
+
16
class CitationGraphBuilder:
    """
    Build citation network graphs for academic papers.

    The builder owns a CitationDatabase and opens/closes its connection
    around each public operation (``build`` and ``get_paper_summary``).

    Example:
        >>> builder = CitationGraphBuilder("/path/to/crossref.db")
        >>> graph = builder.build("10.1038/s41586-020-2008-3", top_n=20)
        >>> builder.export_json(graph, "network.json")
    """

    # Score given to the seed paper's own node.
    _SEED_SCORE = 100.0

    def __init__(self, db_path: str):
        """
        Initialize builder with database path.

        Args:
            db_path: Path to CrossRef SQLite database

        Raises:
            FileNotFoundError: Raised by CitationDatabase when the file
                does not exist.
        """
        self.db_path = db_path
        self.db = CitationDatabase(db_path)

    @staticmethod
    def _first(values, default):
        """Return the first non-empty entry of a list-valued field, else default."""
        if isinstance(values, str):
            return values or default
        if isinstance(values, (list, tuple)) and values and values[0]:
            return values[0]
        return default

    @staticmethod
    def _publication_year(metadata: dict) -> int:
        """Return metadata["published"]["date-parts"][0][0], or 0 when absent/empty."""
        published = metadata.get("published")
        if not isinstance(published, dict):
            return 0
        date_parts = published.get("date-parts")
        if date_parts and date_parts[0] and date_parts[0][0]:
            return date_parts[0][0]
        return 0

    @staticmethod
    def _format_author(author: dict, initial_only: bool = False) -> str:
        """Format an author record as "Family Given" (or "Family G")."""
        family = author.get("family") or ""
        given = author.get("given") or ""
        if initial_only:
            given = given[:1]
        label = f"{family} {given}".strip()
        # Records without family/given fall back to a "name" key when present.
        return label or (author.get("name") or "")

    def build(
        self,
        seed_doi: str,
        top_n: int = 20,
        weight_coupling: float = 2.0,
        weight_cocitation: float = 2.0,
        weight_direct: float = 1.0,
    ) -> "CitationGraph":
        """
        Build citation network around a seed paper.

        The seed is always the first node and appears exactly once, even
        when the similarity scores contain it (self-citation, or a seed DOI
        given in a different letter case than the database uses).

        Args:
            seed_doi: DOI of the seed paper
            top_n: Number of most similar papers to include
            weight_coupling: Weight for bibliographic coupling
            weight_cocitation: Weight for co-citation
            weight_direct: Weight for direct citations

        Returns:
            CitationGraph object with nodes and edges
        """
        with self.db:
            scores = self.db.get_combined_similarity_scores(
                seed_doi,
                weight_coupling=weight_coupling,
                weight_cocitation=weight_cocitation,
                weight_direct=weight_direct,
            )

            # Rank every candidate, drop the seed, then keep the best top_n.
            seed_key = seed_doi.lower()
            ranked = [
                (doi, score)
                for doi, score in scores.most_common()
                if doi and doi.lower() != seed_key
            ][: max(top_n, 0)]

            nodes = [self._create_paper_node(seed_doi, self._SEED_SCORE)]
            nodes.extend(
                self._create_paper_node(doi, score) for doi, score in ranked
            )

            # Edges are restricted to citations between papers in the network.
            top_dois = [seed_doi] + [doi for doi, _ in ranked]
            edges = self._build_citation_edges(top_dois)

        return CitationGraph(
            seed_doi=seed_doi,
            nodes=nodes,
            edges=edges,
            metadata={
                "top_n": top_n,
                "weights": {
                    "coupling": weight_coupling,
                    "cocitation": weight_cocitation,
                    "direct": weight_direct,
                },
            },
        )

    def _create_paper_node(
        self, doi: str, similarity_score: float
    ) -> "PaperNode":
        """
        Create a PaperNode with metadata from database.

        Must be called while the database connection is open. Missing or
        empty metadata fields fall back to defaults instead of raising.

        Args:
            doi: DOI of the paper
            similarity_score: Calculated similarity score

        Returns:
            PaperNode with title (max 200 chars), year, up to three
            authors and journal; a bare node (doi + score) when the paper
            has no metadata record.
        """
        metadata = self.db.get_paper_metadata(doi)
        if not metadata:
            return PaperNode(doi=doi, similarity_score=similarity_score)

        author_names = [
            self._format_author(author, initial_only=True)
            for author in (metadata.get("author") or [])[:3]
        ]

        return PaperNode(
            doi=doi,
            title=self._first(metadata.get("title"), "Unknown")[:200],
            year=self._publication_year(metadata),
            authors=author_names,
            journal=self._first(metadata.get("container-title"), ""),
            similarity_score=similarity_score,
        )

    def _build_citation_edges(self, dois: List[str]) -> "List[CitationEdge]":
        """
        Build citation edges between papers in the network.

        Matching is case-insensitive, and each edge target uses the same
        spelling as the corresponding entry in ``dois`` so that edge
        endpoints always match node ids.

        Args:
            dois: List of DOIs in the network

        Returns:
            List of CitationEdge objects (edge_type "cites")
        """
        # lower-cased DOI -> spelling used for the node
        canonical = {}
        for doi in dois:
            canonical.setdefault(doi.lower(), doi)

        edges = []
        for doi in dois:
            # At most 100 references per paper are inspected.
            for cited_doi in self.db.get_references(doi, limit=100):
                target = canonical.get(cited_doi.lower()) if cited_doi else None
                if target is None:
                    continue
                edges.append(
                    CitationEdge(source=doi, target=target, edge_type="cites")
                )

        return edges

    def export_json(self, graph: "CitationGraph", output_path: str):
        """
        Export graph to JSON file for visualization.

        Args:
            graph: CitationGraph to export (serialized via graph.to_dict())
            output_path: Path to output JSON file
        """
        output = Path(output_path)
        # Explicit encoding keeps the output independent of the platform locale.
        with open(output, "w", encoding="utf-8") as f:
            json.dump(graph.to_dict(), f, indent=2)

    def get_paper_summary(self, doi: str) -> Optional[dict]:
        """
        Get summary information for a paper.

        Args:
            doi: DOI of the paper

        Returns:
            Dictionary with doi, title, year, authors (up to five),
            journal, reference_count and citation_count, or None when the
            paper has no metadata record. Both counts are capped at 1000
            because the underlying lookups are limited to 1000 rows.
        """
        with self.db:
            metadata = self.db.get_paper_metadata(doi)
            if not metadata:
                return None

            refs = self.db.get_references(doi, limit=1000)
            citations = self.db.get_citations(doi, limit=1000)

        return {
            "doi": doi,
            "title": self._first(metadata.get("title"), "Unknown"),
            "year": self._publication_year(metadata),
            "authors": [
                self._format_author(author)
                for author in (metadata.get("author") or [])[:5]
            ],
            "journal": self._first(metadata.get("container-title"), "Unknown"),
            "reference_count": len(refs),
            "citation_count": len(citations),
        }
@@ -0,0 +1,246 @@
1
+ """
2
+ Database access layer for citation graph queries.
3
+
4
+ Handles all SQL queries to the CrossRef SQLite database with
5
+ optimized queries and connection management.
6
+ """
7
+
8
+ import sqlite3
9
+ import json
10
+ from pathlib import Path
11
+ from typing import List, Tuple, Dict, Optional
12
+ from collections import Counter
13
+
14
+
15
class CitationDatabase:
    """
    Database interface for citation graph operations.

    Queries two tables of a SQLite database:
    - ``citations`` (citing_doi, cited_doi, citing_year)
    - ``works`` (doi, metadata as a JSON string)

    Query methods require an open connection; use the instance as a
    context manager or call ``connect()`` / ``close()`` explicitly.
    DOIs are lower-cased before being matched against ``citations``.
    """

    def __init__(self, db_path: str):
        """
        Record the database location; no connection is opened yet.

        Args:
            db_path: Path to CrossRef SQLite database

        Raises:
            FileNotFoundError: If ``db_path`` does not exist.
        """
        self.db_path = Path(db_path)
        if not self.db_path.exists():
            raise FileNotFoundError(f"Database not found: {db_path}")

        self.conn = None

    def connect(self, read_only: bool = True):
        """
        Connect to database.

        A connection left open by an earlier call is closed first so it
        is not leaked.

        Args:
            read_only: If True, open in read-only mode (default)
        """
        self.close()

        if read_only:
            # as_uri() percent-encodes characters such as '#', '?', '%' and
            # spaces; interpolating the raw path would let them be parsed as
            # URI syntax and open the wrong file or drop the mode=ro option.
            uri = f"{self.db_path.resolve().as_uri()}?mode=ro"
            self.conn = sqlite3.connect(
                uri,
                uri=True,
                # Allow use from threads other than the creating one
                # (e.g. a web framework's worker threads).
                check_same_thread=False,
            )
        else:
            self.conn = sqlite3.connect(
                self.db_path,
                check_same_thread=False,
            )

        self.conn.row_factory = sqlite3.Row

    def close(self):
        """Close database connection (no-op when not connected)."""
        if self.conn:
            self.conn.close()
            self.conn = None

    def __enter__(self):
        """Context manager entry: open a read-only connection."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection."""
        self.close()

    def get_references(self, doi: str, limit: int = 100) -> List[str]:
        """
        Get papers cited by this DOI (its reference list).

        Args:
            doi: DOI of the paper
            limit: Maximum number of references to return

        Returns:
            List of DOIs cited by the paper
        """
        cursor = self.conn.execute(
            """
            SELECT cited_doi
            FROM citations
            WHERE citing_doi = ?
            LIMIT ?
            """,
            (doi.lower(), limit),
        )
        return [row[0] for row in cursor]

    def get_citations(self, doi: str, limit: int = 100) -> List[Tuple[str, int]]:
        """
        Get papers that cite this DOI, most recent citing year first.

        Args:
            doi: DOI of the paper
            limit: Maximum number of citations to return

        Returns:
            List of (citing_doi, year) tuples
        """
        cursor = self.conn.execute(
            """
            SELECT citing_doi, citing_year
            FROM citations
            WHERE cited_doi = ?
            ORDER BY citing_year DESC
            LIMIT ?
            """,
            (doi.lower(), limit),
        )
        return [(row[0], row[1]) for row in cursor]

    def get_cocited_papers(
        self, doi: str, limit: int = 50
    ) -> List[Tuple[str, int]]:
        """
        Find papers co-cited with this DOI.

        Papers are co-cited if they appear together in reference lists.
        The given DOI itself is excluded from the result.

        Args:
            doi: DOI of the paper
            limit: Maximum number of results

        Returns:
            List of (cocited_doi, cocitation_count) tuples, highest
            count first
        """
        cursor = self.conn.execute(
            """
            SELECT c2.cited_doi, COUNT(*) as cocitation_count
            FROM citations c1
            JOIN citations c2 ON c1.citing_doi = c2.citing_doi
            WHERE c1.cited_doi = ?
              AND c2.cited_doi != ?
            GROUP BY c2.cited_doi
            ORDER BY cocitation_count DESC
            LIMIT ?
            """,
            (doi.lower(), doi.lower(), limit),
        )
        return [(row[0], row[1]) for row in cursor]

    def get_bibliographic_coupled_papers(
        self, doi: str, limit: int = 50
    ) -> List[Tuple[str, int]]:
        """
        Find papers with similar references (bibliographic coupling).

        Papers are bibliographically coupled if they cite the same
        references. The given DOI itself is excluded from the result.

        Args:
            doi: DOI of the paper
            limit: Maximum number of results

        Returns:
            List of (coupled_doi, shared_references_count) tuples,
            highest count first
        """
        cursor = self.conn.execute(
            """
            SELECT c2.citing_doi, COUNT(*) as shared_refs
            FROM citations c1
            JOIN citations c2 ON c1.cited_doi = c2.cited_doi
            WHERE c1.citing_doi = ?
              AND c2.citing_doi != ?
            GROUP BY c2.citing_doi
            ORDER BY shared_refs DESC
            LIMIT ?
            """,
            (doi.lower(), doi.lower(), limit),
        )
        return [(row[0], row[1]) for row in cursor]

    def get_paper_metadata(self, doi: str) -> Optional[Dict]:
        """
        Get metadata for a paper from works table.

        The DOI is looked up exactly as given first; if that misses and
        the DOI contains upper-case letters, the lower-cased form is
        tried as well, matching how the citation queries normalize DOIs.

        Args:
            doi: DOI of the paper

        Returns:
            Dictionary with paper metadata, or None if not found (or the
            stored metadata value is empty)
        """
        candidates = [doi]
        lowered = doi.lower()
        if lowered != doi:
            candidates.append(lowered)

        for candidate in candidates:
            row = self.conn.execute(
                "SELECT metadata FROM works WHERE doi = ?", (candidate,)
            ).fetchone()
            if row is not None and row[0]:
                return json.loads(row[0])
        return None

    def get_combined_similarity_scores(
        self,
        seed_doi: str,
        weight_coupling: float = 2.0,
        weight_cocitation: float = 2.0,
        weight_direct: float = 1.0,
        max_papers: int = 100,
    ) -> Counter:
        """
        Calculate combined similarity scores using multiple metrics.

        Combines:
        - Bibliographic coupling (shared references)
        - Co-citation (cited together)
        - Direct citations (cites or is cited by)

        The seed never appears in the result: the coupling and co-citation
        queries already exclude it, and a self-citation picked up by the
        direct-citation step is dropped for consistency.

        Args:
            seed_doi: DOI of the seed paper
            weight_coupling: Weight for bibliographic coupling score
            weight_cocitation: Weight for co-citation score
            weight_direct: Weight for direct citation score
            max_papers: Maximum papers to consider for the coupling and
                co-citation metrics (direct citations use a fixed limit
                of 50 in each direction)

        Returns:
            Counter with {doi: combined_score}
        """
        scores = Counter()

        # 1. Bibliographic coupling
        for doi, count in self.get_bibliographic_coupled_papers(
            seed_doi, limit=max_papers
        ):
            scores[doi] += count * weight_coupling

        # 2. Co-citation
        for doi, count in self.get_cocited_papers(seed_doi, limit=max_papers):
            scores[doi] += count * weight_cocitation

        # 3. Direct citations (both directions)
        for doi in self.get_references(seed_doi, limit=50):
            scores[doi] += weight_direct

        for doi, _ in self.get_citations(seed_doi, limit=50):
            scores[doi] += weight_direct

        # A paper is not "similar to itself"; drop any self-citation entry.
        scores.pop(seed_doi.lower(), None)

        return scores
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Example usage of the citation_graph module.
4
+
5
+ Run this from the scitex-code root:
6
+ python -m scitex.scholar.citation_graph.example
7
+ """
8
+
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ # Add parent directory to path for imports
13
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
14
+
15
+ from scitex.scholar.citation_graph import CitationGraphBuilder
16
+
17
+
18
def main():
    """
    Run the citation-graph demo against a local CrossRef database.

    Prints a summary of a fixed seed paper, builds its citation network,
    lists the ten most similar papers, and writes the graph to
    ``example_output.json`` next to this script.

    Returns:
        Process exit code: 0 on success, 1 when the database file or the
        seed paper cannot be found.
    """
    # Database path (adjust to your setup)
    db_path = Path.home() / "proj/crossref_local/data/crossref.db"

    if not db_path.exists():
        print(f"❌ Database not found: {db_path}")
        print("Please update the db_path in this script.")
        return 1

    print("=" * 70)
    print(" Citation Graph Example")
    print("=" * 70)
    print(f"\nDatabase: {db_path}")

    # Initialize builder
    builder = CitationGraphBuilder(str(db_path))

    # Example DOI (a well-cited paper)
    seed_doi = "10.1001/2013.jamapsychiatry.4"

    # Get paper summary
    print(f"\n1. Getting paper summary for {seed_doi}...")
    summary = builder.get_paper_summary(seed_doi)

    if not summary:
        print("Paper not found in database")
        return 1

    print(f"\nPaper: {summary['title']}")
    print(f"Authors: {', '.join(summary['authors'][:3])}")
    print(f"Year: {summary['year']}")
    print(f"Journal: {summary['journal']}")
    print(f"References: {summary['reference_count']}")
    print(f"Citations: {summary['citation_count']}")

    # Build citation network
    print("\n2. Building citation network (top 20 papers)...")
    graph = builder.build(seed_doi, top_n=20)

    print("\nNetwork built:")
    print(f" Nodes: {graph.node_count}")
    print(f" Edges: {graph.edge_count}")

    # Show top papers by similarity
    print("\nTop 10 most similar papers:")
    print(f"{'Rank':<5} {'Score':<7} {'Year':<6} {'Title':<60}")
    print("-" * 85)

    # Remove the seed BEFORE ranking so ranks run 1..10 without a gap and
    # exactly ten related papers are shown.
    seed_key = seed_doi.lower()
    related = sorted(
        (node for node in graph.nodes if node.doi.lower() != seed_key),
        key=lambda n: n.similarity_score,
        reverse=True,
    )

    for rank, node in enumerate(related[:10], 1):
        print(
            f"{rank:<5} {node.similarity_score:<7.1f} "
            f"{node.year:<6} {node.title[:60]:<60}"
        )

    # Export to JSON
    output_path = Path(__file__).parent / "example_output.json"
    builder.export_json(graph, str(output_path))
    print(f"\n3. Network exported to: {output_path}")
    print(f" File size: {output_path.stat().st_size / 1024:.1f} KB")

    print("\n✅ Example complete!")
    print("\nNext steps:")
    print(" - Open example_output.json to see the graph data")
    print(" - Use this JSON with D3.js, vis.js, or Cytoscape for visualization")
    print(" - Integrate with scitex-cloud for API endpoints")

    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())