scholarimpact 0.0.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scholarimpact/__init__.py +80 -0
- scholarimpact/_version.py +21 -0
- scholarimpact/assets/README.md +36 -0
- scholarimpact/assets/__init__.py +136 -0
- scholarimpact/assets/fonts/OFL-SpaceGrotesk.txt +93 -0
- scholarimpact/assets/fonts/OFL-SpaceMono.txt +93 -0
- scholarimpact/assets/fonts/SpaceGrotesk-SemiBold.ttf +0 -0
- scholarimpact/assets/fonts/SpaceGrotesk-VariableFont_wght.ttf +0 -0
- scholarimpact/assets/fonts/SpaceMono-Bold.ttf +0 -0
- scholarimpact/assets/fonts/SpaceMono-BoldItalic.ttf +0 -0
- scholarimpact/assets/fonts/SpaceMono-Italic.ttf +0 -0
- scholarimpact/assets/fonts/SpaceMono-Regular.ttf +0 -0
- scholarimpact/assets/streamlit/config.toml +55 -0
- scholarimpact/cli/__init__.py +5 -0
- scholarimpact/cli/commands/__init__.py +5 -0
- scholarimpact/cli/commands/crawl.py +115 -0
- scholarimpact/cli/commands/dashboard.py +49 -0
- scholarimpact/cli/commands/extract.py +45 -0
- scholarimpact/cli/commands/generate.py +126 -0
- scholarimpact/cli/main.py +78 -0
- scholarimpact/core/__init__.py +6 -0
- scholarimpact/core/crawler.py +1455 -0
- scholarimpact/core/extractor.py +173 -0
- scholarimpact/core/utils.py +429 -0
- scholarimpact/dashboard/__init__.py +6 -0
- scholarimpact/dashboard/app.py +298 -0
- scholarimpact/dashboard/components/__init__.py +17 -0
- scholarimpact/dashboard/components/base.py +121 -0
- scholarimpact/dashboard/components/config.py +142 -0
- scholarimpact/dashboard/components/layout.py +178 -0
- scholarimpact/dashboard/components/streamlit_app.py +1280 -0
- scholarimpact-0.0.1.dev1.dist-info/METADATA +436 -0
- scholarimpact-0.0.1.dev1.dist-info/RECORD +37 -0
- scholarimpact-0.0.1.dev1.dist-info/WHEEL +5 -0
- scholarimpact-0.0.1.dev1.dist-info/entry_points.txt +2 -0
- scholarimpact-0.0.1.dev1.dist-info/licenses/LICENSE +21 -0
- scholarimpact-0.0.1.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ScholarImpact: Citation Analysis and Dashboard Package
|
|
3
|
+
|
|
4
|
+
A comprehensive tool for analyzing Google Scholar citations with geographic
|
|
5
|
+
and institutional insights, featuring interactive Streamlit dashboards.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .core.crawler import CitationCrawler
|
|
9
|
+
from .core.extractor import AuthorExtractor
|
|
10
|
+
from .dashboard.app import Dashboard
|
|
11
|
+
try:
    from .data.loader import load_data
except ImportError as _exc:
    # Keep ``import scholarimpact`` working when the ``data`` subpackage is
    # not part of the installed distribution; fail only when the loader is
    # actually used. The exception is stored because ``_exc`` is unbound as
    # soon as the ``except`` block ends.
    _load_data_import_error = _exc

    def load_data(*args, **kwargs):
        """Placeholder raising the import failure of ``scholarimpact.data.loader``."""
        raise ImportError(
            "scholarimpact.data.loader is not available in this installation"
        ) from _load_data_import_error
|
|
12
|
+
|
|
13
|
+
# Version handling with setuptools-scm
|
|
14
|
+
try:
|
|
15
|
+
from ._version import version as __version__
|
|
16
|
+
from ._version import version_tuple
|
|
17
|
+
except ImportError:
|
|
18
|
+
# Fallback for development or when _version.py doesn't exist
|
|
19
|
+
__version__ = "0.0.0+unknown"
|
|
20
|
+
version_tuple = (0, 0, 0, "unknown", "unknown")
|
|
21
|
+
|
|
22
|
+
__author__ = "Abhishek Tiwari"
|
|
23
|
+
__email__ = "schoscholarimpact@abhishek-tiwari.com"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Main convenience functions
|
|
27
|
+
def extract_author(scholar_id, **kwargs):
    """Fetch an author's publications from Google Scholar.

    A default-constructed ``AuthorExtractor`` is created and ``scholar_id``
    plus any keyword arguments are forwarded to its ``extract`` method; the
    result of that call is returned unchanged.
    """
    return AuthorExtractor().extract(scholar_id, **kwargs)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def crawl_citations(url_or_data, **kwargs):
    """Crawl Google Scholar citations for ``url_or_data``.

    Keyword arguments configure the ``CitationCrawler`` constructor; the
    positional value is handed to ``crawl_all_citations`` and its result is
    returned unchanged.
    """
    return CitationCrawler(**kwargs).crawl_all_citations(url_or_data)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def create_dashboard(data_dir, **kwargs):
    """Build a Streamlit ``Dashboard`` for citation analysis.

    ``data_dir`` is passed as the ``data_dir`` keyword and every extra keyword
    argument is forwarded to the ``Dashboard`` constructor as-is.
    """
    dashboard = Dashboard(data_dir=data_dir, **kwargs)
    return dashboard
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Quick start function
|
|
45
|
+
def quick_analysis(scholar_id, openalex_email=None, launch_dashboard=True, data_dir="./data"):
    """Run extraction, citation crawling and dashboard creation in one call.

    Args:
        scholar_id: Identifier forwarded to ``extract_author``.
        openalex_email: Optional email forwarded to ``crawl_citations``.
        launch_dashboard: When true, ``run()`` is called on the new dashboard.
        data_dir: Directory that is created if missing and used by all steps.

    Returns:
        Dict with the keys ``"author"``, ``"citations"`` and ``"dashboard"``.
    """
    import os

    # Make sure the working directory exists before any step touches it.
    os.makedirs(data_dir, exist_ok=True)

    results = {}

    print(f"Extracting author data for {scholar_id}...")
    results["author"] = extract_author(scholar_id, output_dir=data_dir)

    print("Crawling citations...")
    # NOTE(review): the crawler receives a path to author.json here, whereas
    # the crawl-citations CLI command calls crawl_all_citations with a
    # cites_id -- confirm that a file path is an accepted input.
    author_file = f"{data_dir}/author.json"
    results["citations"] = crawl_citations(author_file, openalex_email=openalex_email)

    print("Creating dashboard...")
    results["dashboard"] = create_dashboard(data_dir)

    if launch_dashboard:
        print("Launching dashboard...")
        results["dashboard"].run()

    return results
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
__all__ = [
|
|
72
|
+
"CitationCrawler",
|
|
73
|
+
"AuthorExtractor",
|
|
74
|
+
"Dashboard",
|
|
75
|
+
"load_data",
|
|
76
|
+
"extract_author",
|
|
77
|
+
"crawl_citations",
|
|
78
|
+
"create_dashboard",
|
|
79
|
+
"quick_analysis",
|
|
80
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
6
|
+
TYPE_CHECKING = False
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
12
|
+
else:
|
|
13
|
+
VERSION_TUPLE = object
|
|
14
|
+
|
|
15
|
+
version: str
|
|
16
|
+
__version__: str
|
|
17
|
+
__version_tuple__: VERSION_TUPLE
|
|
18
|
+
version_tuple: VERSION_TUPLE
|
|
19
|
+
|
|
20
|
+
__version__ = version = '0.0.1.dev1'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 0, 1, 'dev1')
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# ScholarImpact Assets
|
|
2
|
+
|
|
3
|
+
This directory contains bundled assets for the ScholarImpact package.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- `streamlit/config.toml` - Default Streamlit configuration with optimized settings
|
|
8
|
+
- `fonts/` - Font files (`*.ttf`, `*.otf`, `*.woff`) and their license texts for dashboard theming
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
Assets are automatically copied when using:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
scholarimpact generate-dashboard
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Or programmatically:
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from scholarimpact.assets import copy_streamlit_config, copy_fonts
|
|
22
|
+
|
|
23
|
+
# Copy config
|
|
24
|
+
copy_streamlit_config('.streamlit/')
|
|
25
|
+
|
|
26
|
+
# Copy fonts
|
|
27
|
+
copy_fonts('.streamlit/')
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Adding Custom Assets
|
|
31
|
+
|
|
32
|
+
To add custom fonts or configurations:
|
|
33
|
+
|
|
34
|
+
1. Place font files in the `fonts/` subdirectory
|
|
35
|
+
2. Update `streamlit/config.toml` as needed
|
|
36
|
+
3. Reinstall package: `pip install -e .`
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Assets module for ScholarImpact package.
|
|
3
|
+
|
|
4
|
+
Contains bundled configuration files, fonts, and other static assets.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import shutil
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from importlib.resources import files
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_asset_path(asset_name: str) -> Optional[Path]:
    """
    Locate a bundled asset by name.

    The packaged ``scholarimpact.assets`` resources are consulted first; any
    error raised during that lookup is ignored and the directory containing
    this module is checked instead.

    Args:
        asset_name: Name of asset file, relative to the assets package

    Returns:
        Path to the asset, or None when neither lookup finds it
    """
    try:
        # Preferred lookup: resolve through importlib.resources.
        packaged = files("scholarimpact.assets") / asset_name
        if packaged.is_file():
            return Path(str(packaged))
    except Exception:
        pass

    # Fallback lookup: anything that exists next to this module.
    local_copy = Path(__file__).parent / asset_name
    return local_copy if local_copy.exists() else None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def copy_streamlit_config(output_dir: str, config_name: str = "config.toml") -> bool:
    """
    Copy bundled Streamlit config to output directory.

    The asset is looked up under ``config_name`` first. When that fails and
    ``config_name`` is a bare file name, the bundled ``streamlit/``
    subdirectory is tried as well, so the default ``"config.toml"`` resolves
    to ``streamlit/config.toml``.

    Args:
        output_dir: Output directory path (created if missing)
        config_name: Config file name or asset-relative path
            (default: config.toml)

    Returns:
        True if the file was copied, False if the asset was not found or the
        directory creation / copy failed
    """
    config_path = get_asset_path(config_name)
    if not config_path and Path(config_name).name == config_name:
        # Bare file name: fall back to the streamlit/ asset subdirectory.
        config_path = get_asset_path(f"streamlit/{config_name}")
    if not config_path:
        return False

    output_path = Path(output_dir)
    try:
        output_path.mkdir(parents=True, exist_ok=True)
        # Only the final path component is used for the target file name, so
        # "streamlit/config.toml" lands as "<output_dir>/config.toml".
        shutil.copy2(config_path, output_path / Path(config_name).name)
    except OSError:
        return False
    return True
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def copy_fonts(output_dir: str) -> int:
    """
    Copy bundled fonts and license files to a static directory.

    Font files (``*.ttf``, ``*.otf``, ``*.woff*``) and license files
    (``*.txt``) are read from the ``fonts`` directory next to this module.

    The destination depends on ``output_dir``:

    * a directory named ``.streamlit``: ``static`` is created beside it;
    * any other directory: ``static`` is created inside it.

    Args:
        output_dir: Output directory path (should be .streamlit directory)

    Returns:
        Number of files copied (fonts + licenses); files that cannot be
        copied are skipped and not counted
    """
    fonts_dir = Path(__file__).parent / "fonts"
    if not fonts_dir.exists():
        return 0

    # Fonts first, license texts last.
    sources = [
        path
        for pattern in ("*.ttf", "*.otf", "*.woff*", "*.txt")
        for path in fonts_dir.glob(pattern)
    ]
    if not sources:
        return 0

    output_path = Path(output_dir)
    if output_path.name == ".streamlit":
        # Create static at parent level (alongside .streamlit)
        static_dir = output_path.parent / "static"
    else:
        # Fallback: create static inside the given output directory
        static_dir = output_path / "static"
    static_dir.mkdir(parents=True, exist_ok=True)

    copied = 0
    for source in sources:
        try:
            shutil.copy2(source, static_dir / source.name)
        except OSError:
            # Best effort: one unreadable/unwritable file must not abort the rest.
            continue
        copied += 1
    return copied
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def list_assets() -> list:
    """
    List the bundled asset files stored directly beside this module.

    Sub-directories are not descended into and ``__init__.py`` is left out.

    Returns:
        List of asset file names
    """
    names = []
    for entry in Path(__file__).parent.iterdir():
        if not entry.is_file() or entry.name == "__init__.py":
            continue
        names.append(entry.name)
    return names
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Copyright 2020 The Space Grotesk Project Authors (https://github.com/floriankarsten/space-grotesk)
|
|
2
|
+
|
|
3
|
+
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
|
4
|
+
This license is copied below, and is also available with a FAQ at:
|
|
5
|
+
https://openfontlicense.org
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
-----------------------------------------------------------
|
|
9
|
+
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
|
|
10
|
+
-----------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
PREAMBLE
|
|
13
|
+
The goals of the Open Font License (OFL) are to stimulate worldwide
|
|
14
|
+
development of collaborative font projects, to support the font creation
|
|
15
|
+
efforts of academic and linguistic communities, and to provide a free and
|
|
16
|
+
open framework in which fonts may be shared and improved in partnership
|
|
17
|
+
with others.
|
|
18
|
+
|
|
19
|
+
The OFL allows the licensed fonts to be used, studied, modified and
|
|
20
|
+
redistributed freely as long as they are not sold by themselves. The
|
|
21
|
+
fonts, including any derivative works, can be bundled, embedded,
|
|
22
|
+
redistributed and/or sold with any software provided that any reserved
|
|
23
|
+
names are not used by derivative works. The fonts and derivatives,
|
|
24
|
+
however, cannot be released under any other type of license. The
|
|
25
|
+
requirement for fonts to remain under this license does not apply
|
|
26
|
+
to any document created using the fonts or their derivatives.
|
|
27
|
+
|
|
28
|
+
DEFINITIONS
|
|
29
|
+
"Font Software" refers to the set of files released by the Copyright
|
|
30
|
+
Holder(s) under this license and clearly marked as such. This may
|
|
31
|
+
include source files, build scripts and documentation.
|
|
32
|
+
|
|
33
|
+
"Reserved Font Name" refers to any names specified as such after the
|
|
34
|
+
copyright statement(s).
|
|
35
|
+
|
|
36
|
+
"Original Version" refers to the collection of Font Software components as
|
|
37
|
+
distributed by the Copyright Holder(s).
|
|
38
|
+
|
|
39
|
+
"Modified Version" refers to any derivative made by adding to, deleting,
|
|
40
|
+
or substituting -- in part or in whole -- any of the components of the
|
|
41
|
+
Original Version, by changing formats or by porting the Font Software to a
|
|
42
|
+
new environment.
|
|
43
|
+
|
|
44
|
+
"Author" refers to any designer, engineer, programmer, technical
|
|
45
|
+
writer or other person who contributed to the Font Software.
|
|
46
|
+
|
|
47
|
+
PERMISSION & CONDITIONS
|
|
48
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
49
|
+
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
|
50
|
+
redistribute, and sell modified and unmodified copies of the Font
|
|
51
|
+
Software, subject to the following conditions:
|
|
52
|
+
|
|
53
|
+
1) Neither the Font Software nor any of its individual components,
|
|
54
|
+
in Original or Modified Versions, may be sold by itself.
|
|
55
|
+
|
|
56
|
+
2) Original or Modified Versions of the Font Software may be bundled,
|
|
57
|
+
redistributed and/or sold with any software, provided that each copy
|
|
58
|
+
contains the above copyright notice and this license. These can be
|
|
59
|
+
included either as stand-alone text files, human-readable headers or
|
|
60
|
+
in the appropriate machine-readable metadata fields within text or
|
|
61
|
+
binary files as long as those fields can be easily viewed by the user.
|
|
62
|
+
|
|
63
|
+
3) No Modified Version of the Font Software may use the Reserved Font
|
|
64
|
+
Name(s) unless explicit written permission is granted by the corresponding
|
|
65
|
+
Copyright Holder. This restriction only applies to the primary font name as
|
|
66
|
+
presented to the users.
|
|
67
|
+
|
|
68
|
+
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
|
69
|
+
Software shall not be used to promote, endorse or advertise any
|
|
70
|
+
Modified Version, except to acknowledge the contribution(s) of the
|
|
71
|
+
Copyright Holder(s) and the Author(s) or with their explicit written
|
|
72
|
+
permission.
|
|
73
|
+
|
|
74
|
+
5) The Font Software, modified or unmodified, in part or in whole,
|
|
75
|
+
must be distributed entirely under this license, and must not be
|
|
76
|
+
distributed under any other license. The requirement for fonts to
|
|
77
|
+
remain under this license does not apply to any document created
|
|
78
|
+
using the Font Software.
|
|
79
|
+
|
|
80
|
+
TERMINATION
|
|
81
|
+
This license becomes null and void if any of the above conditions are
|
|
82
|
+
not met.
|
|
83
|
+
|
|
84
|
+
DISCLAIMER
|
|
85
|
+
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
86
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
|
87
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
|
88
|
+
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
|
89
|
+
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
90
|
+
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
|
91
|
+
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
92
|
+
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
|
93
|
+
OTHER DEALINGS IN THE FONT SOFTWARE.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Copyright 2016 The Space Mono Project Authors (https://github.com/googlefonts/spacemono)
|
|
2
|
+
|
|
3
|
+
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
|
4
|
+
This license is copied below, and is also available with a FAQ at:
|
|
5
|
+
https://openfontlicense.org
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
-----------------------------------------------------------
|
|
9
|
+
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
|
|
10
|
+
-----------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
PREAMBLE
|
|
13
|
+
The goals of the Open Font License (OFL) are to stimulate worldwide
|
|
14
|
+
development of collaborative font projects, to support the font creation
|
|
15
|
+
efforts of academic and linguistic communities, and to provide a free and
|
|
16
|
+
open framework in which fonts may be shared and improved in partnership
|
|
17
|
+
with others.
|
|
18
|
+
|
|
19
|
+
The OFL allows the licensed fonts to be used, studied, modified and
|
|
20
|
+
redistributed freely as long as they are not sold by themselves. The
|
|
21
|
+
fonts, including any derivative works, can be bundled, embedded,
|
|
22
|
+
redistributed and/or sold with any software provided that any reserved
|
|
23
|
+
names are not used by derivative works. The fonts and derivatives,
|
|
24
|
+
however, cannot be released under any other type of license. The
|
|
25
|
+
requirement for fonts to remain under this license does not apply
|
|
26
|
+
to any document created using the fonts or their derivatives.
|
|
27
|
+
|
|
28
|
+
DEFINITIONS
|
|
29
|
+
"Font Software" refers to the set of files released by the Copyright
|
|
30
|
+
Holder(s) under this license and clearly marked as such. This may
|
|
31
|
+
include source files, build scripts and documentation.
|
|
32
|
+
|
|
33
|
+
"Reserved Font Name" refers to any names specified as such after the
|
|
34
|
+
copyright statement(s).
|
|
35
|
+
|
|
36
|
+
"Original Version" refers to the collection of Font Software components as
|
|
37
|
+
distributed by the Copyright Holder(s).
|
|
38
|
+
|
|
39
|
+
"Modified Version" refers to any derivative made by adding to, deleting,
|
|
40
|
+
or substituting -- in part or in whole -- any of the components of the
|
|
41
|
+
Original Version, by changing formats or by porting the Font Software to a
|
|
42
|
+
new environment.
|
|
43
|
+
|
|
44
|
+
"Author" refers to any designer, engineer, programmer, technical
|
|
45
|
+
writer or other person who contributed to the Font Software.
|
|
46
|
+
|
|
47
|
+
PERMISSION & CONDITIONS
|
|
48
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
49
|
+
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
|
50
|
+
redistribute, and sell modified and unmodified copies of the Font
|
|
51
|
+
Software, subject to the following conditions:
|
|
52
|
+
|
|
53
|
+
1) Neither the Font Software nor any of its individual components,
|
|
54
|
+
in Original or Modified Versions, may be sold by itself.
|
|
55
|
+
|
|
56
|
+
2) Original or Modified Versions of the Font Software may be bundled,
|
|
57
|
+
redistributed and/or sold with any software, provided that each copy
|
|
58
|
+
contains the above copyright notice and this license. These can be
|
|
59
|
+
included either as stand-alone text files, human-readable headers or
|
|
60
|
+
in the appropriate machine-readable metadata fields within text or
|
|
61
|
+
binary files as long as those fields can be easily viewed by the user.
|
|
62
|
+
|
|
63
|
+
3) No Modified Version of the Font Software may use the Reserved Font
|
|
64
|
+
Name(s) unless explicit written permission is granted by the corresponding
|
|
65
|
+
Copyright Holder. This restriction only applies to the primary font name as
|
|
66
|
+
presented to the users.
|
|
67
|
+
|
|
68
|
+
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
|
69
|
+
Software shall not be used to promote, endorse or advertise any
|
|
70
|
+
Modified Version, except to acknowledge the contribution(s) of the
|
|
71
|
+
Copyright Holder(s) and the Author(s) or with their explicit written
|
|
72
|
+
permission.
|
|
73
|
+
|
|
74
|
+
5) The Font Software, modified or unmodified, in part or in whole,
|
|
75
|
+
must be distributed entirely under this license, and must not be
|
|
76
|
+
distributed under any other license. The requirement for fonts to
|
|
77
|
+
remain under this license does not apply to any document created
|
|
78
|
+
using the Font Software.
|
|
79
|
+
|
|
80
|
+
TERMINATION
|
|
81
|
+
This license becomes null and void if any of the above conditions are
|
|
82
|
+
not met.
|
|
83
|
+
|
|
84
|
+
DISCLAIMER
|
|
85
|
+
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
86
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
|
87
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
|
88
|
+
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
|
89
|
+
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
90
|
+
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
|
91
|
+
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
92
|
+
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
|
93
|
+
OTHER DEALINGS IN THE FONT SOFTWARE.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[server]
|
|
2
|
+
enableStaticServing = true
|
|
3
|
+
|
|
4
|
+
[[theme.fontFaces]]
|
|
5
|
+
family = "SpaceGrotesk"
|
|
6
|
+
url = "app/static/SpaceGrotesk-VariableFont_wght.ttf"
|
|
7
|
+
|
|
8
|
+
[[theme.fontFaces]]
|
|
9
|
+
family = "SpaceMono"
|
|
10
|
+
url = "app/static/SpaceMono-Bold.ttf"
|
|
11
|
+
style = "normal"
|
|
12
|
+
weight = 700
|
|
13
|
+
|
|
14
|
+
[[theme.fontFaces]]
|
|
15
|
+
family = "SpaceMono"
|
|
16
|
+
url = "app/static/SpaceMono-BoldItalic.ttf"
|
|
17
|
+
style = "italic"
|
|
18
|
+
weight = 700
|
|
19
|
+
|
|
20
|
+
[[theme.fontFaces]]
|
|
21
|
+
family = "SpaceMono"
|
|
22
|
+
url = "app/static/SpaceMono-Italic.ttf"
|
|
23
|
+
style = "italic"
|
|
24
|
+
weight = 400
|
|
25
|
+
|
|
26
|
+
[[theme.fontFaces]]
|
|
27
|
+
family = "SpaceMono"
|
|
28
|
+
url = "app/static/SpaceMono-Regular.ttf"
|
|
29
|
+
style = "normal"
|
|
30
|
+
weight = 400
|
|
31
|
+
|
|
32
|
+
[theme]
|
|
33
|
+
primaryColor = "#cb785c"
|
|
34
|
+
backgroundColor = "#fdfdf8"
|
|
35
|
+
secondaryBackgroundColor = "#ecebe3"
|
|
36
|
+
textColor = "#3d3a2a"
|
|
37
|
+
linkColor = "#3d3a2a"
|
|
38
|
+
borderColor = "#d3d2ca"
|
|
39
|
+
showWidgetBorder = true
|
|
40
|
+
baseRadius = "0.75rem"
|
|
41
|
+
buttonRadius = "full"
|
|
42
|
+
font = "SpaceGrotesk"
|
|
43
|
+
headingFontWeights = [600,500,500,500,500,500]
|
|
44
|
+
headingFontSizes = ["3rem", "2rem"]
|
|
45
|
+
codeFont = "SpaceMono"
|
|
46
|
+
codeFontSize = ".75rem"
|
|
47
|
+
codeBackgroundColor = "#ecebe4"
|
|
48
|
+
showSidebarBorder = true
|
|
49
|
+
chartCategoricalColors = ["#0ea5e9", "#059669", "#fbbf24"]
|
|
50
|
+
|
|
51
|
+
[theme.sidebar]
|
|
52
|
+
backgroundColor = "#f0f0ec"
|
|
53
|
+
secondaryBackgroundColor = "#ecebe3"
|
|
54
|
+
headingFontSizes = ["1.6rem", "1.4rem", "1.2rem"]
|
|
55
|
+
dataframeHeaderBackgroundColor = "#e4e4e0"
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Crawl citations command for CLI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from ...core.crawler import CitationCrawler
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.command(name="crawl-citations")
@click.argument("author_json")
@click.option("--openalex-email", help="OpenAlex email for enhanced data")
@click.option("--max-citations", type=int, help="Maximum citations per paper")
@click.option(
    "--delay-min", default=5.0, type=float, help="Minimum delay between requests (default: 5.0)"
)
@click.option(
    "--delay-max", default=10.0, type=float, help="Maximum delay between requests (default: 10.0)"
)
@click.option(
    "--delay-between-articles-min",
    default=16.0,
    type=float,
    help="Minimum delay between articles (default: 16.0)",
)
@click.option(
    "--delay-between-articles-max",
    default=22.0,
    type=float,
    help="Maximum delay between articles (default: 22.0)",
)
@click.option("--output-dir", help="Output directory (defaults to author.json directory)")
def crawl_citations(
    author_json,
    openalex_email,
    max_citations,
    delay_min,
    delay_max,
    delay_between_articles_min,
    delay_between_articles_max,
    output_dir,
):
    """Crawl citations for publications in author.json file."""
    # The docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.
    #
    # Flow: load author.json, walk its "articles" list, crawl each article's
    # "cites_id" and write the result to "<output_dir>/cites-<id>.json".
    # Articles without a cites_id or with an existing output file are skipped.
    #
    # NOTE(review): max_citations, delay_between_articles_min and
    # delay_between_articles_max are accepted but never used below -- confirm
    # whether the crawler is meant to receive them.

    click.echo(f"Loading author data from: {author_json}")

    # Load author data; every read/parse failure becomes a clean CLI error.
    try:
        with open(author_json, "r", encoding="utf-8") as f:
            author_data = json.load(f)
    except FileNotFoundError as exc:
        raise click.ClickException(f"Author file not found: {author_json}") from exc
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        raise click.ClickException(f"Invalid JSON in author file: {author_json}") from exc
    except OSError as exc:
        raise click.ClickException(f"Could not read author file: {author_json} ({exc})") from exc

    # Valid JSON whose root is not an object has no "articles" to look up.
    if not isinstance(author_data, dict):
        raise click.ClickException(f"Invalid JSON in author file: {author_json}")

    # Determine output directory
    if not output_dir:
        output_dir = str(Path(author_json).parent)

    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Get articles from author data
    articles = author_data.get("articles", [])
    if not articles:
        click.echo("No articles found in author data")
        return

    click.echo(f"Found {len(articles)} articles to process")

    # Initialize crawler
    delay_range = (delay_min, delay_max)
    crawler = CitationCrawler(delay_range=delay_range, openalex_email=openalex_email)

    # Process each article
    processed = 0
    skipped = 0
    errors = 0

    with click.progressbar(articles, label="Crawling citations") as article_bar:
        for article in article_bar:
            cites_id = article.get("cites_id")
            if not cites_id:
                skipped += 1
                continue

            # Check if already processed (commas in the id are replaced in the file name)
            output_file = Path(output_dir) / f"cites-{cites_id.replace(',', '_')}.json"
            if output_file.exists():
                skipped += 1
                continue

            try:
                # Crawl citations
                citations = crawler.crawl_all_citations(
                    cites_id, max_pages=None  # max_citations is handled differently
                )

                # Save citations to file; nothing is written for an empty result
                if citations:
                    with open(output_file, "w", encoding="utf-8") as f:
                        json.dump(citations, f, ensure_ascii=False, indent=2)

                # NOTE(review): counts every crawl that did not raise, including
                # empty results -- confirm this matches the intended summary.
                processed += 1

            except Exception as e:
                # Best effort: report the failing article and keep going.
                click.echo(f"\nError processing {article.get('title', 'Unknown')}: {e}")
                errors += 1

    # Summary
    click.echo("\n Citation crawling complete!")
    click.echo(f" Processed: {processed}")
    click.echo(f" Skipped (no ID or exists): {skipped}")
    if errors > 0:
        click.echo(f" Errors: {errors}")
|