PyPI - scholarcli - Versions diffs - 1.0__tar.gz - Mend

scholarcli 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

scholarcli-1.0/LICENSE +21 -0
scholarcli-1.0/PKG-INFO +280 -0
scholarcli-1.0/README.md +236 -0
scholarcli-1.0/pyproject.toml +80 -0
scholarcli-1.0/setup.cfg +4 -0
scholarcli-1.0/src/scholar/__init__.py +21 -0
scholarcli-1.0/src/scholar/__main__.py +6 -0
scholarcli-1.0/src/scholar/cache.py +141 -0
scholarcli-1.0/src/scholar/cli.py +1783 -0
scholarcli-1.0/src/scholar/enrich.py +361 -0
scholarcli-1.0/src/scholar/llm_review.py +804 -0
scholarcli-1.0/src/scholar/notes.py +591 -0
scholarcli-1.0/src/scholar/pdf.py +482 -0
scholarcli-1.0/src/scholar/providers.py +2537 -0
scholarcli-1.0/src/scholar/review.py +1438 -0
scholarcli-1.0/src/scholar/scholar.py +712 -0
scholarcli-1.0/src/scholar/tui.py +1859 -0
scholarcli-1.0/src/scholar/utils.py +72 -0
scholarcli-1.0/src/scholarcli.egg-info/PKG-INFO +280 -0
scholarcli-1.0/src/scholarcli.egg-info/SOURCES.txt +74 -0
scholarcli-1.0/src/scholarcli.egg-info/dependency_links.txt +1 -0
scholarcli-1.0/src/scholarcli.egg-info/entry_points.txt +2 -0
scholarcli-1.0/src/scholarcli.egg-info/requires.txt +36 -0
scholarcli-1.0/src/scholarcli.egg-info/top_level.txt +3 -0
scholarcli-1.0/src/snowball/__init__.py +195 -0
scholarcli-1.0/src/snowball/apis/__init__.py +0 -0
scholarcli-1.0/src/snowball/apis/aggregator.py +296 -0
scholarcli-1.0/src/snowball/apis/arxiv.py +233 -0
scholarcli-1.0/src/snowball/apis/base.py +84 -0
scholarcli-1.0/src/snowball/apis/crossref.py +203 -0
scholarcli-1.0/src/snowball/apis/google_scholar.py +259 -0
scholarcli-1.0/src/snowball/apis/openalex.py +299 -0
scholarcli-1.0/src/snowball/apis/opencitations.py +288 -0
scholarcli-1.0/src/snowball/apis/semantic_scholar.py +320 -0
scholarcli-1.0/src/snowball/cli.py +1127 -0
scholarcli-1.0/src/snowball/exporters/__init__.py +0 -0
scholarcli-1.0/src/snowball/exporters/bibtex.py +153 -0
scholarcli-1.0/src/snowball/exporters/csv_exporter.py +139 -0
scholarcli-1.0/src/snowball/exporters/tikz.py +224 -0
scholarcli-1.0/src/snowball/filters/__init__.py +0 -0
scholarcli-1.0/src/snowball/filters/filter_engine.py +166 -0
scholarcli-1.0/src/snowball/models.py +152 -0
scholarcli-1.0/src/snowball/paper_utils.py +612 -0
scholarcli-1.0/src/snowball/parsers/__init__.py +0 -0
scholarcli-1.0/src/snowball/parsers/pdf_parser.py +371 -0
scholarcli-1.0/src/snowball/scoring/__init__.py +30 -0
scholarcli-1.0/src/snowball/scoring/base.py +47 -0
scholarcli-1.0/src/snowball/scoring/llm_scorer.py +170 -0
scholarcli-1.0/src/snowball/scoring/tfidf_scorer.py +147 -0
scholarcli-1.0/src/snowball/snowballing.py +623 -0
scholarcli-1.0/src/snowball/storage/__init__.py +0 -0
scholarcli-1.0/src/snowball/storage/json_storage.py +287 -0
scholarcli-1.0/src/snowball/tui/__init__.py +0 -0
scholarcli-1.0/src/snowball/tui/app.py +1925 -0
scholarcli-1.0/src/snowball/visualization.py +290 -0
scholarcli-1.0/src/tuxedo/__init__.py +136 -0
scholarcli-1.0/src/tuxedo/analysis.py +265 -0
scholarcli-1.0/src/tuxedo/cli.py +1402 -0
scholarcli-1.0/src/tuxedo/clustering.py +703 -0
scholarcli-1.0/src/tuxedo/database.py +650 -0
scholarcli-1.0/src/tuxedo/grobid.py +519 -0
scholarcli-1.0/src/tuxedo/logging.py +118 -0
scholarcli-1.0/src/tuxedo/models.py +127 -0
scholarcli-1.0/src/tuxedo/project.py +288 -0
scholarcli-1.0/src/tuxedo/tui.py +2836 -0
scholarcli-1.0/tests/test_cache.py +98 -0
scholarcli-1.0/tests/test_cli.py +332 -0
scholarcli-1.0/tests/test_enrich.py +378 -0
scholarcli-1.0/tests/test_llm_review.py +589 -0
scholarcli-1.0/tests/test_notes.py +450 -0
scholarcli-1.0/tests/test_pdf.py +279 -0
scholarcli-1.0/tests/test_providers.py +2077 -0
scholarcli-1.0/tests/test_review.py +1056 -0
scholarcli-1.0/tests/test_scholar.py +696 -0
scholarcli-1.0/tests/test_tui.py +11 -0
scholarcli-1.0/tests/test_utils.py +75 -0

scholarcli-1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025--2026 Daniel Bosk
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

scholarcli-1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,280 @@
+Metadata-Version: 2.4
+Name: scholarcli
+Version: 1.0
+Summary: A tool for structured literature searches across bibliographic databases
+Author-email: Daniel Bosk <dbosk@kth.se>, Ric Glassey <glassey@kth.se>
+License-Expression: MIT
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.32.5
+Requires-Dist: typer>=0.21.0
+Requires-Dist: rich>=14.2.0
+Requires-Dist: pyalex>=0.19
+Requires-Dist: arxiv>=2.1.0
+Requires-Dist: cachetools>=6.2.4
+Requires-Dist: platformdirs>=4.5.1
+Requires-Dist: textual>=6.11.0
+Requires-Dist: pypandoc>=1.14
+Requires-Dist: click>=8.0.0
+Requires-Dist: llm>=0.19
+Requires-Dist: llm-openai-plugin>=0.7
+Requires-Dist: llm-gpt4all>=0.4
+Requires-Dist: llm-azure>=2.1
+Requires-Dist: llm-anthropic>=0.23
+Requires-Dist: llm-gemini>=0.28.2
+Provides-Extra: snowball
+Requires-Dist: pypdfium2>=4.26.0; extra == "snowball"
+Requires-Dist: bibtexparser>=1.4.0; extra == "snowball"
+Requires-Dist: pandas>=2.0.0; extra == "snowball"
+Requires-Dist: pydantic>=2.0.0; extra == "snowball"
+Requires-Dist: python-dotenv>=1.0.0; extra == "snowball"
+Requires-Dist: httpx>=0.25.0; extra == "snowball"
+Requires-Dist: grobid-client-python>=0.1.0; extra == "snowball"
+Requires-Dist: scholarly>=1.7.0; extra == "snowball"
+Provides-Extra: tuxedo
+Requires-Dist: httpx>=0.27.0; extra == "tuxedo"
+Requires-Dist: openai>=1.0.0; extra == "tuxedo"
+Requires-Dist: pydantic>=2.0.0; extra == "tuxedo"
+Requires-Dist: pyyaml>=6.0; extra == "tuxedo"
+Provides-Extra: all
+Requires-Dist: scholarcli[snowball]; extra == "all"
+Requires-Dist: scholarcli[tuxedo]; extra == "all"
+Dynamic: license-file
+# Scholar
+A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
+## Features
+### Multi-Database Search
+Search across five academic databases with a single query:
+- **Semantic Scholar** - AI-powered research database with 200M+ papers
+- **OpenAlex** - Open catalog of 250M+ scholarly works
+- **DBLP** - Computer science bibliography
+- **Web of Science** - Comprehensive citation index (requires API key)
+- **IEEE Xplore** - IEEE technical literature (requires API key)
+```bash
+# Search all available providers
+scholar search "machine learning privacy"
+# Search specific providers
+scholar search "federated learning" -p semantic_scholar -p openalex
+```
+### Interactive Review Interface
+Review search results in a terminal-based interface with vim-style navigation:
+```bash
+scholar search "neural networks" --review
+```
+The TUI supports:
+- **Keep/Discard decisions** with mandatory motivations for discards
+- **Theme tagging** for organizing kept papers
+- **Note-taking** with your preferred editor
+- **PDF viewing** with automatic download and caching
+- **Abstract enrichment** for papers missing abstracts
+- **LLM-assisted classification** to help review large result sets
+- **Sorting and filtering** by various criteria
+### Output Formats
+Export results in multiple formats:
+```bash
+# Pretty table (default for terminal)
+scholar search "query"
+# Machine-readable formats
+scholar search "query" -f json
+scholar search "query" -f csv
+scholar search "query" -f bibtex
+```
+### Session Management
+Save and resume review sessions:
+```bash
+# List saved sessions
+scholar sessions list
+# Resume a session
+scholar sessions resume "machine learning"
+# Export session to reports
+scholar sessions export "machine learning" -f all
+```
+### Paper Notes
+Manage notes across all reviewed papers:
+```bash
+# Browse papers with notes
+scholar notes
+# List papers with notes
+scholar notes list
+# Export/import notes
+scholar notes export notes.json
+scholar notes import notes.json
+```
+### Caching
+Search results are cached to avoid redundant API calls:
+```bash
+scholar cache info    # Show cache statistics
+scholar cache clear   # Delete cached results
+scholar cache path    # Print cache directory
+```
+PDF downloads are also cached for offline viewing.
+## Installation
+```bash
+pip install scholarcli
+```
+Or with [uv](https://github.com/astral-sh/uv):
+```bash
+uv pip install scholarcli
+```
+## Configuration
+Some providers require API keys set as environment variables:
+| Provider | Environment Variable | Required | How to Get |
+|----------|---------------------|----------|------------|
+| Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
+| OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
+| DBLP | - | No | No key needed |
+| Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
+| IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
+View provider status:
+```bash
+scholar providers
+```
+## Usage Examples
+### Basic Search
+```bash
+# Search with default providers (Semantic Scholar, OpenAlex, DBLP)
+scholar search "differential privacy"
+# Limit results per provider
+scholar search "blockchain" -l 50
+```
+### Systematic Review Workflow
+```bash
+# 1. Search and review interactively
+scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
+# 2. Add more searches to the same session
+scholar search "federated learning privacy" --review --name "privacy-ml-review"
+# 3. Resume reviewing later
+scholar sessions resume "privacy-ml-review"
+# 4. Generate reports
+scholar sessions export "privacy-ml-review" -f all
+```
+### Enriching Results
+Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
+```bash
+# Enrich during search
+scholar search "query" --enrich
+# Enrich an existing session
+scholar enrich "session-name"
+```
+### PDF Management
+```bash
+# Download and open a PDF
+scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
+# View PDF cache
+scholar pdf info
+scholar pdf clear
+```
+## Keybindings (Review TUI)
+| Key | Action |
+|-----|--------|
+| `j`/`k` | Navigate down/up |
+| `Enter` | View paper details |
+| `K` | Keep paper (quick) |
+| `T` | Keep with themes |
+| `d` | Discard (requires motivation) |
+| `n` | Edit notes |
+| `p` | Open PDF |
+| `e` | Enrich (fetch abstract) |
+| `L` | LLM-assisted classification |
+| `s` | Sort papers |
+| `f` | Filter by status |
+| `q` | Quit |
+## LLM-Assisted Review
+For large result sets, Scholar can use LLMs to assist with paper classification:
+```bash
+# In the TUI, press 'L' to invoke LLM classification
+# Or use the CLI command directly
+scholar llm-review "session-name" --count 10
+```
+### How It Works
+1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
+2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
+3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
+4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
+5. **Iterate** - Corrections become training examples for the next round.
+### Requirements
+Install the `llm` package and configure a model:
+```bash
+pip install llm
+llm keys set openai  # Or configure another provider
+```
+The LLM module supports any model available through Simon Willison's `llm` package (OpenAI, Anthropic, local models, etc.).
+## Documentation
+Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
+## License
+MIT License - see [LICENSE](LICENSE) for details.

scholarcli-1.0/README.md ADDED Viewed

@@ -0,0 +1,236 @@
+# Scholar
+A command-line tool for conducting structured literature searches across multiple academic databases, with built-in support for systematic literature reviews.
+## Features
+### Multi-Database Search
+Search across five academic databases with a single query:
+- **Semantic Scholar** - AI-powered research database with 200M+ papers
+- **OpenAlex** - Open catalog of 250M+ scholarly works
+- **DBLP** - Computer science bibliography
+- **Web of Science** - Comprehensive citation index (requires API key)
+- **IEEE Xplore** - IEEE technical literature (requires API key)
+```bash
+# Search all available providers
+scholar search "machine learning privacy"
+# Search specific providers
+scholar search "federated learning" -p semantic_scholar -p openalex
+```
+### Interactive Review Interface
+Review search results in a terminal-based interface with vim-style navigation:
+```bash
+scholar search "neural networks" --review
+```
+The TUI supports:
+- **Keep/Discard decisions** with mandatory motivations for discards
+- **Theme tagging** for organizing kept papers
+- **Note-taking** with your preferred editor
+- **PDF viewing** with automatic download and caching
+- **Abstract enrichment** for papers missing abstracts
+- **LLM-assisted classification** to help review large result sets
+- **Sorting and filtering** by various criteria
+### Output Formats
+Export results in multiple formats:
+```bash
+# Pretty table (default for terminal)
+scholar search "query"
+# Machine-readable formats
+scholar search "query" -f json
+scholar search "query" -f csv
+scholar search "query" -f bibtex
+```
+### Session Management
+Save and resume review sessions:
+```bash
+# List saved sessions
+scholar sessions list
+# Resume a session
+scholar sessions resume "machine learning"
+# Export session to reports
+scholar sessions export "machine learning" -f all
+```
+### Paper Notes
+Manage notes across all reviewed papers:
+```bash
+# Browse papers with notes
+scholar notes
+# List papers with notes
+scholar notes list
+# Export/import notes
+scholar notes export notes.json
+scholar notes import notes.json
+```
+### Caching
+Search results are cached to avoid redundant API calls:
+```bash
+scholar cache info    # Show cache statistics
+scholar cache clear   # Delete cached results
+scholar cache path    # Print cache directory
+```
+PDF downloads are also cached for offline viewing.
+## Installation
+```bash
+pip install scholarcli
+```
+Or with [uv](https://github.com/astral-sh/uv):
+```bash
+uv pip install scholarcli
+```
+## Configuration
+Some providers require API keys set as environment variables:
+| Provider | Environment Variable | Required | How to Get |
+|----------|---------------------|----------|------------|
+| Semantic Scholar | `S2_API_KEY` | No | [api.semanticscholar.org](https://api.semanticscholar.org) |
+| OpenAlex | `OPENALEX_EMAIL` | No | Any email (for polite pool) |
+| DBLP | - | No | No key needed |
+| Web of Science | `WOS_API_KEY` | Yes | [developer.clarivate.com](https://developer.clarivate.com) |
+| IEEE Xplore | `IEEE_API_KEY` | Yes | [developer.ieee.org](https://developer.ieee.org) |
+View provider status:
+```bash
+scholar providers
+```
+## Usage Examples
+### Basic Search
+```bash
+# Search with default providers (Semantic Scholar, OpenAlex, DBLP)
+scholar search "differential privacy"
+# Limit results per provider
+scholar search "blockchain" -l 50
+```
+### Systematic Review Workflow
+```bash
+# 1. Search and review interactively
+scholar search "privacy-preserving machine learning" --review --name "privacy-ml-review"
+# 2. Add more searches to the same session
+scholar search "federated learning privacy" --review --name "privacy-ml-review"
+# 3. Resume reviewing later
+scholar sessions resume "privacy-ml-review"
+# 4. Generate reports
+scholar sessions export "privacy-ml-review" -f all
+```
+### Enriching Results
+Some providers (like DBLP) don't include abstracts. Fetch them from other sources:
+```bash
+# Enrich during search
+scholar search "query" --enrich
+# Enrich an existing session
+scholar enrich "session-name"
+```
+### PDF Management
+```bash
+# Download and open a PDF
+scholar pdf open "https://arxiv.org/pdf/2301.00001.pdf"
+# View PDF cache
+scholar pdf info
+scholar pdf clear
+```
+## Keybindings (Review TUI)
+| Key | Action |
+|-----|--------|
+| `j`/`k` | Navigate down/up |
+| `Enter` | View paper details |
+| `K` | Keep paper (quick) |
+| `T` | Keep with themes |
+| `d` | Discard (requires motivation) |
+| `n` | Edit notes |
+| `p` | Open PDF |
+| `e` | Enrich (fetch abstract) |
+| `L` | LLM-assisted classification |
+| `s` | Sort papers |
+| `f` | Filter by status |
+| `q` | Quit |
+## LLM-Assisted Review
+For large result sets, Scholar can use LLMs to assist with paper classification:
+```bash
+# In the TUI, press 'L' to invoke LLM classification
+# Or use the CLI command directly
+scholar llm-review "session-name" --count 10
+```
+### How It Works
+1. **Tag some papers manually** - The LLM needs examples to learn from. Review at least 5 papers with tags (themes for kept, motivations for discarded).
+2. **Set research context** (optional) - Describe your review's focus to help the LLM understand relevance criteria.
+3. **Invoke LLM classification** - The LLM classifies pending papers based on your examples, returning confidence scores.
+4. **Review LLM decisions** - Prioritize low-confidence classifications. Accept correct ones, correct wrong ones.
+5. **Iterate** - Corrections become training examples for the next round.
+### Requirements
+Install the `llm` package and configure a model:
+```bash
+pip install llm
+llm keys set openai  # Or configure another provider
+```
+The LLM module supports any model available through Simon Willison's `llm` package (OpenAI, Anthropic, local models, etc.).
+## Documentation
+Full documentation is available in the `doc/` directory as a literate program combining documentation and implementation.
+## License
+MIT License - see [LICENSE](LICENSE) for details.

scholarcli-1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,80 @@
+[project]
+name = "scholarcli"
+version = "1.0"
+description = "A tool for structured literature searches across bibliographic databases"
+authors = [{ name = "Daniel Bosk", email = "dbosk@kth.se" },
+           { name = "Ric Glassey", email = "glassey@kth.se" }]
+readme = "README.md"
+license = "MIT"
+requires-python = ">= 3.12"
+dependencies = [
+    "requests>=2.32.5",
+    "typer>=0.21.0",
+    "rich>=14.2.0",
+    "pyalex>=0.19",
+    "arxiv>=2.1.0",
+    "cachetools>=6.2.4",
+    "platformdirs>=4.5.1",
+    "textual>=6.11.0",
+    "pypandoc>=1.14",
+    "click>=8.0.0",
+    "llm>=0.19",
+    "llm-openai-plugin>=0.7",
+    "llm-gpt4all>=0.4",
+    "llm-azure>=2.1",
+    "llm-anthropic>=0.23",
+    "llm-gemini>=0.28.2",
+]
+[project.optional-dependencies]
+# Snowball dependencies (for scholar snowball subcommand)
+snowball = [
+    "pypdfium2>=4.26.0",
+    "bibtexparser>=1.4.0",
+    "pandas>=2.0.0",
+    "pydantic>=2.0.0",
+    "python-dotenv>=1.0.0",
+    "httpx>=0.25.0",
+    "grobid-client-python>=0.1.0",
+    "scholarly>=1.7.0",
+]
+# Tuxedo dependencies (for scholar tuxedo subcommand)
+tuxedo = [
+    "httpx>=0.27.0",
+    "openai>=1.0.0",
+    "pydantic>=2.0.0",
+    "pyyaml>=6.0",
+]
+# All subcommand dependencies
+all = [
+    "scholarcli[snowball]",
+    "scholarcli[tuxedo]",
+]
+[project.scripts]
+scholar = "scholar.cli:main"
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+markers = [
+    "integration: marks tests as integration tests (make real API calls)",
+]
+[tool.black]
+line-length = 78
+[dependency-groups]
+dev = [
+    "pytest (>=9.0.2,<10.0.0)",
+    "pytest-cov (>=7.0.0,<8.0.0)",
+    "black (>=25.12.0,<26.0.0)",
+    "mypy (>=1.19.1,<2.0.0)"
+]

scholarcli-1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

scholarcli-1.0/src/scholar/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+Scholar package for structured literature searches.
+"""
+from .scholar import Search, SearchResult, Paper, SearchFilters
+from .scholar import search, filter_papers
+from .scholar import get_registry, isolated_registry
+from .utils import safe_get_nested, ensure_list
+__all__ = [
+    "Search",
+    "SearchResult",
+    "Paper",
+    "SearchFilters",
+    "search",
+    "filter_papers",
+    "get_registry",
+    "isolated_registry",
+    "safe_get_nested",
+    "ensure_list",
+]

scholarcli-1.0/src/scholar/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running Scholar as a module: python -m scholar"""
+from scholar.cli import main
+if __name__ == "__main__":
+    main()