fluff-cutter 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fluff_cutter-0.1.0/src/fluff_cutter.egg-info → fluff_cutter-0.2.0}/PKG-INFO +30 -10
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/README.md +28 -9
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/pyproject.toml +13 -1
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/__init__.py +1 -1
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/analyzer.py +2 -1
- fluff_cutter-0.2.0/src/fluff_cutter/cli.py +291 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/config.py +24 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/output.py +1 -3
- fluff_cutter-0.2.0/src/fluff_cutter/pdf.py +100 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/anthropic.py +1 -1
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/openai.py +2 -2
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0/src/fluff_cutter.egg-info}/PKG-INFO +30 -10
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/SOURCES.txt +6 -1
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/requires.txt +1 -0
- fluff_cutter-0.2.0/tests/test_analyzer.py +84 -0
- fluff_cutter-0.2.0/tests/test_config.py +195 -0
- fluff_cutter-0.2.0/tests/test_output.py +86 -0
- fluff_cutter-0.2.0/tests/test_pdf.py +170 -0
- fluff_cutter-0.2.0/tests/test_providers.py +93 -0
- fluff_cutter-0.1.0/src/fluff_cutter/cli.py +0 -185
- fluff_cutter-0.1.0/src/fluff_cutter/pdf.py +0 -45
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/LICENSE +0 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/setup.cfg +0 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/__init__.py +1 -1
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/base.py +0 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/dependency_links.txt +0 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/entry_points.txt +0 -0
- {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fluff-cutter
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A CLI tool to analyze academic papers and extract their core value
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -9,6 +9,7 @@ Requires-Dist: click>=8.0
|
|
|
9
9
|
Requires-Dist: openai>=1.0
|
|
10
10
|
Requires-Dist: anthropic>=0.18
|
|
11
11
|
Requires-Dist: python-dotenv>=1.0
|
|
12
|
+
Requires-Dist: pypdf>=4.0
|
|
12
13
|
Provides-Extra: dev
|
|
13
14
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
15
|
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
@@ -16,6 +17,8 @@ Dynamic: license-file
|
|
|
16
17
|
|
|
17
18
|
# Paper Fluff Cutter
|
|
18
19
|
|
|
20
|
+
[fluff-cutter on PyPI](https://pypi.org/project/fluff-cutter/)
|
|
21
|
+
|
|
19
22
|
A CLI tool that cuts through academic paper fluff to extract what actually matters.
|
|
20
23
|
|
|
21
24
|
Most research has close to zero value. This tool uses multimodal LLMs to analyze papers and answer the three questions every paper should be able to answer:
|
|
@@ -26,13 +29,17 @@ Most research has close to zero value. This tool uses multimodal LLMs to analyze
|
|
|
26
29
|
|
|
27
30
|
## Installation
|
|
28
31
|
|
|
29
|
-
|
|
32
|
+
```bash
|
|
33
|
+
pip install fluff-cutter
|
|
34
|
+
```
|
|
30
35
|
|
|
31
|
-
|
|
36
|
+
Requires Python 3.10+.
|
|
32
37
|
|
|
33
|
-
###
|
|
38
|
+
### Development install
|
|
34
39
|
|
|
35
40
|
```bash
|
|
41
|
+
git clone https://github.com/weijianzhg/paper-fluff-cutter.git
|
|
42
|
+
cd paper-fluff-cutter
|
|
36
43
|
pip install -e .
|
|
37
44
|
```
|
|
38
45
|
|
|
@@ -44,7 +51,7 @@ pip install -e .
|
|
|
44
51
|
fluff-cutter init
|
|
45
52
|
```
|
|
46
53
|
|
|
47
|
-
This will prompt you for your API keys and save them to `~/.config/fluff-cutter/config.json`.
|
|
54
|
+
This will prompt you for your API keys, default provider, and model preferences, then save them to `~/.config/fluff-cutter/config.json`.
|
|
48
55
|
|
|
49
56
|
### Option 2: Environment variables
|
|
50
57
|
|
|
@@ -52,6 +59,8 @@ This will prompt you for your API keys and save them to `~/.config/fluff-cutter/
|
|
|
52
59
|
export OPENAI_API_KEY=sk-your-key-here
|
|
53
60
|
export ANTHROPIC_API_KEY=sk-ant-your-key-here
|
|
54
61
|
export FLUFF_CUTTER_PROVIDER=anthropic # optional, default provider
|
|
62
|
+
export FLUFF_CUTTER_OPENAI_MODEL=gpt-5.2 # optional, override default model
|
|
63
|
+
export FLUFF_CUTTER_ANTHROPIC_MODEL=claude-sonnet-4-5 # optional, override default model
|
|
55
64
|
```
|
|
56
65
|
|
|
57
66
|
## Usage
|
|
@@ -72,8 +81,8 @@ fluff-cutter analyze paper.pdf --provider anthropic
|
|
|
72
81
|
### Specify model
|
|
73
82
|
|
|
74
83
|
```bash
|
|
75
|
-
fluff-cutter analyze paper.pdf --provider openai --model gpt-
|
|
76
|
-
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-
|
|
84
|
+
fluff-cutter analyze paper.pdf --provider openai --model gpt-5.2
|
|
85
|
+
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-5
|
|
77
86
|
```
|
|
78
87
|
|
|
79
88
|
### Save output to file
|
|
@@ -82,12 +91,22 @@ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-2025
|
|
|
82
91
|
fluff-cutter analyze paper.pdf --output analysis.md
|
|
83
92
|
```
|
|
84
93
|
|
|
94
|
+
### Long papers
|
|
95
|
+
|
|
96
|
+
For very long papers that exceed the model's token limit, you can limit the number of pages:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
fluff-cutter analyze paper.pdf --max-pages 30
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
If you don't specify `--max-pages` and the paper exceeds the token limit, it will automatically truncate to the first 50 pages and retry.
|
|
103
|
+
|
|
85
104
|
## Supported Providers
|
|
86
105
|
|
|
87
106
|
| Provider | Default Model | Environment Variable |
|
|
88
107
|
|----------|---------------|---------------------|
|
|
89
|
-
| OpenAI | gpt-
|
|
90
|
-
| Anthropic | claude-sonnet-4-
|
|
108
|
+
| OpenAI | gpt-5.2 | `OPENAI_API_KEY` |
|
|
109
|
+
| Anthropic | claude-sonnet-4-5 | `ANTHROPIC_API_KEY` |
|
|
91
110
|
|
|
92
111
|
Both providers now support native PDF input - no external dependencies like poppler needed.
|
|
93
112
|
|
|
@@ -96,8 +115,9 @@ Both providers now support native PDF input - no external dependencies like popp
|
|
|
96
115
|
Configuration is loaded with the following precedence (highest to lowest):
|
|
97
116
|
|
|
98
117
|
1. Command-line arguments (`--provider`, `--model`)
|
|
99
|
-
2. Environment variables
|
|
118
|
+
2. Environment variables (`FLUFF_CUTTER_PROVIDER`, `FLUFF_CUTTER_OPENAI_MODEL`, `FLUFF_CUTTER_ANTHROPIC_MODEL`)
|
|
100
119
|
3. Config file (`~/.config/fluff-cutter/config.json`)
|
|
120
|
+
4. Provider defaults (gpt-5.2 for OpenAI, claude-sonnet-4-5 for Anthropic)
|
|
101
121
|
|
|
102
122
|
## License
|
|
103
123
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Paper Fluff Cutter
|
|
2
2
|
|
|
3
|
+
[fluff-cutter on PyPI](https://pypi.org/project/fluff-cutter/)
|
|
4
|
+
|
|
3
5
|
A CLI tool that cuts through academic paper fluff to extract what actually matters.
|
|
4
6
|
|
|
5
7
|
Most research has close to zero value. This tool uses multimodal LLMs to analyze papers and answer the three questions every paper should be able to answer:
|
|
@@ -10,13 +12,17 @@ Most research has close to zero value. This tool uses multimodal LLMs to analyze
|
|
|
10
12
|
|
|
11
13
|
## Installation
|
|
12
14
|
|
|
13
|
-
|
|
15
|
+
```bash
|
|
16
|
+
pip install fluff-cutter
|
|
17
|
+
```
|
|
14
18
|
|
|
15
|
-
|
|
19
|
+
Requires Python 3.10+.
|
|
16
20
|
|
|
17
|
-
###
|
|
21
|
+
### Development install
|
|
18
22
|
|
|
19
23
|
```bash
|
|
24
|
+
git clone https://github.com/weijianzhg/paper-fluff-cutter.git
|
|
25
|
+
cd paper-fluff-cutter
|
|
20
26
|
pip install -e .
|
|
21
27
|
```
|
|
22
28
|
|
|
@@ -28,7 +34,7 @@ pip install -e .
|
|
|
28
34
|
fluff-cutter init
|
|
29
35
|
```
|
|
30
36
|
|
|
31
|
-
This will prompt you for your API keys and save them to `~/.config/fluff-cutter/config.json`.
|
|
37
|
+
This will prompt you for your API keys, default provider, and model preferences, then save them to `~/.config/fluff-cutter/config.json`.
|
|
32
38
|
|
|
33
39
|
### Option 2: Environment variables
|
|
34
40
|
|
|
@@ -36,6 +42,8 @@ This will prompt you for your API keys and save them to `~/.config/fluff-cutter/
|
|
|
36
42
|
export OPENAI_API_KEY=sk-your-key-here
|
|
37
43
|
export ANTHROPIC_API_KEY=sk-ant-your-key-here
|
|
38
44
|
export FLUFF_CUTTER_PROVIDER=anthropic # optional, default provider
|
|
45
|
+
export FLUFF_CUTTER_OPENAI_MODEL=gpt-5.2 # optional, override default model
|
|
46
|
+
export FLUFF_CUTTER_ANTHROPIC_MODEL=claude-sonnet-4-5 # optional, override default model
|
|
39
47
|
```
|
|
40
48
|
|
|
41
49
|
## Usage
|
|
@@ -56,8 +64,8 @@ fluff-cutter analyze paper.pdf --provider anthropic
|
|
|
56
64
|
### Specify model
|
|
57
65
|
|
|
58
66
|
```bash
|
|
59
|
-
fluff-cutter analyze paper.pdf --provider openai --model gpt-
|
|
60
|
-
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-
|
|
67
|
+
fluff-cutter analyze paper.pdf --provider openai --model gpt-5.2
|
|
68
|
+
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-5
|
|
61
69
|
```
|
|
62
70
|
|
|
63
71
|
### Save output to file
|
|
@@ -66,12 +74,22 @@ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-2025
|
|
|
66
74
|
fluff-cutter analyze paper.pdf --output analysis.md
|
|
67
75
|
```
|
|
68
76
|
|
|
77
|
+
### Long papers
|
|
78
|
+
|
|
79
|
+
For very long papers that exceed the model's token limit, you can limit the number of pages:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
fluff-cutter analyze paper.pdf --max-pages 30
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
If you don't specify `--max-pages` and the paper exceeds the token limit, it will automatically truncate to the first 50 pages and retry.
|
|
86
|
+
|
|
69
87
|
## Supported Providers
|
|
70
88
|
|
|
71
89
|
| Provider | Default Model | Environment Variable |
|
|
72
90
|
|----------|---------------|---------------------|
|
|
73
|
-
| OpenAI | gpt-
|
|
74
|
-
| Anthropic | claude-sonnet-4-
|
|
91
|
+
| OpenAI | gpt-5.2 | `OPENAI_API_KEY` |
|
|
92
|
+
| Anthropic | claude-sonnet-4-5 | `ANTHROPIC_API_KEY` |
|
|
75
93
|
|
|
76
94
|
Both providers now support native PDF input - no external dependencies like poppler needed.
|
|
77
95
|
|
|
@@ -80,8 +98,9 @@ Both providers now support native PDF input - no external dependencies like popp
|
|
|
80
98
|
Configuration is loaded with the following precedence (highest to lowest):
|
|
81
99
|
|
|
82
100
|
1. Command-line arguments (`--provider`, `--model`)
|
|
83
|
-
2. Environment variables
|
|
101
|
+
2. Environment variables (`FLUFF_CUTTER_PROVIDER`, `FLUFF_CUTTER_OPENAI_MODEL`, `FLUFF_CUTTER_ANTHROPIC_MODEL`)
|
|
84
102
|
3. Config file (`~/.config/fluff-cutter/config.json`)
|
|
103
|
+
4. Provider defaults (gpt-5.2 for OpenAI, claude-sonnet-4-5 for Anthropic)
|
|
85
104
|
|
|
86
105
|
## License
|
|
87
106
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "fluff-cutter"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "A CLI tool to analyze academic papers and extract their core value"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -13,6 +13,7 @@ dependencies = [
|
|
|
13
13
|
"openai>=1.0",
|
|
14
14
|
"anthropic>=0.18",
|
|
15
15
|
"python-dotenv>=1.0",
|
|
16
|
+
"pypdf>=4.0",
|
|
16
17
|
]
|
|
17
18
|
|
|
18
19
|
[project.scripts]
|
|
@@ -26,3 +27,14 @@ dev = [
|
|
|
26
27
|
|
|
27
28
|
[tool.setuptools.packages.find]
|
|
28
29
|
where = ["src"]
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
testpaths = ["tests"]
|
|
33
|
+
pythonpath = ["src"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff]
|
|
36
|
+
target-version = "py310"
|
|
37
|
+
line-length = 100
|
|
38
|
+
|
|
39
|
+
[tool.ruff.lint]
|
|
40
|
+
select = ["E", "F", "I", "W"]
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from .providers.base import BaseLLMProvider
|
|
4
4
|
|
|
5
|
-
ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through
|
|
5
|
+
ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through \
|
|
6
|
+
all the fluff and extract only what matters.
|
|
6
7
|
|
|
7
8
|
Answer these three questions concisely and critically:
|
|
8
9
|
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""Command-line interface for Paper Fluff Cutter."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from . import __version__
|
|
8
|
+
from .analyzer import analyze_paper
|
|
9
|
+
from .config import (
|
|
10
|
+
get_api_key,
|
|
11
|
+
get_config_path,
|
|
12
|
+
get_default_model,
|
|
13
|
+
get_default_provider,
|
|
14
|
+
is_configured,
|
|
15
|
+
load_config,
|
|
16
|
+
save_config,
|
|
17
|
+
)
|
|
18
|
+
from .output import print_analysis, save_analysis
|
|
19
|
+
from .pdf import DEFAULT_MAX_PAGES, get_pdf_filename, read_pdf_as_base64
|
|
20
|
+
from .providers import AnthropicProvider, OpenAIProvider
|
|
21
|
+
|
|
22
|
+
# Registry of supported providers: CLI/config provider name -> provider class.
# Insertion order (openai, then anthropic) is preserved.
PROVIDERS = dict(
    openai=OpenAIProvider,
    anthropic=AnthropicProvider,
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Root click command group; the `init` and `analyze` subcommands attach to it.
# The docstring doubles as the --help text, so it is kept verbatim.
@click.group()
@click.version_option(version=__version__)
def main():
    """Paper Fluff Cutter - Extract the core value from academic papers."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _mask_key(key: str) -> str:
|
|
36
|
+
"""Mask an API key for display, showing only first 4 and last 4 chars."""
|
|
37
|
+
if len(key) <= 12:
|
|
38
|
+
return "*" * len(key)
|
|
39
|
+
return f"{key[:4]}...{key[-4:]}"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Display labels for each provider, in the order the prompts are shown.
_PROVIDER_LABELS = {
    "openai": "OpenAI",
    "anthropic": "Anthropic",
}


def _prompt_api_key(label: str, existing_key: str | None) -> str | None:
    """Prompt (with hidden input) for one provider's API key.

    Args:
        label: Provider display name used in the prompt and status messages.
        existing_key: Key already present in the loaded configuration, if any.

    Returns:
        The newly entered key, the existing key when the user just presses
        Enter, or None when neither is available.
    """
    prompt_text = f"{label} API Key"
    if existing_key:
        # Show a masked hint so the user can tell which key would be kept.
        prompt_text += f" [{_mask_key(existing_key)}]"

    entered = click.prompt(prompt_text, default="", hide_input=True, show_default=False)
    if entered:
        click.echo(f" ✓ {label} API key updated")
        return entered
    if existing_key:
        click.echo(f" ✓ {label} API key kept")
        return existing_key
    return None


def _prompt_model(label: str, provider_class, existing_config: dict, config_key: str) -> str | None:
    """Prompt for one provider's default model.

    Args:
        label: Provider display name used in the prompt and status messages.
        provider_class: Provider class whose ``default_model`` is the fallback.
        existing_config: Previously loaded configuration.
        config_key: Key under which this provider's model is stored.

    Returns:
        The chosen model when it differs from the provider default, otherwise
        None (meaning: do not persist a model override).
    """
    # The provider is instantiated with an empty key only to read its default model.
    provider_default = provider_class(api_key="").default_model
    chosen = click.prompt(
        f"{label} model",
        default=existing_config.get(config_key, provider_default),
        show_default=True,
    )
    if chosen != provider_default:
        click.echo(f" ✓ {label} model set to: {chosen}")
        return chosen
    click.echo(f" Using default: {provider_default}")
    return None


@main.command()
def init():
    """Initialize configuration with API keys, provider, and model settings."""
    # NOTE: the docstring above is click's --help text; keep it unchanged.
    click.echo("Paper Fluff Cutter Configuration")
    click.echo("=" * 40)
    click.echo()

    # Load existing configuration (includes env vars)
    existing_config = load_config()
    existing_keys = {
        name: existing_config.get(f"{name}_api_key") for name in _PROVIDER_LABELS
    }

    # Show current status
    if any(existing_keys.values()):
        click.echo("Current configuration:")
        for name, key in existing_keys.items():
            if key:
                click.echo(f" {_PROVIDER_LABELS[name]} API Key: {_mask_key(key)}")
        click.echo()

    config = {}

    # API keys, one prompt per provider
    click.echo("Enter your API keys (press Enter to keep existing or skip):")
    click.echo()

    for name, label in _PROVIDER_LABELS.items():
        api_key = _prompt_api_key(label, existing_keys[name])
        if api_key:
            config[f"{name}_api_key"] = api_key

    if not config:
        click.echo()
        click.echo("No API keys provided. Configuration not saved.")
        click.echo("You can set keys via environment variables instead:")
        click.echo(" export OPENAI_API_KEY=sk-...")
        click.echo(" export ANTHROPIC_API_KEY=sk-ant-...")
        return

    # Default provider: only providers with a key can be chosen.
    click.echo()
    available_providers = [name for name in _PROVIDER_LABELS if f"{name}_api_key" in config]

    current_default = existing_config.get("default_provider")
    if len(available_providers) > 1:
        default_choice = current_default if current_default in available_providers else None
        if not default_choice:
            default_choice = "anthropic" if "anthropic" in available_providers else "openai"
        default_provider = click.prompt(
            "Default provider",
            type=click.Choice(available_providers),
            default=default_choice,
        )
    else:
        # Exactly one provider has a key, so there is nothing to ask.
        default_provider = available_providers[0]

    config["default_provider"] = default_provider

    # Model configuration
    click.echo()
    click.echo("Configure default models (press Enter for provider defaults):")
    click.echo()

    for name in available_providers:
        model_key = f"{name}_model"
        model = _prompt_model(_PROVIDER_LABELS[name], PROVIDERS[name], existing_config, model_key)
        if model is not None:
            # Only non-default models are persisted.
            config[model_key] = model

    # Save configuration
    save_config(config)

    click.echo()
    click.echo(f"Configuration saved to: {get_config_path()}")
    click.echo(f"Default provider: {default_provider}")
    click.echo()
    click.echo("You're ready to analyze papers!")
    click.echo(" fluff-cutter analyze <paper.pdf>")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _is_token_limit_error(error: Exception) -> bool:
    """Return True when the error message mentions both "too long" and "token"."""
    message = str(error).lower()
    return "too long" in message and "token" in message


@main.command()
@click.argument("paper_path", type=click.Path(exists=True))
@click.option(
    "-p",
    "--provider",
    type=click.Choice(["openai", "anthropic"]),
    help="LLM provider to use",
)
@click.option(
    "-m",
    "--model",
    help="Specific model to use (overrides provider default)",
)
@click.option(
    "-o",
    "--output",
    type=click.Path(),
    help="Save output to file instead of printing",
)
@click.option(
    "--max-pages",
    # Zero or negative values would silently produce an empty PDF; reject them up front.
    type=click.IntRange(min=1),
    default=None,
    help=f"Maximum pages to analyze (default: auto-truncate at {DEFAULT_MAX_PAGES} if needed)",
)
def analyze(
    paper_path: str,
    provider: str | None,
    model: str | None,
    output: str | None,
    max_pages: int | None,
):
    """Analyze an academic paper and extract its core value."""
    # NOTE: the docstring above is click's --help text; keep it unchanged.

    # Check configuration
    if not is_configured():
        click.echo("Error: No API keys configured.", err=True)
        click.echo("Run 'fluff-cutter init' to set up your API keys.", err=True)
        click.echo("Or set environment variables:", err=True)
        click.echo(" export OPENAI_API_KEY=sk-...", err=True)
        click.echo(" export ANTHROPIC_API_KEY=sk-ant-...", err=True)
        sys.exit(1)

    # Load config and determine provider (CLI option wins over configured default)
    config = load_config()
    provider_name = provider or get_default_provider(config)

    # Get API key for the selected provider
    api_key = get_api_key(provider_name, config)
    if not api_key:
        click.echo(f"Error: No API key configured for {provider_name}.", err=True)
        click.echo(f"Run 'fluff-cutter init' or set {provider_name.upper()}_API_KEY.", err=True)
        sys.exit(1)

    # Get model: CLI option > configured model > provider default
    model_to_use = model or get_default_model(provider_name, config)

    # Create provider instance
    provider_class = PROVIDERS[provider_name]
    llm_provider = provider_class(api_key=api_key, model=model_to_use)

    click.echo(f"Analyzing paper: {paper_path}")
    click.echo(f"Using: {llm_provider.get_model_info()}")
    click.echo()

    # Read PDF
    click.echo("Reading PDF...")
    try:
        pdf_base64, total_pages, was_truncated = read_pdf_as_base64(paper_path, max_pages)
        filename = get_pdf_filename(paper_path)
        if was_truncated:
            click.echo(f" PDF truncated: analyzing first {max_pages} of {total_pages} pages")
        else:
            click.echo(f" PDF loaded successfully ({total_pages} pages)")
    except Exception as e:
        click.echo(f"Error reading PDF: {e}", err=True)
        sys.exit(1)

    # Analyze the paper (with auto-retry on token limit)
    click.echo("Analyzing paper (this may take a minute)...")
    try:
        result = analyze_paper(llm_provider, pdf_base64, filename)
    except Exception as e:
        # Retry only when truncating would actually shrink the document: the
        # first attempt was not already truncated AND the paper has more pages
        # than the auto-truncation limit. Otherwise the retry would re-send the
        # identical PDF and fail the same way.
        can_retry = (
            _is_token_limit_error(e)
            and not was_truncated
            and total_pages > DEFAULT_MAX_PAGES
        )
        if not can_retry:
            click.echo(f"Error during analysis: {e}", err=True)
            sys.exit(1)

        click.echo()
        click.echo(
            f" Paper exceeds token limit. Auto-truncating to {DEFAULT_MAX_PAGES} pages...",
            err=True,
        )
        try:
            pdf_base64, total_pages, _ = read_pdf_as_base64(paper_path, DEFAULT_MAX_PAGES)
            click.echo(f" Retrying with first {DEFAULT_MAX_PAGES} of {total_pages} pages...")
            result = analyze_paper(llm_provider, pdf_base64, filename)
        except Exception as retry_error:
            click.echo(f"Error during analysis: {retry_error}", err=True)
            sys.exit(1)

    click.echo()

    # Output results
    if output:
        save_analysis(result["title"], result["analysis"], result["model_info"], output)
        click.echo(f"Analysis saved to: {output}")
    else:
        print_analysis(result["title"], result["analysis"], result["model_info"])
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# Script entry point: run the click command group when executed directly.
if __name__ == "__main__":
    main()
|
|
@@ -70,6 +70,12 @@ def load_config() -> dict[str, Any]:
|
|
|
70
70
|
if os.environ.get("FLUFF_CUTTER_PROVIDER"):
|
|
71
71
|
config["default_provider"] = os.environ["FLUFF_CUTTER_PROVIDER"]
|
|
72
72
|
|
|
73
|
+
if os.environ.get("FLUFF_CUTTER_OPENAI_MODEL"):
|
|
74
|
+
config["openai_model"] = os.environ["FLUFF_CUTTER_OPENAI_MODEL"]
|
|
75
|
+
|
|
76
|
+
if os.environ.get("FLUFF_CUTTER_ANTHROPIC_MODEL"):
|
|
77
|
+
config["anthropic_model"] = os.environ["FLUFF_CUTTER_ANTHROPIC_MODEL"]
|
|
78
|
+
|
|
73
79
|
return config
|
|
74
80
|
|
|
75
81
|
|
|
@@ -107,6 +113,24 @@ def get_default_provider(config: dict[str, Any] | None = None) -> str:
|
|
|
107
113
|
return config.get("default_provider", DEFAULT_PROVIDER)
|
|
108
114
|
|
|
109
115
|
|
|
116
|
+
def get_default_model(provider: str, config: dict[str, Any] | None = None) -> str | None:
|
|
117
|
+
"""
|
|
118
|
+
Get the configured default model for a provider.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
provider: The provider name ('openai' or 'anthropic').
|
|
122
|
+
config: Optional pre-loaded config. If None, loads config.
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
The configured model name, or None to use provider default.
|
|
126
|
+
"""
|
|
127
|
+
if config is None:
|
|
128
|
+
config = load_config()
|
|
129
|
+
|
|
130
|
+
model_key = f"{provider}_model"
|
|
131
|
+
return config.get(model_key)
|
|
132
|
+
|
|
133
|
+
|
|
110
134
|
def is_configured() -> bool:
|
|
111
135
|
"""
|
|
112
136
|
Check if at least one provider is configured.
|
|
@@ -39,9 +39,7 @@ def print_analysis(title: str, analysis: str, model_info: str) -> None:
|
|
|
39
39
|
print(format_analysis(title, analysis, model_info))
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
def save_analysis(
|
|
43
|
-
title: str, analysis: str, model_info: str, output_path: str
|
|
44
|
-
) -> None:
|
|
42
|
+
def save_analysis(title: str, analysis: str, model_info: str, output_path: str) -> None:
|
|
45
43
|
"""
|
|
46
44
|
Save the formatted analysis to a file.
|
|
47
45
|
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""PDF handling for LLM analysis."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import io
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from pypdf import PdfReader, PdfWriter
|
|
8
|
+
|
|
9
|
+
# Default max pages when auto-truncating (roughly ~150K tokens for most papers)
|
|
10
|
+
DEFAULT_MAX_PAGES = 50
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_pdf_page_count(pdf_path: str | Path) -> int:
    """
    Count the pages of a PDF file.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        Number of pages reported by pypdf's PdfReader.
    """
    return len(PdfReader(Path(pdf_path)).pages)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def truncate_pdf(pdf_path: str | Path, max_pages: int) -> bytes:
    """
    Build a new PDF holding only the leading pages of an existing one.

    Args:
        pdf_path: Path to the PDF file.
        max_pages: Maximum number of pages to include.

    Returns:
        Serialized PDF bytes containing at most the first max_pages pages.
    """
    source = PdfReader(Path(pdf_path))
    page_limit = min(max_pages, len(source.pages))

    trimmed = PdfWriter()
    page_index = 0
    while page_index < page_limit:
        trimmed.add_page(source.pages[page_index])
        page_index += 1

    # Serialize into memory rather than to disk.
    buffer = io.BytesIO()
    trimmed.write(buffer)
    return buffer.getvalue()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def read_pdf_as_base64(
|
|
52
|
+
pdf_path: str | Path,
|
|
53
|
+
max_pages: int | None = None,
|
|
54
|
+
) -> tuple[str, int, bool]:
|
|
55
|
+
"""
|
|
56
|
+
Read a PDF file and encode it as base64, optionally truncating to max pages.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
pdf_path: Path to the PDF file.
|
|
60
|
+
max_pages: Maximum number of pages to include. If None, includes all pages.
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
Tuple of (base64-encoded PDF data, total page count, was_truncated).
|
|
64
|
+
|
|
65
|
+
Raises:
|
|
66
|
+
FileNotFoundError: If the PDF file doesn't exist.
|
|
67
|
+
ValueError: If the file is not a PDF.
|
|
68
|
+
"""
|
|
69
|
+
pdf_path = Path(pdf_path)
|
|
70
|
+
|
|
71
|
+
if not pdf_path.exists():
|
|
72
|
+
raise FileNotFoundError(f"PDF file not found: {pdf_path}")
|
|
73
|
+
|
|
74
|
+
if pdf_path.suffix.lower() != ".pdf":
|
|
75
|
+
raise ValueError(f"File is not a PDF: {pdf_path}")
|
|
76
|
+
|
|
77
|
+
total_pages = get_pdf_page_count(pdf_path)
|
|
78
|
+
was_truncated = False
|
|
79
|
+
|
|
80
|
+
if max_pages is not None and total_pages > max_pages:
|
|
81
|
+
pdf_data = truncate_pdf(pdf_path, max_pages)
|
|
82
|
+
was_truncated = True
|
|
83
|
+
else:
|
|
84
|
+
with open(pdf_path, "rb") as f:
|
|
85
|
+
pdf_data = f.read()
|
|
86
|
+
|
|
87
|
+
return base64.standard_b64encode(pdf_data).decode("utf-8"), total_pages, was_truncated
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_pdf_filename(pdf_path: str | Path) -> str:
|
|
91
|
+
"""
|
|
92
|
+
Get the filename from a PDF path.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
pdf_path: Path to the PDF file.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
The filename.
|
|
99
|
+
"""
|
|
100
|
+
return Path(pdf_path).name
|