fluff-cutter 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {fluff_cutter-0.1.0/src/fluff_cutter.egg-info → fluff_cutter-0.2.0}/PKG-INFO +30 -10
  2. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/README.md +28 -9
  3. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/pyproject.toml +13 -1
  4. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/__init__.py +1 -1
  5. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/analyzer.py +2 -1
  6. fluff_cutter-0.2.0/src/fluff_cutter/cli.py +291 -0
  7. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/config.py +24 -0
  8. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/output.py +1 -3
  9. fluff_cutter-0.2.0/src/fluff_cutter/pdf.py +100 -0
  10. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/anthropic.py +1 -1
  11. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/openai.py +2 -2
  12. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0/src/fluff_cutter.egg-info}/PKG-INFO +30 -10
  13. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/SOURCES.txt +6 -1
  14. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/requires.txt +1 -0
  15. fluff_cutter-0.2.0/tests/test_analyzer.py +84 -0
  16. fluff_cutter-0.2.0/tests/test_config.py +195 -0
  17. fluff_cutter-0.2.0/tests/test_output.py +86 -0
  18. fluff_cutter-0.2.0/tests/test_pdf.py +170 -0
  19. fluff_cutter-0.2.0/tests/test_providers.py +93 -0
  20. fluff_cutter-0.1.0/src/fluff_cutter/cli.py +0 -185
  21. fluff_cutter-0.1.0/src/fluff_cutter/pdf.py +0 -45
  22. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/LICENSE +0 -0
  23. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/setup.cfg +0 -0
  24. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/__init__.py +1 -1
  25. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter/providers/base.py +0 -0
  26. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/dependency_links.txt +0 -0
  27. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/entry_points.txt +0 -0
  28. {fluff_cutter-0.1.0 → fluff_cutter-0.2.0}/src/fluff_cutter.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fluff-cutter
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A CLI tool to analyze academic papers and extract their core value
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: click>=8.0
9
9
  Requires-Dist: openai>=1.0
10
10
  Requires-Dist: anthropic>=0.18
11
11
  Requires-Dist: python-dotenv>=1.0
12
+ Requires-Dist: pypdf>=4.0
12
13
  Provides-Extra: dev
13
14
  Requires-Dist: pytest>=7.0; extra == "dev"
14
15
  Requires-Dist: ruff>=0.1.0; extra == "dev"
@@ -16,6 +17,8 @@ Dynamic: license-file
16
17
 
17
18
  # Paper Fluff Cutter
18
19
 
20
+ [![PyPI version](https://badge.fury.io/py/fluff-cutter.svg)](https://pypi.org/project/fluff-cutter/)
21
+
19
22
  A CLI tool that cuts through academic paper fluff to extract what actually matters.
20
23
 
21
24
  Most research has close to zero value. This tool uses multimodal LLMs to analyze papers and answer the three questions every paper should be able to answer:
@@ -26,13 +29,17 @@ Most research has close to zero value. This tool uses multimodal LLMs to analyze
26
29
 
27
30
  ## Installation
28
31
 
29
- ### Prerequisites
32
+ ```bash
33
+ pip install fluff-cutter
34
+ ```
30
35
 
31
- - Python 3.10+
36
+ Requires Python 3.10+.
32
37
 
33
- ### Install the tool
38
+ ### Development install
34
39
 
35
40
  ```bash
41
+ git clone https://github.com/weijianzhg/paper-fluff-cutter.git
42
+ cd paper-fluff-cutter
36
43
  pip install -e .
37
44
  ```
38
45
 
@@ -44,7 +51,7 @@ pip install -e .
44
51
  fluff-cutter init
45
52
  ```
46
53
 
47
- This will prompt you for your API keys and save them to `~/.config/fluff-cutter/config.json`.
54
+ This will prompt you for your API keys, default provider, and model preferences, then save them to `~/.config/fluff-cutter/config.json`.
48
55
 
49
56
  ### Option 2: Environment variables
50
57
 
@@ -52,6 +59,8 @@ This will prompt you for your API keys and save them to `~/.config/fluff-cutter/
52
59
  export OPENAI_API_KEY=sk-your-key-here
53
60
  export ANTHROPIC_API_KEY=sk-ant-your-key-here
54
61
  export FLUFF_CUTTER_PROVIDER=anthropic # optional, default provider
62
+ export FLUFF_CUTTER_OPENAI_MODEL=gpt-5.2 # optional, override default model
63
+ export FLUFF_CUTTER_ANTHROPIC_MODEL=claude-sonnet-4-5 # optional, override default model
55
64
  ```
56
65
 
57
66
  ## Usage
@@ -72,8 +81,8 @@ fluff-cutter analyze paper.pdf --provider anthropic
72
81
  ### Specify model
73
82
 
74
83
  ```bash
75
- fluff-cutter analyze paper.pdf --provider openai --model gpt-4o
76
- fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-20250514
84
+ fluff-cutter analyze paper.pdf --provider openai --model gpt-5.2
85
+ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-5
77
86
  ```
78
87
 
79
88
  ### Save output to file
@@ -82,12 +91,22 @@ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-2025
82
91
  fluff-cutter analyze paper.pdf --output analysis.md
83
92
  ```
84
93
 
94
+ ### Long papers
95
+
96
+ For very long papers that exceed the model's token limit, you can limit the number of pages:
97
+
98
+ ```bash
99
+ fluff-cutter analyze paper.pdf --max-pages 30
100
+ ```
101
+
102
+ If you don't specify `--max-pages` and the paper exceeds the token limit, it will automatically truncate to the first 50 pages and retry.
103
+
85
104
  ## Supported Providers
86
105
 
87
106
  | Provider | Default Model | Environment Variable |
88
107
  |----------|---------------|---------------------|
89
- | OpenAI | gpt-4o | `OPENAI_API_KEY` |
90
- | Anthropic | claude-sonnet-4-20250514 | `ANTHROPIC_API_KEY` |
108
+ | OpenAI | gpt-5.2 | `OPENAI_API_KEY` |
109
+ | Anthropic | claude-sonnet-4-5 | `ANTHROPIC_API_KEY` |
91
110
 
92
111
  Both providers now support native PDF input - no external dependencies like poppler needed.
93
112
 
@@ -96,8 +115,9 @@ Both providers now support native PDF input - no external dependencies like popp
96
115
  Configuration is loaded with the following precedence (highest to lowest):
97
116
 
98
117
  1. Command-line arguments (`--provider`, `--model`)
99
- 2. Environment variables
118
+ 2. Environment variables (`FLUFF_CUTTER_PROVIDER`, `FLUFF_CUTTER_OPENAI_MODEL`, `FLUFF_CUTTER_ANTHROPIC_MODEL`)
100
119
  3. Config file (`~/.config/fluff-cutter/config.json`)
120
+ 4. Provider defaults (gpt-5.2 for OpenAI, claude-sonnet-4-5 for Anthropic)
101
121
 
102
122
  ## License
103
123
 
@@ -1,5 +1,7 @@
1
1
  # Paper Fluff Cutter
2
2
 
3
+ [![PyPI version](https://badge.fury.io/py/fluff-cutter.svg)](https://pypi.org/project/fluff-cutter/)
4
+
3
5
  A CLI tool that cuts through academic paper fluff to extract what actually matters.
4
6
 
5
7
  Most research has close to zero value. This tool uses multimodal LLMs to analyze papers and answer the three questions every paper should be able to answer:
@@ -10,13 +12,17 @@ Most research has close to zero value. This tool uses multimodal LLMs to analyze
10
12
 
11
13
  ## Installation
12
14
 
13
- ### Prerequisites
15
+ ```bash
16
+ pip install fluff-cutter
17
+ ```
14
18
 
15
- - Python 3.10+
19
+ Requires Python 3.10+.
16
20
 
17
- ### Install the tool
21
+ ### Development install
18
22
 
19
23
  ```bash
24
+ git clone https://github.com/weijianzhg/paper-fluff-cutter.git
25
+ cd paper-fluff-cutter
20
26
  pip install -e .
21
27
  ```
22
28
 
@@ -28,7 +34,7 @@ pip install -e .
28
34
  fluff-cutter init
29
35
  ```
30
36
 
31
- This will prompt you for your API keys and save them to `~/.config/fluff-cutter/config.json`.
37
+ This will prompt you for your API keys, default provider, and model preferences, then save them to `~/.config/fluff-cutter/config.json`.
32
38
 
33
39
  ### Option 2: Environment variables
34
40
 
@@ -36,6 +42,8 @@ This will prompt you for your API keys and save them to `~/.config/fluff-cutter/
36
42
  export OPENAI_API_KEY=sk-your-key-here
37
43
  export ANTHROPIC_API_KEY=sk-ant-your-key-here
38
44
  export FLUFF_CUTTER_PROVIDER=anthropic # optional, default provider
45
+ export FLUFF_CUTTER_OPENAI_MODEL=gpt-5.2 # optional, override default model
46
+ export FLUFF_CUTTER_ANTHROPIC_MODEL=claude-sonnet-4-5 # optional, override default model
39
47
  ```
40
48
 
41
49
  ## Usage
@@ -56,8 +64,8 @@ fluff-cutter analyze paper.pdf --provider anthropic
56
64
  ### Specify model
57
65
 
58
66
  ```bash
59
- fluff-cutter analyze paper.pdf --provider openai --model gpt-4o
60
- fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-20250514
67
+ fluff-cutter analyze paper.pdf --provider openai --model gpt-5.2
68
+ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-5
61
69
  ```
62
70
 
63
71
  ### Save output to file
@@ -66,12 +74,22 @@ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-2025
66
74
  fluff-cutter analyze paper.pdf --output analysis.md
67
75
  ```
68
76
 
77
+ ### Long papers
78
+
79
+ For very long papers that exceed the model's token limit, you can limit the number of pages:
80
+
81
+ ```bash
82
+ fluff-cutter analyze paper.pdf --max-pages 30
83
+ ```
84
+
85
+ If you don't specify `--max-pages` and the paper exceeds the token limit, it will automatically truncate to the first 50 pages and retry.
86
+
69
87
  ## Supported Providers
70
88
 
71
89
  | Provider | Default Model | Environment Variable |
72
90
  |----------|---------------|---------------------|
73
- | OpenAI | gpt-4o | `OPENAI_API_KEY` |
74
- | Anthropic | claude-sonnet-4-20250514 | `ANTHROPIC_API_KEY` |
91
+ | OpenAI | gpt-5.2 | `OPENAI_API_KEY` |
92
+ | Anthropic | claude-sonnet-4-5 | `ANTHROPIC_API_KEY` |
75
93
 
76
94
  Both providers now support native PDF input - no external dependencies like poppler needed.
77
95
 
@@ -80,8 +98,9 @@ Both providers now support native PDF input - no external dependencies like popp
80
98
  Configuration is loaded with the following precedence (highest to lowest):
81
99
 
82
100
  1. Command-line arguments (`--provider`, `--model`)
83
- 2. Environment variables
101
+ 2. Environment variables (`FLUFF_CUTTER_PROVIDER`, `FLUFF_CUTTER_OPENAI_MODEL`, `FLUFF_CUTTER_ANTHROPIC_MODEL`)
84
102
  3. Config file (`~/.config/fluff-cutter/config.json`)
103
+ 4. Provider defaults (gpt-5.2 for OpenAI, claude-sonnet-4-5 for Anthropic)
85
104
 
86
105
  ## License
87
106
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "fluff-cutter"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "A CLI tool to analyze academic papers and extract their core value"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -13,6 +13,7 @@ dependencies = [
13
13
  "openai>=1.0",
14
14
  "anthropic>=0.18",
15
15
  "python-dotenv>=1.0",
16
+ "pypdf>=4.0",
16
17
  ]
17
18
 
18
19
  [project.scripts]
@@ -26,3 +27,14 @@ dev = [
26
27
 
27
28
  [tool.setuptools.packages.find]
28
29
  where = ["src"]
30
+
31
+ [tool.pytest.ini_options]
32
+ testpaths = ["tests"]
33
+ pythonpath = ["src"]
34
+
35
+ [tool.ruff]
36
+ target-version = "py310"
37
+ line-length = 100
38
+
39
+ [tool.ruff.lint]
40
+ select = ["E", "F", "I", "W"]
@@ -1,3 +1,3 @@
1
1
  """Paper Fluff Cutter - Extract the core value from academic papers."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.2.0"
@@ -2,7 +2,8 @@
2
2
 
3
3
  from .providers.base import BaseLLMProvider
4
4
 
5
- ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through all the fluff and extract only what matters.
5
+ ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through \
6
+ all the fluff and extract only what matters.
6
7
 
7
8
  Answer these three questions concisely and critically:
8
9
 
@@ -0,0 +1,291 @@
1
+ """Command-line interface for Paper Fluff Cutter."""
2
+
3
+ import sys
4
+
5
+ import click
6
+
7
+ from . import __version__
8
+ from .analyzer import analyze_paper
9
+ from .config import (
10
+ get_api_key,
11
+ get_config_path,
12
+ get_default_model,
13
+ get_default_provider,
14
+ is_configured,
15
+ load_config,
16
+ save_config,
17
+ )
18
+ from .output import print_analysis, save_analysis
19
+ from .pdf import DEFAULT_MAX_PAGES, get_pdf_filename, read_pdf_as_base64
20
+ from .providers import AnthropicProvider, OpenAIProvider
21
+
22
+ PROVIDERS = {
23
+ "openai": OpenAIProvider,
24
+ "anthropic": AnthropicProvider,
25
+ }
26
+
27
+
28
+ @click.group()
29
+ @click.version_option(version=__version__)
30
+ def main():
31
+ """Paper Fluff Cutter - Extract the core value from academic papers."""
32
+ pass
33
+
34
+
35
+ def _mask_key(key: str) -> str:
36
+ """Mask an API key for display, showing only first 4 and last 4 chars."""
37
+ if len(key) <= 12:
38
+ return "*" * len(key)
39
+ return f"{key[:4]}...{key[-4:]}"
40
+
41
+
42
+ @main.command()
43
+ def init():
44
+ """Initialize configuration with API keys, provider, and model settings."""
45
+ click.echo("Paper Fluff Cutter Configuration")
46
+ click.echo("=" * 40)
47
+ click.echo()
48
+
49
+ # Load existing configuration (includes env vars)
50
+ existing_config = load_config()
51
+ existing_openai_key = existing_config.get("openai_api_key")
52
+ existing_anthropic_key = existing_config.get("anthropic_api_key")
53
+
54
+ # Show current status
55
+ if existing_openai_key or existing_anthropic_key:
56
+ click.echo("Current configuration:")
57
+ if existing_openai_key:
58
+ click.echo(f" OpenAI API Key: {_mask_key(existing_openai_key)}")
59
+ if existing_anthropic_key:
60
+ click.echo(f" Anthropic API Key: {_mask_key(existing_anthropic_key)}")
61
+ click.echo()
62
+
63
+ config = {}
64
+
65
+ # OpenAI API Key
66
+ click.echo("Enter your API keys (press Enter to keep existing or skip):")
67
+ click.echo()
68
+
69
+ openai_prompt = "OpenAI API Key"
70
+ if existing_openai_key:
71
+ openai_prompt += f" [{_mask_key(existing_openai_key)}]"
72
+
73
+ openai_key = click.prompt(
74
+ openai_prompt,
75
+ default="",
76
+ hide_input=True,
77
+ show_default=False,
78
+ )
79
+ if openai_key:
80
+ config["openai_api_key"] = openai_key
81
+ click.echo(" ✓ OpenAI API key updated")
82
+ elif existing_openai_key:
83
+ config["openai_api_key"] = existing_openai_key
84
+ click.echo(" ✓ OpenAI API key kept")
85
+
86
+ # Anthropic API Key
87
+ anthropic_prompt = "Anthropic API Key"
88
+ if existing_anthropic_key:
89
+ anthropic_prompt += f" [{_mask_key(existing_anthropic_key)}]"
90
+
91
+ anthropic_key = click.prompt(
92
+ anthropic_prompt,
93
+ default="",
94
+ hide_input=True,
95
+ show_default=False,
96
+ )
97
+ if anthropic_key:
98
+ config["anthropic_api_key"] = anthropic_key
99
+ click.echo(" ✓ Anthropic API key updated")
100
+ elif existing_anthropic_key:
101
+ config["anthropic_api_key"] = existing_anthropic_key
102
+ click.echo(" ✓ Anthropic API key kept")
103
+
104
+ if not config:
105
+ click.echo()
106
+ click.echo("No API keys provided. Configuration not saved.")
107
+ click.echo("You can set keys via environment variables instead:")
108
+ click.echo(" export OPENAI_API_KEY=sk-...")
109
+ click.echo(" export ANTHROPIC_API_KEY=sk-ant-...")
110
+ return
111
+
112
+ # Default provider
113
+ click.echo()
114
+ available_providers = []
115
+ if "openai_api_key" in config:
116
+ available_providers.append("openai")
117
+ if "anthropic_api_key" in config:
118
+ available_providers.append("anthropic")
119
+
120
+ current_default = existing_config.get("default_provider")
121
+ if len(available_providers) > 1:
122
+ default_choice = current_default if current_default in available_providers else None
123
+ if not default_choice:
124
+ default_choice = "anthropic" if "anthropic" in available_providers else "openai"
125
+ default_provider = click.prompt(
126
+ "Default provider",
127
+ type=click.Choice(available_providers),
128
+ default=default_choice,
129
+ )
130
+ else:
131
+ default_provider = available_providers[0]
132
+
133
+ config["default_provider"] = default_provider
134
+
135
+ # Model configuration
136
+ click.echo()
137
+ click.echo("Configure default models (press Enter for provider defaults):")
138
+ click.echo()
139
+
140
+ if "openai_api_key" in config:
141
+ openai_default = OpenAIProvider(api_key="").default_model
142
+ current_openai_model = existing_config.get("openai_model", openai_default)
143
+ openai_model = click.prompt(
144
+ "OpenAI model",
145
+ default=current_openai_model,
146
+ show_default=True,
147
+ )
148
+ if openai_model != openai_default:
149
+ config["openai_model"] = openai_model
150
+ click.echo(f" ✓ OpenAI model set to: {openai_model}")
151
+ else:
152
+ click.echo(f" Using default: {openai_default}")
153
+
154
+ if "anthropic_api_key" in config:
155
+ anthropic_default = AnthropicProvider(api_key="").default_model
156
+ current_anthropic_model = existing_config.get("anthropic_model", anthropic_default)
157
+ anthropic_model = click.prompt(
158
+ "Anthropic model",
159
+ default=current_anthropic_model,
160
+ show_default=True,
161
+ )
162
+ if anthropic_model != anthropic_default:
163
+ config["anthropic_model"] = anthropic_model
164
+ click.echo(f" ✓ Anthropic model set to: {anthropic_model}")
165
+ else:
166
+ click.echo(f" Using default: {anthropic_default}")
167
+
168
+ # Save configuration
169
+ save_config(config)
170
+
171
+ click.echo()
172
+ click.echo(f"Configuration saved to: {get_config_path()}")
173
+ click.echo(f"Default provider: {default_provider}")
174
+ click.echo()
175
+ click.echo("You're ready to analyze papers!")
176
+ click.echo(" fluff-cutter analyze <paper.pdf>")
177
+
178
+
179
+ @main.command()
180
+ @click.argument("paper_path", type=click.Path(exists=True))
181
+ @click.option(
182
+ "-p",
183
+ "--provider",
184
+ type=click.Choice(["openai", "anthropic"]),
185
+ help="LLM provider to use",
186
+ )
187
+ @click.option(
188
+ "-m",
189
+ "--model",
190
+ help="Specific model to use (overrides provider default)",
191
+ )
192
+ @click.option(
193
+ "-o",
194
+ "--output",
195
+ type=click.Path(),
196
+ help="Save output to file instead of printing",
197
+ )
198
+ @click.option(
199
+ "--max-pages",
200
+ type=int,
201
+ default=None,
202
+ help=f"Maximum pages to analyze (default: auto-truncate at {DEFAULT_MAX_PAGES} if needed)",
203
+ )
204
+ def analyze(
205
+ paper_path: str,
206
+ provider: str | None,
207
+ model: str | None,
208
+ output: str | None,
209
+ max_pages: int | None,
210
+ ):
211
+ """Analyze an academic paper and extract its core value."""
212
+ # Check configuration
213
+ if not is_configured():
214
+ click.echo("Error: No API keys configured.", err=True)
215
+ click.echo("Run 'fluff-cutter init' to set up your API keys.", err=True)
216
+ click.echo("Or set environment variables:", err=True)
217
+ click.echo(" export OPENAI_API_KEY=sk-...", err=True)
218
+ click.echo(" export ANTHROPIC_API_KEY=sk-ant-...", err=True)
219
+ sys.exit(1)
220
+
221
+ # Load config and determine provider
222
+ config = load_config()
223
+ provider_name = provider or get_default_provider(config)
224
+
225
+ # Get API key for the selected provider
226
+ api_key = get_api_key(provider_name, config)
227
+ if not api_key:
228
+ click.echo(f"Error: No API key configured for {provider_name}.", err=True)
229
+ click.echo(f"Run 'fluff-cutter init' or set {provider_name.upper()}_API_KEY.", err=True)
230
+ sys.exit(1)
231
+
232
+ # Get model: CLI option > config file > provider default
233
+ model_to_use = model or get_default_model(provider_name, config)
234
+
235
+ # Create provider instance
236
+ provider_class = PROVIDERS[provider_name]
237
+ llm_provider = provider_class(api_key=api_key, model=model_to_use)
238
+
239
+ click.echo(f"Analyzing paper: {paper_path}")
240
+ click.echo(f"Using: {llm_provider.get_model_info()}")
241
+ click.echo()
242
+
243
+ # Read PDF
244
+ click.echo("Reading PDF...")
245
+ try:
246
+ pdf_base64, total_pages, was_truncated = read_pdf_as_base64(paper_path, max_pages)
247
+ filename = get_pdf_filename(paper_path)
248
+ if was_truncated:
249
+ click.echo(f" PDF truncated: analyzing first {max_pages} of {total_pages} pages")
250
+ else:
251
+ click.echo(f" PDF loaded successfully ({total_pages} pages)")
252
+ except Exception as e:
253
+ click.echo(f"Error reading PDF: {e}", err=True)
254
+ sys.exit(1)
255
+
256
+ # Analyze the paper (with auto-retry on token limit)
257
+ click.echo("Analyzing paper (this may take a minute)...")
258
+ try:
259
+ result = analyze_paper(llm_provider, pdf_base64, filename)
260
+ except Exception as e:
261
+ error_msg = str(e)
262
+ # Check if it's a token limit error and we haven't already truncated
263
+ if "too long" in error_msg.lower() and "token" in error_msg.lower() and not was_truncated:
264
+ click.echo()
265
+ click.echo(
266
+ f" Paper exceeds token limit. Auto-truncating to {DEFAULT_MAX_PAGES} pages...",
267
+ err=True,
268
+ )
269
+ try:
270
+ pdf_base64, total_pages, _ = read_pdf_as_base64(paper_path, DEFAULT_MAX_PAGES)
271
+ click.echo(f" Retrying with first {DEFAULT_MAX_PAGES} of {total_pages} pages...")
272
+ result = analyze_paper(llm_provider, pdf_base64, filename)
273
+ except Exception as retry_error:
274
+ click.echo(f"Error during analysis: {retry_error}", err=True)
275
+ sys.exit(1)
276
+ else:
277
+ click.echo(f"Error during analysis: {e}", err=True)
278
+ sys.exit(1)
279
+
280
+ click.echo()
281
+
282
+ # Output results
283
+ if output:
284
+ save_analysis(result["title"], result["analysis"], result["model_info"], output)
285
+ click.echo(f"Analysis saved to: {output}")
286
+ else:
287
+ print_analysis(result["title"], result["analysis"], result["model_info"])
288
+
289
+
290
+ if __name__ == "__main__":
291
+ main()
@@ -70,6 +70,12 @@ def load_config() -> dict[str, Any]:
70
70
  if os.environ.get("FLUFF_CUTTER_PROVIDER"):
71
71
  config["default_provider"] = os.environ["FLUFF_CUTTER_PROVIDER"]
72
72
 
73
+ if os.environ.get("FLUFF_CUTTER_OPENAI_MODEL"):
74
+ config["openai_model"] = os.environ["FLUFF_CUTTER_OPENAI_MODEL"]
75
+
76
+ if os.environ.get("FLUFF_CUTTER_ANTHROPIC_MODEL"):
77
+ config["anthropic_model"] = os.environ["FLUFF_CUTTER_ANTHROPIC_MODEL"]
78
+
73
79
  return config
74
80
 
75
81
 
@@ -107,6 +113,24 @@ def get_default_provider(config: dict[str, Any] | None = None) -> str:
107
113
  return config.get("default_provider", DEFAULT_PROVIDER)
108
114
 
109
115
 
116
+ def get_default_model(provider: str, config: dict[str, Any] | None = None) -> str | None:
117
+ """
118
+ Get the configured default model for a provider.
119
+
120
+ Args:
121
+ provider: The provider name ('openai' or 'anthropic').
122
+ config: Optional pre-loaded config. If None, loads config.
123
+
124
+ Returns:
125
+ The configured model name, or None to use provider default.
126
+ """
127
+ if config is None:
128
+ config = load_config()
129
+
130
+ model_key = f"{provider}_model"
131
+ return config.get(model_key)
132
+
133
+
110
134
  def is_configured() -> bool:
111
135
  """
112
136
  Check if at least one provider is configured.
@@ -39,9 +39,7 @@ def print_analysis(title: str, analysis: str, model_info: str) -> None:
39
39
  print(format_analysis(title, analysis, model_info))
40
40
 
41
41
 
42
- def save_analysis(
43
- title: str, analysis: str, model_info: str, output_path: str
44
- ) -> None:
42
+ def save_analysis(title: str, analysis: str, model_info: str, output_path: str) -> None:
45
43
  """
46
44
  Save the formatted analysis to a file.
47
45
 
@@ -0,0 +1,100 @@
1
+ """PDF handling for LLM analysis."""
2
+
3
+ import base64
4
+ import io
5
+ from pathlib import Path
6
+
7
+ from pypdf import PdfReader, PdfWriter
8
+
9
+ # Default max pages when auto-truncating (roughly ~150K tokens for most papers)
10
+ DEFAULT_MAX_PAGES = 50
11
+
12
+
13
+ def get_pdf_page_count(pdf_path: str | Path) -> int:
14
+ """
15
+ Get the number of pages in a PDF file.
16
+
17
+ Args:
18
+ pdf_path: Path to the PDF file.
19
+
20
+ Returns:
21
+ Number of pages in the PDF.
22
+ """
23
+ pdf_path = Path(pdf_path)
24
+ reader = PdfReader(pdf_path)
25
+ return len(reader.pages)
26
+
27
+
28
+ def truncate_pdf(pdf_path: str | Path, max_pages: int) -> bytes:
29
+ """
30
+ Read a PDF and return only the first N pages as bytes.
31
+
32
+ Args:
33
+ pdf_path: Path to the PDF file.
34
+ max_pages: Maximum number of pages to include.
35
+
36
+ Returns:
37
+ PDF data as bytes containing only the first max_pages pages.
38
+ """
39
+ pdf_path = Path(pdf_path)
40
+ reader = PdfReader(pdf_path)
41
+
42
+ writer = PdfWriter()
43
+ for i in range(min(max_pages, len(reader.pages))):
44
+ writer.add_page(reader.pages[i])
45
+
46
+ output = io.BytesIO()
47
+ writer.write(output)
48
+ return output.getvalue()
49
+
50
+
51
+ def read_pdf_as_base64(
52
+ pdf_path: str | Path,
53
+ max_pages: int | None = None,
54
+ ) -> tuple[str, int, bool]:
55
+ """
56
+ Read a PDF file and encode it as base64, optionally truncating to max pages.
57
+
58
+ Args:
59
+ pdf_path: Path to the PDF file.
60
+ max_pages: Maximum number of pages to include. If None, includes all pages.
61
+
62
+ Returns:
63
+ Tuple of (base64-encoded PDF data, total page count, was_truncated).
64
+
65
+ Raises:
66
+ FileNotFoundError: If the PDF file doesn't exist.
67
+ ValueError: If the file is not a PDF.
68
+ """
69
+ pdf_path = Path(pdf_path)
70
+
71
+ if not pdf_path.exists():
72
+ raise FileNotFoundError(f"PDF file not found: {pdf_path}")
73
+
74
+ if pdf_path.suffix.lower() != ".pdf":
75
+ raise ValueError(f"File is not a PDF: {pdf_path}")
76
+
77
+ total_pages = get_pdf_page_count(pdf_path)
78
+ was_truncated = False
79
+
80
+ if max_pages is not None and total_pages > max_pages:
81
+ pdf_data = truncate_pdf(pdf_path, max_pages)
82
+ was_truncated = True
83
+ else:
84
+ with open(pdf_path, "rb") as f:
85
+ pdf_data = f.read()
86
+
87
+ return base64.standard_b64encode(pdf_data).decode("utf-8"), total_pages, was_truncated
88
+
89
+
90
+ def get_pdf_filename(pdf_path: str | Path) -> str:
91
+ """
92
+ Get the filename from a PDF path.
93
+
94
+ Args:
95
+ pdf_path: Path to the PDF file.
96
+
97
+ Returns:
98
+ The filename.
99
+ """
100
+ return Path(pdf_path).name