fluff-cutter 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fluff_cutter/__init__.py +1 -1
- fluff_cutter/analyzer.py +2 -1
- fluff_cutter/cli.py +122 -16
- fluff_cutter/config.py +24 -0
- fluff_cutter/output.py +1 -3
- fluff_cutter/pdf.py +61 -6
- fluff_cutter/providers/__init__.py +1 -1
- fluff_cutter/providers/anthropic.py +1 -1
- fluff_cutter/providers/openai.py +2 -2
- {fluff_cutter-0.1.0.dist-info → fluff_cutter-0.2.0.dist-info}/METADATA +30 -10
- fluff_cutter-0.2.0.dist-info/RECORD +16 -0
- fluff_cutter-0.1.0.dist-info/RECORD +0 -16
- {fluff_cutter-0.1.0.dist-info → fluff_cutter-0.2.0.dist-info}/WHEEL +0 -0
- {fluff_cutter-0.1.0.dist-info → fluff_cutter-0.2.0.dist-info}/entry_points.txt +0 -0
- {fluff_cutter-0.1.0.dist-info → fluff_cutter-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {fluff_cutter-0.1.0.dist-info → fluff_cutter-0.2.0.dist-info}/top_level.txt +0 -0
fluff_cutter/__init__.py
CHANGED
fluff_cutter/analyzer.py
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from .providers.base import BaseLLMProvider
|
|
4
4
|
|
|
5
|
-
ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through
|
|
5
|
+
ANALYSIS_PROMPT = """You are analyzing an academic paper. Your job is to cut through \
|
|
6
|
+
all the fluff and extract only what matters.
|
|
6
7
|
|
|
7
8
|
Answer these three questions concisely and critically:
|
|
8
9
|
|
fluff_cutter/cli.py
CHANGED
|
@@ -9,16 +9,16 @@ from .analyzer import analyze_paper
|
|
|
9
9
|
from .config import (
|
|
10
10
|
get_api_key,
|
|
11
11
|
get_config_path,
|
|
12
|
+
get_default_model,
|
|
12
13
|
get_default_provider,
|
|
13
14
|
is_configured,
|
|
14
15
|
load_config,
|
|
15
16
|
save_config,
|
|
16
17
|
)
|
|
17
18
|
from .output import print_analysis, save_analysis
|
|
18
|
-
from .pdf import get_pdf_filename, read_pdf_as_base64
|
|
19
|
+
from .pdf import DEFAULT_MAX_PAGES, get_pdf_filename, read_pdf_as_base64
|
|
19
20
|
from .providers import AnthropicProvider, OpenAIProvider
|
|
20
21
|
|
|
21
|
-
|
|
22
22
|
PROVIDERS = {
|
|
23
23
|
"openai": OpenAIProvider,
|
|
24
24
|
"anthropic": AnthropicProvider,
|
|
@@ -32,39 +32,74 @@ def main():
|
|
|
32
32
|
pass
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
def _mask_key(key: str) -> str:
|
|
36
|
+
"""Mask an API key for display, showing only first 4 and last 4 chars."""
|
|
37
|
+
if len(key) <= 12:
|
|
38
|
+
return "*" * len(key)
|
|
39
|
+
return f"{key[:4]}...{key[-4:]}"
|
|
40
|
+
|
|
41
|
+
|
|
35
42
|
@main.command()
|
|
36
43
|
def init():
|
|
37
|
-
"""Initialize configuration with API keys."""
|
|
44
|
+
"""Initialize configuration with API keys, provider, and model settings."""
|
|
38
45
|
click.echo("Paper Fluff Cutter Configuration")
|
|
39
46
|
click.echo("=" * 40)
|
|
40
47
|
click.echo()
|
|
41
48
|
|
|
49
|
+
# Load existing configuration (includes env vars)
|
|
50
|
+
existing_config = load_config()
|
|
51
|
+
existing_openai_key = existing_config.get("openai_api_key")
|
|
52
|
+
existing_anthropic_key = existing_config.get("anthropic_api_key")
|
|
53
|
+
|
|
54
|
+
# Show current status
|
|
55
|
+
if existing_openai_key or existing_anthropic_key:
|
|
56
|
+
click.echo("Current configuration:")
|
|
57
|
+
if existing_openai_key:
|
|
58
|
+
click.echo(f" OpenAI API Key: {_mask_key(existing_openai_key)}")
|
|
59
|
+
if existing_anthropic_key:
|
|
60
|
+
click.echo(f" Anthropic API Key: {_mask_key(existing_anthropic_key)}")
|
|
61
|
+
click.echo()
|
|
62
|
+
|
|
42
63
|
config = {}
|
|
43
64
|
|
|
44
65
|
# OpenAI API Key
|
|
45
|
-
click.echo("Enter your API keys (press Enter to skip):")
|
|
66
|
+
click.echo("Enter your API keys (press Enter to keep existing or skip):")
|
|
46
67
|
click.echo()
|
|
47
68
|
|
|
69
|
+
openai_prompt = "OpenAI API Key"
|
|
70
|
+
if existing_openai_key:
|
|
71
|
+
openai_prompt += f" [{_mask_key(existing_openai_key)}]"
|
|
72
|
+
|
|
48
73
|
openai_key = click.prompt(
|
|
49
|
-
|
|
74
|
+
openai_prompt,
|
|
50
75
|
default="",
|
|
51
76
|
hide_input=True,
|
|
52
77
|
show_default=False,
|
|
53
78
|
)
|
|
54
79
|
if openai_key:
|
|
55
80
|
config["openai_api_key"] = openai_key
|
|
56
|
-
click.echo(" ✓ OpenAI API key
|
|
81
|
+
click.echo(" ✓ OpenAI API key updated")
|
|
82
|
+
elif existing_openai_key:
|
|
83
|
+
config["openai_api_key"] = existing_openai_key
|
|
84
|
+
click.echo(" ✓ OpenAI API key kept")
|
|
57
85
|
|
|
58
86
|
# Anthropic API Key
|
|
87
|
+
anthropic_prompt = "Anthropic API Key"
|
|
88
|
+
if existing_anthropic_key:
|
|
89
|
+
anthropic_prompt += f" [{_mask_key(existing_anthropic_key)}]"
|
|
90
|
+
|
|
59
91
|
anthropic_key = click.prompt(
|
|
60
|
-
|
|
92
|
+
anthropic_prompt,
|
|
61
93
|
default="",
|
|
62
94
|
hide_input=True,
|
|
63
95
|
show_default=False,
|
|
64
96
|
)
|
|
65
97
|
if anthropic_key:
|
|
66
98
|
config["anthropic_api_key"] = anthropic_key
|
|
67
|
-
click.echo(" ✓ Anthropic API key
|
|
99
|
+
click.echo(" ✓ Anthropic API key updated")
|
|
100
|
+
elif existing_anthropic_key:
|
|
101
|
+
config["anthropic_api_key"] = existing_anthropic_key
|
|
102
|
+
click.echo(" ✓ Anthropic API key kept")
|
|
68
103
|
|
|
69
104
|
if not config:
|
|
70
105
|
click.echo()
|
|
@@ -82,17 +117,54 @@ def init():
|
|
|
82
117
|
if "anthropic_api_key" in config:
|
|
83
118
|
available_providers.append("anthropic")
|
|
84
119
|
|
|
120
|
+
current_default = existing_config.get("default_provider")
|
|
85
121
|
if len(available_providers) > 1:
|
|
122
|
+
default_choice = current_default if current_default in available_providers else None
|
|
123
|
+
if not default_choice:
|
|
124
|
+
default_choice = "anthropic" if "anthropic" in available_providers else "openai"
|
|
86
125
|
default_provider = click.prompt(
|
|
87
126
|
"Default provider",
|
|
88
127
|
type=click.Choice(available_providers),
|
|
89
|
-
default=
|
|
128
|
+
default=default_choice,
|
|
90
129
|
)
|
|
91
130
|
else:
|
|
92
131
|
default_provider = available_providers[0]
|
|
93
132
|
|
|
94
133
|
config["default_provider"] = default_provider
|
|
95
134
|
|
|
135
|
+
# Model configuration
|
|
136
|
+
click.echo()
|
|
137
|
+
click.echo("Configure default models (press Enter for provider defaults):")
|
|
138
|
+
click.echo()
|
|
139
|
+
|
|
140
|
+
if "openai_api_key" in config:
|
|
141
|
+
openai_default = OpenAIProvider(api_key="").default_model
|
|
142
|
+
current_openai_model = existing_config.get("openai_model", openai_default)
|
|
143
|
+
openai_model = click.prompt(
|
|
144
|
+
"OpenAI model",
|
|
145
|
+
default=current_openai_model,
|
|
146
|
+
show_default=True,
|
|
147
|
+
)
|
|
148
|
+
if openai_model != openai_default:
|
|
149
|
+
config["openai_model"] = openai_model
|
|
150
|
+
click.echo(f" ✓ OpenAI model set to: {openai_model}")
|
|
151
|
+
else:
|
|
152
|
+
click.echo(f" Using default: {openai_default}")
|
|
153
|
+
|
|
154
|
+
if "anthropic_api_key" in config:
|
|
155
|
+
anthropic_default = AnthropicProvider(api_key="").default_model
|
|
156
|
+
current_anthropic_model = existing_config.get("anthropic_model", anthropic_default)
|
|
157
|
+
anthropic_model = click.prompt(
|
|
158
|
+
"Anthropic model",
|
|
159
|
+
default=current_anthropic_model,
|
|
160
|
+
show_default=True,
|
|
161
|
+
)
|
|
162
|
+
if anthropic_model != anthropic_default:
|
|
163
|
+
config["anthropic_model"] = anthropic_model
|
|
164
|
+
click.echo(f" ✓ Anthropic model set to: {anthropic_model}")
|
|
165
|
+
else:
|
|
166
|
+
click.echo(f" Using default: {anthropic_default}")
|
|
167
|
+
|
|
96
168
|
# Save configuration
|
|
97
169
|
save_config(config)
|
|
98
170
|
|
|
@@ -123,7 +195,19 @@ def init():
|
|
|
123
195
|
type=click.Path(),
|
|
124
196
|
help="Save output to file instead of printing",
|
|
125
197
|
)
|
|
126
|
-
|
|
198
|
+
@click.option(
|
|
199
|
+
"--max-pages",
|
|
200
|
+
type=int,
|
|
201
|
+
default=None,
|
|
202
|
+
help=f"Maximum pages to analyze (default: auto-truncate at {DEFAULT_MAX_PAGES} if needed)",
|
|
203
|
+
)
|
|
204
|
+
def analyze(
|
|
205
|
+
paper_path: str,
|
|
206
|
+
provider: str | None,
|
|
207
|
+
model: str | None,
|
|
208
|
+
output: str | None,
|
|
209
|
+
max_pages: int | None,
|
|
210
|
+
):
|
|
127
211
|
"""Analyze an academic paper and extract its core value."""
|
|
128
212
|
# Check configuration
|
|
129
213
|
if not is_configured():
|
|
@@ -145,9 +229,12 @@ def analyze(paper_path: str, provider: str | None, model: str | None, output: st
|
|
|
145
229
|
click.echo(f"Run 'fluff-cutter init' or set {provider_name.upper()}_API_KEY.", err=True)
|
|
146
230
|
sys.exit(1)
|
|
147
231
|
|
|
232
|
+
# Get model: CLI option > config file > provider default
|
|
233
|
+
model_to_use = model or get_default_model(provider_name, config)
|
|
234
|
+
|
|
148
235
|
# Create provider instance
|
|
149
236
|
provider_class = PROVIDERS[provider_name]
|
|
150
|
-
llm_provider = provider_class(api_key=api_key, model=
|
|
237
|
+
llm_provider = provider_class(api_key=api_key, model=model_to_use)
|
|
151
238
|
|
|
152
239
|
click.echo(f"Analyzing paper: {paper_path}")
|
|
153
240
|
click.echo(f"Using: {llm_provider.get_model_info()}")
|
|
@@ -156,20 +243,39 @@ def analyze(paper_path: str, provider: str | None, model: str | None, output: st
|
|
|
156
243
|
# Read PDF
|
|
157
244
|
click.echo("Reading PDF...")
|
|
158
245
|
try:
|
|
159
|
-
pdf_base64 = read_pdf_as_base64(paper_path)
|
|
246
|
+
pdf_base64, total_pages, was_truncated = read_pdf_as_base64(paper_path, max_pages)
|
|
160
247
|
filename = get_pdf_filename(paper_path)
|
|
161
|
-
|
|
248
|
+
if was_truncated:
|
|
249
|
+
click.echo(f" PDF truncated: analyzing first {max_pages} of {total_pages} pages")
|
|
250
|
+
else:
|
|
251
|
+
click.echo(f" PDF loaded successfully ({total_pages} pages)")
|
|
162
252
|
except Exception as e:
|
|
163
253
|
click.echo(f"Error reading PDF: {e}", err=True)
|
|
164
254
|
sys.exit(1)
|
|
165
255
|
|
|
166
|
-
# Analyze the paper
|
|
256
|
+
# Analyze the paper (with auto-retry on token limit)
|
|
167
257
|
click.echo("Analyzing paper (this may take a minute)...")
|
|
168
258
|
try:
|
|
169
259
|
result = analyze_paper(llm_provider, pdf_base64, filename)
|
|
170
260
|
except Exception as e:
|
|
171
|
-
|
|
172
|
-
|
|
261
|
+
error_msg = str(e)
|
|
262
|
+
# Check if it's a token limit error and we haven't already truncated
|
|
263
|
+
if "too long" in error_msg.lower() and "token" in error_msg.lower() and not was_truncated:
|
|
264
|
+
click.echo()
|
|
265
|
+
click.echo(
|
|
266
|
+
f" Paper exceeds token limit. Auto-truncating to {DEFAULT_MAX_PAGES} pages...",
|
|
267
|
+
err=True,
|
|
268
|
+
)
|
|
269
|
+
try:
|
|
270
|
+
pdf_base64, total_pages, _ = read_pdf_as_base64(paper_path, DEFAULT_MAX_PAGES)
|
|
271
|
+
click.echo(f" Retrying with first {DEFAULT_MAX_PAGES} of {total_pages} pages...")
|
|
272
|
+
result = analyze_paper(llm_provider, pdf_base64, filename)
|
|
273
|
+
except Exception as retry_error:
|
|
274
|
+
click.echo(f"Error during analysis: {retry_error}", err=True)
|
|
275
|
+
sys.exit(1)
|
|
276
|
+
else:
|
|
277
|
+
click.echo(f"Error during analysis: {e}", err=True)
|
|
278
|
+
sys.exit(1)
|
|
173
279
|
|
|
174
280
|
click.echo()
|
|
175
281
|
|
fluff_cutter/config.py
CHANGED
|
@@ -70,6 +70,12 @@ def load_config() -> dict[str, Any]:
|
|
|
70
70
|
if os.environ.get("FLUFF_CUTTER_PROVIDER"):
|
|
71
71
|
config["default_provider"] = os.environ["FLUFF_CUTTER_PROVIDER"]
|
|
72
72
|
|
|
73
|
+
if os.environ.get("FLUFF_CUTTER_OPENAI_MODEL"):
|
|
74
|
+
config["openai_model"] = os.environ["FLUFF_CUTTER_OPENAI_MODEL"]
|
|
75
|
+
|
|
76
|
+
if os.environ.get("FLUFF_CUTTER_ANTHROPIC_MODEL"):
|
|
77
|
+
config["anthropic_model"] = os.environ["FLUFF_CUTTER_ANTHROPIC_MODEL"]
|
|
78
|
+
|
|
73
79
|
return config
|
|
74
80
|
|
|
75
81
|
|
|
@@ -107,6 +113,24 @@ def get_default_provider(config: dict[str, Any] | None = None) -> str:
|
|
|
107
113
|
return config.get("default_provider", DEFAULT_PROVIDER)
|
|
108
114
|
|
|
109
115
|
|
|
116
|
+
def get_default_model(provider: str, config: dict[str, Any] | None = None) -> str | None:
|
|
117
|
+
"""
|
|
118
|
+
Get the configured default model for a provider.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
provider: The provider name ('openai' or 'anthropic').
|
|
122
|
+
config: Optional pre-loaded config. If None, loads config.
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
The configured model name, or None to use provider default.
|
|
126
|
+
"""
|
|
127
|
+
if config is None:
|
|
128
|
+
config = load_config()
|
|
129
|
+
|
|
130
|
+
model_key = f"{provider}_model"
|
|
131
|
+
return config.get(model_key)
|
|
132
|
+
|
|
133
|
+
|
|
110
134
|
def is_configured() -> bool:
|
|
111
135
|
"""
|
|
112
136
|
Check if at least one provider is configured.
|
fluff_cutter/output.py
CHANGED
|
@@ -39,9 +39,7 @@ def print_analysis(title: str, analysis: str, model_info: str) -> None:
|
|
|
39
39
|
print(format_analysis(title, analysis, model_info))
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
def save_analysis(
|
|
43
|
-
title: str, analysis: str, model_info: str, output_path: str
|
|
44
|
-
) -> None:
|
|
42
|
+
def save_analysis(title: str, analysis: str, model_info: str, output_path: str) -> None:
|
|
45
43
|
"""
|
|
46
44
|
Save the formatted analysis to a file.
|
|
47
45
|
|
fluff_cutter/pdf.py
CHANGED
|
@@ -1,18 +1,66 @@
|
|
|
1
1
|
"""PDF handling for LLM analysis."""
|
|
2
2
|
|
|
3
3
|
import base64
|
|
4
|
+
import io
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
7
|
+
from pypdf import PdfReader, PdfWriter
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
# Default max pages when auto-truncating (roughly ~150K tokens for most papers)
|
|
10
|
+
DEFAULT_MAX_PAGES = 50
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_pdf_page_count(pdf_path: str | Path) -> int:
    """
    Count the pages of a PDF file.

    Args:
        pdf_path: Path to the PDF file (string or Path).

    Returns:
        Number of pages reported by the PDF reader.
    """
    # Normalise to Path first so both str and Path inputs are handled alike.
    return len(PdfReader(Path(pdf_path)).pages)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def truncate_pdf(pdf_path: str | Path, max_pages: int) -> bytes:
|
|
29
|
+
"""
|
|
30
|
+
Read a PDF and return only the first N pages as bytes.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
pdf_path: Path to the PDF file.
|
|
34
|
+
max_pages: Maximum number of pages to include.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
PDF data as bytes containing only the first max_pages pages.
|
|
38
|
+
"""
|
|
39
|
+
pdf_path = Path(pdf_path)
|
|
40
|
+
reader = PdfReader(pdf_path)
|
|
41
|
+
|
|
42
|
+
writer = PdfWriter()
|
|
43
|
+
for i in range(min(max_pages, len(reader.pages))):
|
|
44
|
+
writer.add_page(reader.pages[i])
|
|
45
|
+
|
|
46
|
+
output = io.BytesIO()
|
|
47
|
+
writer.write(output)
|
|
48
|
+
return output.getvalue()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def read_pdf_as_base64(
|
|
52
|
+
pdf_path: str | Path,
|
|
53
|
+
max_pages: int | None = None,
|
|
54
|
+
) -> tuple[str, int, bool]:
|
|
55
|
+
"""
|
|
56
|
+
Read a PDF file and encode it as base64, optionally truncating to max pages.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
pdf_path: Path to the PDF file.
|
|
60
|
+
max_pages: Maximum number of pages to include. If None, includes all pages.
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
Tuple of (base64-encoded PDF data, total page count, was_truncated).
|
|
16
64
|
|
|
17
65
|
Raises:
|
|
18
66
|
FileNotFoundError: If the PDF file doesn't exist.
|
|
@@ -26,10 +74,17 @@ def read_pdf_as_base64(pdf_path: str | Path) -> str:
|
|
|
26
74
|
if pdf_path.suffix.lower() != ".pdf":
|
|
27
75
|
raise ValueError(f"File is not a PDF: {pdf_path}")
|
|
28
76
|
|
|
29
|
-
|
|
30
|
-
|
|
77
|
+
total_pages = get_pdf_page_count(pdf_path)
|
|
78
|
+
was_truncated = False
|
|
79
|
+
|
|
80
|
+
if max_pages is not None and total_pages > max_pages:
|
|
81
|
+
pdf_data = truncate_pdf(pdf_path, max_pages)
|
|
82
|
+
was_truncated = True
|
|
83
|
+
else:
|
|
84
|
+
with open(pdf_path, "rb") as f:
|
|
85
|
+
pdf_data = f.read()
|
|
31
86
|
|
|
32
|
-
return base64.standard_b64encode(pdf_data).decode("utf-8")
|
|
87
|
+
return base64.standard_b64encode(pdf_data).decode("utf-8"), total_pages, was_truncated
|
|
33
88
|
|
|
34
89
|
|
|
35
90
|
def get_pdf_filename(pdf_path: str | Path) -> str:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""LLM provider implementations."""
|
|
2
2
|
|
|
3
|
+
from .anthropic import AnthropicProvider
|
|
3
4
|
from .base import BaseLLMProvider
|
|
4
5
|
from .openai import OpenAIProvider
|
|
5
|
-
from .anthropic import AnthropicProvider
|
|
6
6
|
|
|
7
7
|
__all__ = ["BaseLLMProvider", "OpenAIProvider", "AnthropicProvider"]
|
fluff_cutter/providers/openai.py
CHANGED
|
@@ -6,11 +6,11 @@ from .base import BaseLLMProvider
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class OpenAIProvider(BaseLLMProvider):
|
|
9
|
-
"""OpenAI GPT-
|
|
9
|
+
"""OpenAI GPT-5.2 provider with native PDF support."""
|
|
10
10
|
|
|
11
11
|
@property
|
|
12
12
|
def default_model(self) -> str:
|
|
13
|
-
return "gpt-
|
|
13
|
+
return "gpt-5.2"
|
|
14
14
|
|
|
15
15
|
@property
|
|
16
16
|
def provider_name(self) -> str:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fluff-cutter
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A CLI tool to analyze academic papers and extract their core value
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -9,6 +9,7 @@ Requires-Dist: click>=8.0
|
|
|
9
9
|
Requires-Dist: openai>=1.0
|
|
10
10
|
Requires-Dist: anthropic>=0.18
|
|
11
11
|
Requires-Dist: python-dotenv>=1.0
|
|
12
|
+
Requires-Dist: pypdf>=4.0
|
|
12
13
|
Provides-Extra: dev
|
|
13
14
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
15
|
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
@@ -16,6 +17,8 @@ Dynamic: license-file
|
|
|
16
17
|
|
|
17
18
|
# Paper Fluff Cutter
|
|
18
19
|
|
|
20
|
+
[fluff-cutter on PyPI](https://pypi.org/project/fluff-cutter/)
|
|
21
|
+
|
|
19
22
|
A CLI tool that cuts through academic paper fluff to extract what actually matters.
|
|
20
23
|
|
|
21
24
|
Most research has close to zero value. This tool uses multimodal LLMs to analyze papers and answer the three questions every paper should be able to answer:
|
|
@@ -26,13 +29,17 @@ Most research has close to zero value. This tool uses multimodal LLMs to analyze
|
|
|
26
29
|
|
|
27
30
|
## Installation
|
|
28
31
|
|
|
29
|
-
|
|
32
|
+
```bash
|
|
33
|
+
pip install fluff-cutter
|
|
34
|
+
```
|
|
30
35
|
|
|
31
|
-
|
|
36
|
+
Requires Python 3.10+.
|
|
32
37
|
|
|
33
|
-
###
|
|
38
|
+
### Development install
|
|
34
39
|
|
|
35
40
|
```bash
|
|
41
|
+
git clone https://github.com/weijianzhg/paper-fluff-cutter.git
|
|
42
|
+
cd paper-fluff-cutter
|
|
36
43
|
pip install -e .
|
|
37
44
|
```
|
|
38
45
|
|
|
@@ -44,7 +51,7 @@ pip install -e .
|
|
|
44
51
|
fluff-cutter init
|
|
45
52
|
```
|
|
46
53
|
|
|
47
|
-
This will prompt you for your API keys and save them to `~/.config/fluff-cutter/config.json`.
|
|
54
|
+
This will prompt you for your API keys, default provider, and model preferences, then save them to `~/.config/fluff-cutter/config.json`.
|
|
48
55
|
|
|
49
56
|
### Option 2: Environment variables
|
|
50
57
|
|
|
@@ -52,6 +59,8 @@ This will prompt you for your API keys and save them to `~/.config/fluff-cutter/
|
|
|
52
59
|
export OPENAI_API_KEY=sk-your-key-here
|
|
53
60
|
export ANTHROPIC_API_KEY=sk-ant-your-key-here
|
|
54
61
|
export FLUFF_CUTTER_PROVIDER=anthropic # optional, default provider
|
|
62
|
+
export FLUFF_CUTTER_OPENAI_MODEL=gpt-5.2 # optional, override default model
|
|
63
|
+
export FLUFF_CUTTER_ANTHROPIC_MODEL=claude-sonnet-4-5 # optional, override default model
|
|
55
64
|
```
|
|
56
65
|
|
|
57
66
|
## Usage
|
|
@@ -72,8 +81,8 @@ fluff-cutter analyze paper.pdf --provider anthropic
|
|
|
72
81
|
### Specify model
|
|
73
82
|
|
|
74
83
|
```bash
|
|
75
|
-
fluff-cutter analyze paper.pdf --provider openai --model gpt-
|
|
76
|
-
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-
|
|
84
|
+
fluff-cutter analyze paper.pdf --provider openai --model gpt-5.2
|
|
85
|
+
fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-5
|
|
77
86
|
```
|
|
78
87
|
|
|
79
88
|
### Save output to file
|
|
@@ -82,12 +91,22 @@ fluff-cutter analyze paper.pdf --provider anthropic --model claude-sonnet-4-2025
|
|
|
82
91
|
fluff-cutter analyze paper.pdf --output analysis.md
|
|
83
92
|
```
|
|
84
93
|
|
|
94
|
+
### Long papers
|
|
95
|
+
|
|
96
|
+
For very long papers that exceed the model's token limit, you can limit the number of pages:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
fluff-cutter analyze paper.pdf --max-pages 30
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
If you don't specify `--max-pages` and the paper exceeds the model's token limit, fluff-cutter automatically truncates the paper to its first 50 pages and retries the analysis.
|
|
103
|
+
|
|
85
104
|
## Supported Providers
|
|
86
105
|
|
|
87
106
|
| Provider | Default Model | Environment Variable |
|
|
88
107
|
|----------|---------------|---------------------|
|
|
89
|
-
| OpenAI | gpt-
|
|
90
|
-
| Anthropic | claude-sonnet-4-
|
|
108
|
+
| OpenAI | gpt-5.2 | `OPENAI_API_KEY` |
|
|
109
|
+
| Anthropic | claude-sonnet-4-5 | `ANTHROPIC_API_KEY` |
|
|
91
110
|
|
|
92
111
|
Both providers now support native PDF input - no external dependencies like poppler needed.
|
|
93
112
|
|
|
@@ -96,8 +115,9 @@ Both providers now support native PDF input - no external dependencies like popp
|
|
|
96
115
|
Configuration is loaded with the following precedence (highest to lowest):
|
|
97
116
|
|
|
98
117
|
1. Command-line arguments (`--provider`, `--model`)
|
|
99
|
-
2. Environment variables
|
|
118
|
+
2. Environment variables (`FLUFF_CUTTER_PROVIDER`, `FLUFF_CUTTER_OPENAI_MODEL`, `FLUFF_CUTTER_ANTHROPIC_MODEL`)
|
|
100
119
|
3. Config file (`~/.config/fluff-cutter/config.json`)
|
|
120
|
+
4. Provider defaults (gpt-5.2 for OpenAI, claude-sonnet-4-5 for Anthropic)
|
|
101
121
|
|
|
102
122
|
## License
|
|
103
123
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
fluff_cutter/__init__.py,sha256=GXWAX_OIMxSKLlZGrtcldQfAh6Y7wnqGHqo9iQyk_K4,95
|
|
2
|
+
fluff_cutter/analyzer.py,sha256=EXyI6wffbFmIROc9jvR4wuvKZkVMzhbd3zbQ8e77FkQ,2008
|
|
3
|
+
fluff_cutter/cli.py,sha256=6OQ8JVj17i-WwNdj4_ame3RkiA9J4XpLtxmbIOeVRoo,9785
|
|
4
|
+
fluff_cutter/config.py,sha256=JZ_awXKYc3u8VjzwUm5uH9rtSHoHR2SG01eBo7AtzK4,3687
|
|
5
|
+
fluff_cutter/output.py,sha256=4lDOFdIXgJAx3at9giL4508nj_hEa9rJPaAwaWNOljg,1403
|
|
6
|
+
fluff_cutter/pdf.py,sha256=SdHC6jv5s5lfvpYal3ccWyk58_LdlsGL2xfrE1c5RBs,2533
|
|
7
|
+
fluff_cutter/providers/__init__.py,sha256=-WkCbWfdQPIJlOsB-o9C8aW1VM9sttM0gKFljHaJ5rI,217
|
|
8
|
+
fluff_cutter/providers/anthropic.py,sha256=QsBKbBpIE859Q0WId4-i0qDstEloOyiSPwgQxCnVw1I,1784
|
|
9
|
+
fluff_cutter/providers/base.py,sha256=6KkfAgh1jSy3IeXiI6mp0oIX4aQMs9188JWWf9g6WXk,1385
|
|
10
|
+
fluff_cutter/providers/openai.py,sha256=nCbvxJqDqPJisojw_5mQYBgvuVq5PCBFR49szWOK3f4,1561
|
|
11
|
+
fluff_cutter-0.2.0.dist-info/licenses/LICENSE,sha256=Web8HWLb3-BT76oD6gp0yLeRc-6trinrOCE03-NCsWM,1070
|
|
12
|
+
fluff_cutter-0.2.0.dist-info/METADATA,sha256=hbraGqqRhtH2-jup4t_LhPlHU6yNF0a1G6EEj_KNywc,3440
|
|
13
|
+
fluff_cutter-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
14
|
+
fluff_cutter-0.2.0.dist-info/entry_points.txt,sha256=XbYJJc_MN1PgHB47-NWk76BnQ5l0Ba0B5jb9vwAEU5Y,55
|
|
15
|
+
fluff_cutter-0.2.0.dist-info/top_level.txt,sha256=Hb1MmR3LbLIc9PXSm60Jn4a-fht1wwvCT5kxhnyB6VI,13
|
|
16
|
+
fluff_cutter-0.2.0.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
fluff_cutter/__init__.py,sha256=m54r3DTMukXFMTUNb6dqISi4-rYP-3cAwNlxO85B3WY,95
|
|
2
|
-
fluff_cutter/analyzer.py,sha256=Er_ZFva5x12evkg4jiOLhYllGFjcTlch7OSHz0b-SAQ,2006
|
|
3
|
-
fluff_cutter/cli.py,sha256=Txg4sAvbmzn1-HhViMeXWfOKJnqyuBItEfbHK--8Ytw,5354
|
|
4
|
-
fluff_cutter/config.py,sha256=Ly5ZKz8JDWNxUPJx8m9jOEcWoxF24LX64Wr0nXNkfU4,2918
|
|
5
|
-
fluff_cutter/output.py,sha256=vQblPNEdPs8jaeZT0RDw376FGPD9cqH39KMZlP5iDlg,1409
|
|
6
|
-
fluff_cutter/pdf.py,sha256=9HYEMRbYwaJeZPFPvvUEkNbSxLLt0uwz1izBK-UZdkg,992
|
|
7
|
-
fluff_cutter/providers/__init__.py,sha256=GHUJmPYf-f46LdiBN9RtCY25bJMIdp7Bp1PtIwbK_cw,217
|
|
8
|
-
fluff_cutter/providers/anthropic.py,sha256=rtE8Io3QLu-svw8tD72WoTpfvY2_RAiZaHTomrUA4bA,1791
|
|
9
|
-
fluff_cutter/providers/base.py,sha256=6KkfAgh1jSy3IeXiI6mp0oIX4aQMs9188JWWf9g6WXk,1385
|
|
10
|
-
fluff_cutter/providers/openai.py,sha256=ObYGNBRMkvnlmK6jbCTv8nN6ReVrCTmTlV1oneq-Ii4,1559
|
|
11
|
-
fluff_cutter-0.1.0.dist-info/licenses/LICENSE,sha256=Web8HWLb3-BT76oD6gp0yLeRc-6trinrOCE03-NCsWM,1070
|
|
12
|
-
fluff_cutter-0.1.0.dist-info/METADATA,sha256=jaEyBW2AKz9DMDV5h-IjnGClrHrosff0-PT8H1-v2fk,2528
|
|
13
|
-
fluff_cutter-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
14
|
-
fluff_cutter-0.1.0.dist-info/entry_points.txt,sha256=XbYJJc_MN1PgHB47-NWk76BnQ5l0Ba0B5jb9vwAEU5Y,55
|
|
15
|
-
fluff_cutter-0.1.0.dist-info/top_level.txt,sha256=Hb1MmR3LbLIc9PXSm60Jn4a-fht1wwvCT5kxhnyB6VI,13
|
|
16
|
-
fluff_cutter-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|