bibfixer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bibfixer-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Takashi Ishida
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include README.md LICENSE
2
+ recursive-include bibfixer/prompts *.md
3
+
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.4
2
+ Name: bibfixer
3
+ Version: 0.1.0
4
+ Summary: Fixes and standardizes BibTeX using LLM + web search
5
+ Author: Takashi Ishida
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Takashi Ishida
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Requires-Python: >=3.9
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: openai<2,>=1.107.0
32
+ Requires-Dist: bibtexparser<2,>=1.4.1
33
+ Dynamic: license-file
34
+
35
+ <div align="center">
36
+ <img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
37
+
38
+ [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
39
+ ![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
40
+ ![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
41
+ ![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
42
+ </div>
43
+
44
+ A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
45
+
46
+ ## Installation
47
+
48
+ 1. Install (from PyPI):
49
+ ```bash
50
+ pip install bibfixer
51
+ ```
52
+
53
+ 2. Set up your OpenAI API key:
54
+ ```bash
55
+ export OPENAI_API_KEY='your-api-key-here'
56
+ ```
57
+
58
+ ## Usage
59
+
60
+ Basic usage (input is required via `-i/--input`):
61
+ ```bash
62
+ bibfixer -i sample_input.bib
63
+ ```
64
+
65
+ With output file:
66
+ ```bash
67
+ bibfixer -i sample_input.bib -o corrected.bib
68
+ ```
69
+
70
+ With additional formatting preferences (`-p`):
71
+ ```bash
72
+ bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
73
+ ```
74
+
75
+ Use a custom prompt file (defaults to bundled `prompts/default.md`):
76
+ ```bash
77
+ bibfixer -i sample_input.bib --prompt-file prompts/default.md
78
+ ```
79
+
80
+ The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
81
+
82
+ ## Examples
83
+
84
+ Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
85
+ ```bib
86
+ @article{bai2022constitutional,
87
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
88
+ journal = {arXiv preprint arXiv:2212.08073},
89
+ title = {Constitutional ai: Harmlessness from ai feedback},
90
+ year = {2022}
91
+ }
92
+ ```
93
+
94
+ Missing authors are added and title is capitalized properly:
95
+ ```bib
96
+ @article{bai2022constitutional,
97
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
98
+ title = {{Constitutional AI: Harmlessness from AI Feedback}},
99
+ journal = {arXiv preprint arXiv:2212.08073},
100
+ year = {2022}
101
+ }
102
+ ```
103
+
104
+ Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
105
+ ```bib
106
+ @article{khan2024debating,
107
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
108
+ journal = {arXiv preprint arXiv:2402.06782},
109
+ title = {Debating with more persuasive llms leads to more truthful answers},
110
+ year = {2024}
111
+ }
112
+ ```
113
+
114
+ arXiv is replaced with the conference information and appropriate title:
115
+ ```bib
116
+ @inproceedings{khan2024debating,
117
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
118
+ title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
119
+ booktitle = {Proceedings of the 41st International Conference on Machine Learning},
120
+ year = {2024},
121
+ volume = {235},
122
+ pages = {23662--23733}
123
+ }
124
+ ```
125
+
126
+ > [!WARNING]
127
+ > This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
128
+ >
129
+ > ```bash
130
+ > diff -y --suppress-common-lines input.bib output.bib | less -R
131
+ > ```
@@ -0,0 +1,97 @@
1
+ <div align="center">
2
+ <img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
3
+
4
+ [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
5
+ ![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
6
+ ![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
7
+ ![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
8
+ </div>
9
+
10
+ A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
11
+
12
+ ## Installation
13
+
14
+ 1. Install (from PyPI):
15
+ ```bash
16
+ pip install bibfixer
17
+ ```
18
+
19
+ 2. Set up your OpenAI API key:
20
+ ```bash
21
+ export OPENAI_API_KEY='your-api-key-here'
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ Basic usage (input is required via `-i/--input`):
27
+ ```bash
28
+ bibfixer -i sample_input.bib
29
+ ```
30
+
31
+ With output file:
32
+ ```bash
33
+ bibfixer -i sample_input.bib -o corrected.bib
34
+ ```
35
+
36
+ With additional formatting preferences (`-p`):
37
+ ```bash
38
+ bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
39
+ ```
40
+
41
+ Use a custom prompt file (defaults to bundled `prompts/default.md`):
42
+ ```bash
43
+ bibfixer -i sample_input.bib --prompt-file prompts/default.md
44
+ ```
45
+
46
+ The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
47
+
48
+ ## Examples
49
+
50
+ Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
51
+ ```bib
52
+ @article{bai2022constitutional,
53
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
54
+ journal = {arXiv preprint arXiv:2212.08073},
55
+ title = {Constitutional ai: Harmlessness from ai feedback},
56
+ year = {2022}
57
+ }
58
+ ```
59
+
60
+ Missing authors are added and title is capitalized properly:
61
+ ```bib
62
+ @article{bai2022constitutional,
63
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
64
+ title = {{Constitutional AI: Harmlessness from AI Feedback}},
65
+ journal = {arXiv preprint arXiv:2212.08073},
66
+ year = {2022}
67
+ }
68
+ ```
69
+
70
+ Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
71
+ ```bib
72
+ @article{khan2024debating,
73
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
74
+ journal = {arXiv preprint arXiv:2402.06782},
75
+ title = {Debating with more persuasive llms leads to more truthful answers},
76
+ year = {2024}
77
+ }
78
+ ```
79
+
80
+ arXiv is replaced with the conference information and appropriate title:
81
+ ```bib
82
+ @inproceedings{khan2024debating,
83
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
84
+ title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
85
+ booktitle = {Proceedings of the 41st International Conference on Machine Learning},
86
+ year = {2024},
87
+ volume = {235},
88
+ pages = {23662--23733}
89
+ }
90
+ ```
91
+
92
+ > [!WARNING]
93
+ > This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
94
+ >
95
+ > ```bash
96
+ > diff -y --suppress-common-lines input.bib output.bib | less -R
97
+ > ```
@@ -0,0 +1,4 @@
1
+ from .agent import BibFixAgent
2
+
3
+ __all__ = ["BibFixAgent"]
4
+
@@ -0,0 +1,167 @@
1
+ import os
2
+ import sys
3
+ from typing import Optional, Dict, Any
4
+ import json
5
+ import bibtexparser
6
+ from bibtexparser.bwriter import BibTexWriter
7
+ from bibtexparser.bibdatabase import BibDatabase
8
+ from openai import OpenAI
9
+ from importlib import resources
10
+
11
+
12
class BibFixAgent:
    """Correct and complete single BibTeX entries with an OpenAI model.

    The agent first calls the Responses API with the ``web_search`` tool and,
    if that attempt fails for any reason, retries once through the Chat
    Completions API without web search.
    """

    def __init__(self, api_key: Optional[str] = None, prompt_file: Optional[str] = None):
        """Create the agent and its OpenAI client.

        Args:
            api_key: OpenAI API key. When omitted, the ``OPENAI_API_KEY``
                environment variable is used instead.
            prompt_file: Optional path to a custom instruction file. When it
                is missing or unreadable the bundled default is used.

        Raises:
            ValueError: If no API key is passed and none is set in the
                environment.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as argument."
            )

        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-5-mini-2025-08-07"
        self.prompt_file_path = prompt_file

    def _load_instructions_from_file(self) -> Optional[str]:
        """Return the revision instructions, or ``None`` if none can be read.

        The user-supplied prompt file is tried first; on any failure a warning
        is printed to stderr and the bundled ``prompts/default.md`` is used.
        The returned text is stripped and terminated by a single newline.
        """
        if self.prompt_file_path:
            try:
                with open(self.prompt_file_path, "r", encoding="utf-8") as f:
                    return f.read().strip() + "\n"
            except Exception as exc:
                # Best effort: keep going with the bundled prompt, but tell the
                # user their file was not used instead of ignoring it silently.
                print(
                    f"Warning: could not read prompt file '{self.prompt_file_path}' ({exc}); "
                    "falling back to the bundled default instructions.",
                    file=sys.stderr,
                )
        try:
            # Anchor the lookup on the real package. ``prompts`` is shipped as
            # package data without an ``__init__.py``, so resolving
            # ``bibfixer.prompts`` as a package depends on namespace-package
            # support in importlib.resources.
            # NOTE(review): that lookup appears to fail on Python 3.9 - confirm.
            bundled = resources.files("bibfixer").joinpath("prompts").joinpath("default.md")
            with bundled.open("r", encoding="utf-8") as f:
                return f.read().strip() + "\n"
        except Exception:
            return None

    def parse_bibtex(self, bibtex_string: str) -> Dict[str, Any]:
        """Parse ``bibtex_string`` and summarise its first entry.

        Returns:
            A dict with the keys ``original_entry`` (the parsed entry),
            ``title`` (outer braces stripped), ``first_author`` (``""`` when
            there is no author field) and ``entry_type`` (``"article"`` when
            the parser reports none).

        Raises:
            ValueError: If parsing fails or no entry is found.
        """
        try:
            bib_database = bibtexparser.loads(bibtex_string)
            if not bib_database.entries:
                raise ValueError("No valid BibTeX entries found")
            entry = bib_database.entries[0]
            return {
                "original_entry": entry,
                "title": entry.get("title", "").strip("{}"),
                "first_author": self._first_author(entry.get("author", "")),
                "entry_type": entry.get("ENTRYTYPE", "article"),
            }
        except Exception as e:
            raise ValueError(f"Failed to parse BibTeX: {str(e)}") from e

    @staticmethod
    def _first_author(authors_str: str) -> str:
        """Return the first author of a BibTeX ``author`` value."""
        if not authors_str:
            return ""
        if " and " in authors_str:
            return authors_str.split(" and ")[0].strip()
        if "," in authors_str:
            # No " and " separator: keep only the text before the first comma.
            return authors_str.split(",")[0].strip()
        return authors_str.strip()

    @staticmethod
    def _extract_response_text(response: Any) -> Any:
        """Pull the generated text out of a Responses API result.

        ``output_text`` is preferred. The remaining branches are defensive
        fallbacks for objects that do not expose that attribute.
        """
        if hasattr(response, "output_text"):
            return getattr(response, "output_text", None)
        if hasattr(response, "__iter__"):
            for item in response:
                if getattr(item, "type", None) != "message":
                    continue
                if not getattr(item, "content", None):
                    continue
                # Only the first message item that has content is inspected.
                for content_item in item.content:
                    if hasattr(content_item, "text"):
                        return content_item.text
                return None
            return None
        if hasattr(response, "output"):
            return response.output
        return str(response)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return the content of a leading Markdown code fence, if any.

        Text that does not start with a fence is returned unchanged. Models
        sometimes wrap the entry in a fenced block, which would corrupt a
        ``.bib`` file if written out verbatim.
        """
        stripped = text.strip()
        if not stripped.startswith("```"):
            return text
        # Drop the opening fence line (it may carry a language tag).
        lines = stripped.splitlines()[1:]
        for idx, line in enumerate(lines):
            if line.strip().startswith("```"):
                # Keep only what precedes the closing fence.
                lines = lines[:idx]
                break
        return "\n".join(lines).strip()

    def _require_text(self, raw: Any) -> str:
        """Return ``raw`` without code fences, or raise if it holds no text."""
        if not isinstance(raw, str):
            raise ValueError("Could not extract BibTeX from response")
        cleaned = self._strip_code_fences(raw)
        if not cleaned.strip():
            raise ValueError("Could not extract BibTeX from response")
        return cleaned

    @staticmethod
    def _warn_if_not_bibtex(text: str) -> None:
        """Print a stderr warning when ``text`` does not parse to an entry."""
        try:
            # A parse that yields no entries is treated like a parse failure.
            looks_valid = bool(bibtexparser.loads(text).entries)
        except Exception:
            looks_valid = False
        if not looks_valid:
            print("Warning: Response may not be valid BibTeX format", file=sys.stderr)

    def revise_bibtex(self, bibtex_string: str, user_preferences: str = "") -> str:
        """Return a corrected version of the single entry in ``bibtex_string``.

        Args:
            bibtex_string: BibTeX source; its first entry is used to build
                the prompt.
            user_preferences: Free-form formatting preferences appended to
                the prompt when non-empty.

        Returns:
            The model's reply with any surrounding Markdown fence removed.
            A warning is printed to stderr if it does not parse as BibTeX.

        Raises:
            ValueError: If ``bibtex_string`` cannot be parsed.
            RuntimeError: If both the Responses API call and the Chat
                Completions fallback fail.
        """
        parsed = self.parse_bibtex(bibtex_string)
        prompt = self._create_prompt(bibtex_string, parsed, user_preferences)

        try:
            full_prompt = (
                "You are a precise academic assistant that corrects and completes BibTeX entries. "
                "Always return valid BibTeX format.\n\n" + prompt
            )
            response = self.client.responses.create(
                model=self.model, input=full_prompt, tools=[{"type": "web_search"}]
            )
            revised_bibtex = self._require_text(self._extract_response_text(response))
        except Exception as e:
            primary_error = e
        else:
            self._warn_if_not_bibtex(revised_bibtex)
            return revised_bibtex

        try:
            print(
                f"Note: Responses API failed ({str(primary_error)}), falling back to chat completions API without web search",
                file=sys.stderr,
            )
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a precise academic assistant that corrects and completes BibTeX entries. Always return valid BibTeX format. Use your knowledge to correct and complete the entry as best as you can.",
                    },
                    {"role": "user", "content": prompt},
                ],
            )
            revised_bibtex = self._require_text(response.choices[0].message.content)
        except Exception as e2:
            raise RuntimeError(
                f"Failed to call OpenAI API: {str(primary_error)} | Fallback also failed: {str(e2)}"
            ) from e2
        self._warn_if_not_bibtex(revised_bibtex)
        return revised_bibtex

    def _create_prompt(
        self, original_bibtex: str, parsed: Dict[str, Any], preferences: str
    ) -> str:
        """Assemble the user prompt for one entry.

        Args:
            original_bibtex: The entry exactly as it should be shown to the
                model.
            parsed: Mapping providing ``title`` and ``first_author``.
            preferences: Extra formatting preferences; skipped when empty.
        """
        title = parsed["title"]
        first_author = parsed["first_author"]
        author_label = first_author if first_author else "(unknown)"
        prompt = (
            "Please search the web for the following academic paper and correct/complete its BibTeX entry:\n"
            "\n"
            f'Title: "{title}"\n'
            f"First Author: {author_label}\n"
            "\n"
            "Original BibTeX entry:\n"
            "```bibtex\n"
            f"{original_bibtex}\n"
            "```\n"
        )
        external_instructions = self._load_instructions_from_file()
        if external_instructions:
            prompt += "\n" + external_instructions
        else:
            print(
                "Warning: prompt file not found or unreadable; proceeding without detailed instructions.",
                file=sys.stderr,
            )
        if preferences:
            # Deliberately unnumbered: the instruction file has its own
            # numbering, and a fixed "5." would collide with it.
            prompt += (
                "\nAdditionally, apply these user preferences to the formatting:\n"
                f"{preferences}\n"
            )
        prompt += (
            "\nReturn ONLY the corrected BibTeX entry, properly formatted. "
            "Do not include any explanation or additional text.\n"
        )
        return prompt
166
+
167
+
@@ -0,0 +1,120 @@
1
+ import sys
2
+ from typing import Dict, Any
3
+ import argparse
4
+ import bibtexparser
5
+ from bibtexparser.bwriter import BibTexWriter
6
+ from bibtexparser.bibdatabase import BibDatabase
7
+ from .agent import BibFixAgent
8
+
9
+
10
def main() -> None:
    """Command-line entry point: revise every entry of a ``.bib`` file.

    Entries are processed one at a time. For each entry a BEFORE/AFTER
    preview is printed to stdout, while progress, warnings and errors go to
    stderr. An entry whose revision fails is kept unchanged. The combined
    result is written only when ``-o/--output`` is given.

    Exits with status 1 when the input is not a readable ``.bib`` file, no
    API key is available, no entry can be parsed, or the output cannot be
    written.
    """
    parser = argparse.ArgumentParser(
        description="Revise BibTeX entries using GPT-5-mini with web search"
    )
    parser.add_argument(
        "-i", "--input",
        dest="input_file",
        required=True,
        help="Path to input .bib file",
    )
    parser.add_argument(
        "-p", "--preferences", default="", help="User preferences for formatting"
    )
    parser.add_argument(
        "--prompt-file",
        dest="prompt_file",
        default=None,
        help="Path to instruction prompt (default: bundled prompts/default.md)",
    )
    parser.add_argument("-o", "--output", help="Output file (default: print to stdout)")
    parser.add_argument(
        "--api-key", help="OpenAI API key (or set OPENAI_API_KEY env var)"
    )

    args = parser.parse_args()

    if not args.input_file.lower().endswith(".bib"):
        print("Error: Input file must be a .bib file", file=sys.stderr)
        sys.exit(1)

    try:
        # Explicit UTF-8 so author names with non-ASCII characters are read
        # the same way on every platform (the prompt files are read as UTF-8).
        with open(args.input_file, "r", encoding="utf-8") as f:
            bibtex_content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{args.input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file: {str(e)}", file=sys.stderr)
        sys.exit(1)

    try:
        agent = BibFixAgent(api_key=args.api_key, prompt_file=args.prompt_file)
    except ValueError as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    try:
        db = bibtexparser.loads(bibtex_content)
        entries = db.entries or []
        if not entries:
            print("Error: No valid BibTeX entries found", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        print(f"Error parsing BibTeX: {str(e)}", file=sys.stderr)
        sys.exit(1)

    def _dump_single_entry(entry_dict: Dict[str, Any]) -> str:
        """Serialise one parsed entry back to BibTeX text."""
        single_db = BibDatabase()
        single_db.entries = [entry_dict]
        writer = BibTexWriter()
        writer.order_entries_by = None
        return writer.write(single_db)

    revised_entries_text: list[str] = []
    failed_keys: list[str] = []
    separator = "=" * 80
    print(
        f"Found {len(entries)} entr{'y' if len(entries)==1 else 'ies'}; processing sequentially...",
        file=sys.stderr,
    )
    for idx, entry in enumerate(entries, start=1):
        key = entry.get("ID", f"entry_{idx}")
        print(f"Revising {idx}/{len(entries)}: {key}", file=sys.stderr)
        original_entry_text = _dump_single_entry(entry)
        print(separator)
        print("--- BEFORE ---")
        print(original_entry_text.strip())
        try:
            revised_text = agent.revise_bibtex(original_entry_text, args.preferences)
            revised_entries_text.append(revised_text.strip())
            final_text = revised_text
        except Exception as e:
            # One failing entry must not abort the whole run.
            print(
                f"Error revising entry '{key}': {str(e)} — keeping original",
                file=sys.stderr,
            )
            failed_keys.append(str(key))
            revised_entries_text.append(original_entry_text.strip())
            final_text = original_entry_text
        print("--- AFTER ----")
        print(final_text.strip())
        print(separator)

    if failed_keys:
        # Make it obvious which entries in the result were not actually revised.
        print(
            f"Warning: {len(failed_keys)} of {len(entries)} entr"
            f"{'y' if len(entries)==1 else 'ies'} could not be revised and "
            f"kept the original text: {', '.join(failed_keys)}",
            file=sys.stderr,
        )

    combined = "\n\n".join(revised_entries_text) + "\n"

    if args.output:
        try:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(combined)
            print(
                f"Revised {len(entries)} entries written to {args.output}",
                file=sys.stderr,
            )
        except Exception as e:
            print(f"Error writing output: {str(e)}", file=sys.stderr)
            sys.exit(1)
    else:
        print(
            "No output file specified. Preview shown above; not writing output file.",
            file=sys.stderr,
        )
119
+
120
+
@@ -0,0 +1,40 @@
1
+ Instructions:
2
+
3
+ 1) Find authoritative metadata
4
+ - Search reputable sources (publisher site, proceedings page, openreview).
5
+ - Prefer citing the peer‑reviewed journal or conference proceedings version over arXiv. Use arXiv only if no published version exists.
6
+ - Even if you are sure about what the correct information should be, make sure to search the web for the most up-to-date information.
7
+
8
+ 2) Verify and correct these fields
9
+ - Authors: full names, correct order. Use BibTeX format `Last, First` with `and` separators.
10
+ - Title: exact official title (be careful with capitalization, e.g., "ImageNet" instead of "Imagenet").
11
+ - Venue: full journal name or full conference proceedings name.
12
+ - Year: four digits.
13
+ - Pages: use en‑dash style `123--145` when available.
14
+ - Volume/Number: include for journal articles when available.
15
+ - Entry type: `@article` for journals; `@inproceedings` for conference papers; `@book` for books, other types only when clearly appropriate.
16
+
17
+ 3) Output formatting rules
18
+ - Do NOT change the citation key (the part after `@type{` and before the comma). This is because I am already using this specific key in the paper.
19
+ - Use double curly braces around the `title` value to preserve capitalization, e.g., `title = {{Attention Is All You Need}}`.
20
+ - For authors, prefer `Last, First` form and separate authors with ` and `.
21
+ - List all authors and do not use `et al.` or `and others` (even when there are hundreds of authors).
22
+ - Use the full conference name in `booktitle` (no acronyms), e.g., `Proceedings of the 41st International Conference on Machine Learning` instead of `Proceedings of the 41st ICML`. Do not include the acronym after the full name, e.g., do not write `Proceedings of the 41st International Conference on Machine Learning (ICML)`.
23
+ - For NeurIPS papers, the booktitle should be `Advances in Neural Information Processing Systems`, not `Proceedings of Neural Information Processing Systems`.
24
+ - Field order (when present):
25
+ `author`, `title`, `journal`/`booktitle`, `year`, `volume`, `number`, `pages`.
26
+ - Indentation similar to typical BibTeX style and no trailing comma on the last field.
27
+ - ICLR has no formal proceedings, so start the page numbers from 1, e.g., `1--14`.
28
+ - If we need to cite a web article/blog, use the access date written in the original bib entry. If the access date is missing in the original bib entry, or if it is written with a placeholder like `Accessed YYYY-MM-DD`, use the date of today.
29
+
30
+ 4) Fields to OMIT
31
+ - Do not include: URL, PDF link, DOI, editors, abstract, keywords in `@article` and `@inproceedings`. For other types, use your best judgement.
32
+ - If only an arXiv version exists, use `@article` and include `title`, `author`, `journal`, and `year`. For the `journal`, write `arXiv preprint arXiv:{ID}`, where you should write the arXiv ID for the placeholder.
33
+
34
+ 5) If uncertain
35
+ - Do not guess. Omit fields that cannot be verified from authoritative sources.
36
+
37
+ 6) Output requirement
38
+ - Return only a single, valid BibTeX entry. Do not include any explanations, prose, or Markdown code fences.
39
+
40
+
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.4
2
+ Name: bibfixer
3
+ Version: 0.1.0
4
+ Summary: Fixes and standardizes BibTeX using LLM + web search
5
+ Author: Takashi Ishida
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Takashi Ishida
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Requires-Python: >=3.9
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: openai<2,>=1.107.0
32
+ Requires-Dist: bibtexparser<2,>=1.4.1
33
+ Dynamic: license-file
34
+
35
+ <div align="center">
36
+ <img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
37
+
38
+ [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
39
+ ![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
40
+ ![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
41
+ ![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
42
+ </div>
43
+
44
+ A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
45
+
46
+ ## Installation
47
+
48
+ 1. Install (from PyPI):
49
+ ```bash
50
+ pip install bibfixer
51
+ ```
52
+
53
+ 2. Set up your OpenAI API key:
54
+ ```bash
55
+ export OPENAI_API_KEY='your-api-key-here'
56
+ ```
57
+
58
+ ## Usage
59
+
60
+ Basic usage (input is required via `-i/--input`):
61
+ ```bash
62
+ bibfixer -i sample_input.bib
63
+ ```
64
+
65
+ With output file:
66
+ ```bash
67
+ bibfixer -i sample_input.bib -o corrected.bib
68
+ ```
69
+
70
+ With additional formatting preferences (`-p`):
71
+ ```bash
72
+ bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
73
+ ```
74
+
75
+ Use a custom prompt file (defaults to bundled `prompts/default.md`):
76
+ ```bash
77
+ bibfixer -i sample_input.bib --prompt-file prompts/default.md
78
+ ```
79
+
80
+ The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
81
+
82
+ ## Examples
83
+
84
+ Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
85
+ ```bib
86
+ @article{bai2022constitutional,
87
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
88
+ journal = {arXiv preprint arXiv:2212.08073},
89
+ title = {Constitutional ai: Harmlessness from ai feedback},
90
+ year = {2022}
91
+ }
92
+ ```
93
+
94
+ Missing authors are added and title is capitalized properly:
95
+ ```bib
96
+ @article{bai2022constitutional,
97
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
98
+ title = {{Constitutional AI: Harmlessness from AI Feedback}},
99
+ journal = {arXiv preprint arXiv:2212.08073},
100
+ year = {2022}
101
+ }
102
+ ```
103
+
104
+ Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
105
+ ```bib
106
+ @article{khan2024debating,
107
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
108
+ journal = {arXiv preprint arXiv:2402.06782},
109
+ title = {Debating with more persuasive llms leads to more truthful answers},
110
+ year = {2024}
111
+ }
112
+ ```
113
+
114
+ arXiv is replaced with the conference information and appropriate title:
115
+ ```bib
116
+ @inproceedings{khan2024debating,
117
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
118
+ title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
119
+ booktitle = {Proceedings of the 41st International Conference on Machine Learning},
120
+ year = {2024},
121
+ volume = {235},
122
+ pages = {23662--23733}
123
+ }
124
+ ```
125
+
126
+ > [!WARNING]
127
+ > This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
128
+ >
129
+ > ```bash
130
+ > diff -y --suppress-common-lines input.bib output.bib | less -R
131
+ > ```
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ bibfixer/__init__.py
6
+ bibfixer/agent.py
7
+ bibfixer/cli.py
8
+ bibfixer.egg-info/PKG-INFO
9
+ bibfixer.egg-info/SOURCES.txt
10
+ bibfixer.egg-info/dependency_links.txt
11
+ bibfixer.egg-info/entry_points.txt
12
+ bibfixer.egg-info/requires.txt
13
+ bibfixer.egg-info/top_level.txt
14
+ bibfixer/prompts/default.md
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ bibfixer = bibfixer.cli:main
@@ -0,0 +1,2 @@
1
+ openai<2,>=1.107.0
2
+ bibtexparser<2,>=1.4.1
@@ -0,0 +1 @@
1
+ bibfixer
@@ -0,0 +1,27 @@
1
[build-system]
# The SPDX `license` string and `license-files` key below (PEP 639) need a
# setuptools release that understands them.
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"

[project]
name = "bibfixer"
version = "0.1.0"
description = "Fixes and standardizes BibTeX using LLM + web search"
readme = "README.md"
# SPDX expression instead of the deprecated `{ file = ... }` table, which
# copies the whole licence text into the `License:` metadata field.
license = "MIT"
license-files = ["LICENSE"]
authors = [{ name = "Takashi Ishida" }]
requires-python = ">=3.9"
dependencies = [
    "bibtexparser>=1.4.1,<2",
    "openai>=1.107.0,<2",
]

[project.scripts]
bibfixer = "bibfixer.cli:main"

[tool.setuptools]
packages = ["bibfixer"]

[tool.setuptools.package-data]
# Default instruction prompt, loaded at runtime through importlib.resources.
bibfixer = ["prompts/default.md"]
26
+
27
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+