PyPI - bibfixer - Versions diffs - 0.1.0__tar.gz - Mend

bibfixer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

bibfixer-0.1.0/LICENSE +21 -0
bibfixer-0.1.0/MANIFEST.in +3 -0
bibfixer-0.1.0/PKG-INFO +131 -0
bibfixer-0.1.0/README.md +97 -0
bibfixer-0.1.0/bibfixer/__init__.py +4 -0
bibfixer-0.1.0/bibfixer/agent.py +167 -0
bibfixer-0.1.0/bibfixer/cli.py +120 -0
bibfixer-0.1.0/bibfixer/prompts/default.md +40 -0
bibfixer-0.1.0/bibfixer.egg-info/PKG-INFO +131 -0
bibfixer-0.1.0/bibfixer.egg-info/SOURCES.txt +14 -0
bibfixer-0.1.0/bibfixer.egg-info/dependency_links.txt +1 -0
bibfixer-0.1.0/bibfixer.egg-info/entry_points.txt +2 -0
bibfixer-0.1.0/bibfixer.egg-info/requires.txt +2 -0
bibfixer-0.1.0/bibfixer.egg-info/top_level.txt +1 -0
bibfixer-0.1.0/pyproject.toml +27 -0
bibfixer-0.1.0/setup.cfg +4 -0

bibfixer-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Takashi Ishida
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

bibfixer-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,3 @@
+include README.md LICENSE
+recursive-include bibfixer/prompts *.md

bibfixer-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,131 @@
+Metadata-Version: 2.4
+Name: bibfixer
+Version: 0.1.0
+Summary: Fixes and standardizes BibTeX using LLM + web search
+Author: Takashi Ishida
+License: MIT License
+        Copyright (c) 2025 Takashi Ishida
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai<2,>=1.107.0
+Requires-Dist: bibtexparser<2,>=1.4.1
+Dynamic: license-file
+<div align="center">
+<img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
+[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
+![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
+![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
+</div>
+A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
+## Installation
+1. Install (from PyPI):
+```bash
+pip install bibfixer
+```
+2. Set up your OpenAI API key:
+```bash
+export OPENAI_API_KEY='your-api-key-here'
+```
+## Usage
+Basic usage (input is required via `-i/--input`):
+```bash
+bibfixer -i sample_input.bib
+```
+With output file:
+```bash
+bibfixer -i sample_input.bib -o corrected.bib
+```
+With additional formatting preferences (`-p`):
+```bash
+bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
+```
+Use a custom prompt file (defaults to bundled `prompts/default.md`):
+```bash
+bibfixer -i sample_input.bib --prompt-file prompts/default.md
+```
+The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
+## Examples
+Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
+```bib
+@article{bai2022constitutional,
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
+ journal = {arXiv preprint arXiv:2212.08073},
+ title = {Constitutional ai: Harmlessness from ai feedback},
+ year = {2022}
+}
+```
+Missing authors are added and title is capitalized properly:
+```bib
+@article{bai2022constitutional,
+  author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
+  title = {{Constitutional AI: Harmlessness from AI Feedback}},
+  journal = {arXiv preprint arXiv:2212.08073},
+  year = {2022}
+}
+```
+Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
+```bib
+@article{khan2024debating,
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
+ journal = {arXiv preprint arXiv:2402.06782},
+ title = {Debating with more persuasive llms leads to more truthful answers},
+ year = {2024}
+}
+```
+arXiv is replaced with the conference information and appropriate title:
+```bib
+@inproceedings{khan2024debating,
+  author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
+  title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
+  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
+  year = {2024},
+  volume = {235},
+  pages = {23662--23733}
+}
+```
+> [!WARNING]
+> This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
+>
+> ```bash
+> diff -y --suppress-common-lines input.bib output.bib | less -R
+> ```

bibfixer-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,97 @@
+<div align="center">
+<img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
+[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
+![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
+![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
+</div>
+A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
+## Installation
+1. Install (from PyPI):
+```bash
+pip install bibfixer
+```
+2. Set up your OpenAI API key:
+```bash
+export OPENAI_API_KEY='your-api-key-here'
+```
+## Usage
+Basic usage (input is required via `-i/--input`):
+```bash
+bibfixer -i sample_input.bib
+```
+With output file:
+```bash
+bibfixer -i sample_input.bib -o corrected.bib
+```
+With additional formatting preferences (`-p`):
+```bash
+bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
+```
+Use a custom prompt file (defaults to bundled `prompts/default.md`):
+```bash
+bibfixer -i sample_input.bib --prompt-file prompts/default.md
+```
+The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
+## Examples
+Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
+```bib
+@article{bai2022constitutional,
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
+ journal = {arXiv preprint arXiv:2212.08073},
+ title = {Constitutional ai: Harmlessness from ai feedback},
+ year = {2022}
+}
+```
+Missing authors are added and title is capitalized properly:
+```bib
+@article{bai2022constitutional,
+  author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
+  title = {{Constitutional AI: Harmlessness from AI Feedback}},
+  journal = {arXiv preprint arXiv:2212.08073},
+  year = {2022}
+}
+```
+Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
+```bib
+@article{khan2024debating,
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
+ journal = {arXiv preprint arXiv:2402.06782},
+ title = {Debating with more persuasive llms leads to more truthful answers},
+ year = {2024}
+}
+```
+arXiv is replaced with the conference information and appropriate title:
+```bib
+@inproceedings{khan2024debating,
+  author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
+  title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
+  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
+  year = {2024},
+  volume = {235},
+  pages = {23662--23733}
+}
+```
+> [!WARNING]
+> This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
+>
+> ```bash
+> diff -y --suppress-common-lines input.bib output.bib | less -R
+> ```

bibfixer-0.1.0/bibfixer/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .agent import BibFixAgent
+__all__ = ["BibFixAgent"]

bibfixer-0.1.0/bibfixer/agent.py ADDED Viewed

@@ -0,0 +1,167 @@
+import os
+import sys
+from typing import Optional, Dict, Any
+import json
+import bibtexparser
+from bibtexparser.bwriter import BibTexWriter
+from bibtexparser.bibdatabase import BibDatabase
+from openai import OpenAI
+from importlib import resources
+class BibFixAgent:
+    def __init__(self, api_key: Optional[str] = None, prompt_file: Optional[str] = None):
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as argument."
+            )
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = "gpt-5-mini-2025-08-07"
+        self.prompt_file_path = prompt_file
+    def _load_instructions_from_file(self) -> Optional[str]:
+        if self.prompt_file_path:
+            try:
+                if os.path.exists(self.prompt_file_path):
+                    with open(self.prompt_file_path, "r", encoding="utf-8") as f:
+                        return f.read().strip() + "\n"
+            except Exception:
+                pass
+        try:
+            with resources.files("bibfixer.prompts").joinpath("default.md").open(
+                "r", encoding="utf-8"
+            ) as f:
+                return f.read().strip() + "\n"
+        except Exception:
+            return None
+    def parse_bibtex(self, bibtex_string: str) -> Dict[str, Any]:
+        try:
+            bib_database = bibtexparser.loads(bibtex_string)
+            if not bib_database.entries:
+                raise ValueError("No valid BibTeX entries found")
+            entry = bib_database.entries[0]
+            title = entry.get("title", "").strip("{}")
+            authors_str = entry.get("author", "")
+            if authors_str:
+                if " and " in authors_str:
+                    first_author = authors_str.split(" and ")[0].strip()
+                elif "," in authors_str:
+                    first_author = authors_str.split(",")[0].strip()
+                else:
+                    first_author = authors_str.strip()
+            else:
+                first_author = ""
+            return {
+                "original_entry": entry,
+                "title": title,
+                "first_author": first_author,
+                "entry_type": entry.get("ENTRYTYPE", "article"),
+            }
+        except Exception as e:
+            raise ValueError(f"Failed to parse BibTeX: {str(e)}")
+    def revise_bibtex(self, bibtex_string: str, user_preferences: str = "") -> str:
+        parsed = self.parse_bibtex(bibtex_string)
+        prompt = self._create_prompt(bibtex_string, parsed, user_preferences)
+        try:
+            full_prompt = (
+                """You are a precise academic assistant that corrects and completes BibTeX entries. Always return valid BibTeX format.
+"""
+                + prompt
+            )
+            response = self.client.responses.create(
+                model=self.model, input=full_prompt, tools=[{"type": "web_search"}]
+            )
+            revised_bibtex = None
+            if hasattr(response, "output_text"):
+                revised_bibtex = getattr(response, "output_text", None)
+            elif hasattr(response, "__iter__"):
+                for item in response:
+                    if hasattr(item, "type") and item.type == "message":
+                        if hasattr(item, "content") and item.content:
+                            for content_item in item.content:
+                                if hasattr(content_item, "text"):
+                                    revised_bibtex = content_item.text
+                                    break
+                        break
+            elif hasattr(response, "output"):
+                revised_bibtex = response.output
+            else:
+                revised_bibtex = str(response)
+            if not revised_bibtex:
+                raise ValueError("Could not extract BibTeX from response")
+            try:
+                bibtexparser.loads(revised_bibtex)
+            except Exception:
+                print(
+                    "Warning: Response may not be valid BibTeX format", file=sys.stderr
+                )
+            return revised_bibtex
+        except Exception as e:
+            try:
+                print(
+                    f"Note: Responses API failed ({str(e)}), falling back to chat completions API without web search",
+                    file=sys.stderr,
+                )
+                response = self.client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": "You are a precise academic assistant that corrects and completes BibTeX entries. Always return valid BibTeX format. Use your knowledge to correct and complete the entry as best as you can.",
+                        },
+                        {"role": "user", "content": prompt},
+                    ],
+                )
+                revised_bibtex = response.choices[0].message.content
+                try:
+                    bibtexparser.loads(revised_bibtex)
+                except Exception:
+                    print(
+                        "Warning: Response may not be valid BibTeX format",
+                        file=sys.stderr,
+                    )
+                return revised_bibtex
+            except Exception as e2:
+                raise RuntimeError(
+                    f"Failed to call OpenAI API: {str(e)} | Fallback also failed: {str(e2)}"
+                )
+    def _create_prompt(
+        self, original_bibtex: str, parsed: Dict[str, Any], preferences: str
+    ) -> str:
+        title = parsed["title"]
+        first_author = parsed["first_author"]
+        prompt = f"""Please search the web for the following academic paper and correct/complete its BibTeX entry:
+Title: "{title}"
+First Author: {first_author if first_author else "(unknown)"}
+Original BibTeX entry:
+```bibtex
+{original_bibtex}
+```
+"""
+        external_instructions = self._load_instructions_from_file()
+        if external_instructions:
+            prompt += "\n" + external_instructions
+        else:
+            print(
+                "Warning: prompt file not found or unreadable; proceeding without detailed instructions.",
+                file=sys.stderr,
+            )
+        if preferences:
+            prompt += f"""
+5. Apply these user preferences to the formatting:
+{preferences}
+"""
+        prompt += """
+Return ONLY the corrected BibTeX entry, properly formatted. Do not include any explanation or additional text.
+"""
+        return prompt

bibfixer-0.1.0/bibfixer/cli.py ADDED Viewed

@@ -0,0 +1,120 @@
+import sys
+from typing import Dict, Any
+import argparse
+import bibtexparser
+from bibtexparser.bwriter import BibTexWriter
+from bibtexparser.bibdatabase import BibDatabase
+from .agent import BibFixAgent
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Revise BibTeX entries using GPT-5-mini with web search"
+    )
+    parser.add_argument(
+        "-i", "--input",
+        dest="input_file",
+        required=True,
+        help="Path to input .bib file",
+    )
+    parser.add_argument(
+        "-p", "--preferences", default="", help="User preferences for formatting"
+    )
+    parser.add_argument(
+        "--prompt-file",
+        dest="prompt_file",
+        default=None,
+        help="Path to instruction prompt (default: bundled prompts/default.md)",
+    )
+    parser.add_argument("-o", "--output", help="Output file (default: print to stdout)")
+    parser.add_argument(
+        "--api-key", help="OpenAI API key (or set OPENAI_API_KEY env var)"
+    )
+    args = parser.parse_args()
+    if not args.input_file.lower().endswith(".bib"):
+        print("Error: Input file must be a .bib file", file=sys.stderr)
+        sys.exit(1)
+    try:
+        with open(args.input_file, "r") as f:
+            bibtex_content = f.read()
+    except FileNotFoundError:
+        print(f"Error: File '{args.input_file}' not found", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error reading file: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+    try:
+        agent = BibFixAgent(api_key=args.api_key, prompt_file=args.prompt_file)
+    except ValueError as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+    try:
+        db = bibtexparser.loads(bibtex_content)
+        entries = db.entries or []
+        if not entries:
+            print("Error: No valid BibTeX entries found", file=sys.stderr)
+            sys.exit(1)
+    except Exception as e:
+        print(f"Error parsing BibTeX: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+    def _dump_single_entry(entry_dict: Dict[str, Any]) -> str:
+        single_db = BibDatabase()
+        single_db.entries = [entry_dict]
+        writer = BibTexWriter()
+        writer.order_entries_by = None
+        return writer.write(single_db)
+    revised_entries_text: list[str] = []
+    print(
+        f"Found {len(entries)} entr{'y' if len(entries)==1 else 'ies'}; processing sequentially...",
+        file=sys.stderr,
+    )
+    for idx, entry in enumerate(entries, start=1):
+        key = entry.get("ID", f"entry_{idx}")
+        print(f"Revising {idx}/{len(entries)}: {key}", file=sys.stderr)
+        original_entry_text = _dump_single_entry(entry)
+        separator = "=" * 80
+        print(separator)
+        print("--- BEFORE ---")
+        print(original_entry_text.strip())
+        try:
+            revised_text = agent.revise_bibtex(original_entry_text, args.preferences)
+            revised_entries_text.append(revised_text.strip())
+            final_text = revised_text
+        except Exception as e:
+            print(
+                f"Error revising entry '{key}': {str(e)} — keeping original",
+                file=sys.stderr,
+            )
+            revised_entries_text.append(original_entry_text.strip())
+            final_text = original_entry_text
+        print("--- AFTER ----")
+        print(final_text.strip())
+        print(separator)
+    combined = "\n\n".join(revised_entries_text) + "\n"
+    if args.output:
+        try:
+            with open(args.output, "w") as f:
+                f.write(combined)
+            print(
+                f"Revised {len(entries)} entries written to {args.output}",
+                file=sys.stderr,
+            )
+        except Exception as e:
+            print(f"Error writing output: {str(e)}", file=sys.stderr)
+            sys.exit(1)
+    else:
+        print(
+            "No output file specified. Preview shown above; not writing output file.",
+            file=sys.stderr,
+        )

bibfixer-0.1.0/bibfixer/prompts/default.md ADDED Viewed

@@ -0,0 +1,40 @@
+Instructions:
+1) Find authoritative metadata
+   - Search reputable sources (publisher site, proceedings page, openreview).
+   - Prefer citing the peer‑reviewed journal or conference proceedings version over arXiv. Use arXiv only if no published version exists.
+   - Even if you are sure about what the correct information should be, make sure to search the web for the most up-to-date information.
+2) Verify and correct these fields
+   - Authors: full names, correct order. Use BibTeX format `Last, First` with `and` separators.
+   - Title: exact official title (be careful with capitalization, e.g., "ImageNet" instead of "Imagenet").
+   - Venue: full journal name or full conference proceedings name.
+   - Year: four digits.
+   - Pages: use en‑dash style `123--145` when available.
+   - Volume/Number: include for journal articles when available.
+   - Entry type: `@article` for journals; `@inproceedings` for conference papers; `@book` for books, other types only when clearly appropriate.
+3) Output formatting rules
+   - Do NOT change the citation key (the part after `@type{` and before the comma). This is because I am already using this specific key in the paper.
+   - Use double curly braces around the `title` value to preserve capitalization, e.g., `title = {{Attention Is All You Need}}`.
+   - For authors, prefer `Last, First` form and separate authors with ` and `.
+   - List all authors and do not use `et al.` or `and others` (even when there are hundreds of authors).
+   - Use the full conference name in `booktitle` (no acronyms), e.g., `Proceedings of the 41st International Conference on Machine Learning` instead of `Proceedings of the 41st ICML`. Do not include the acronym after the full name, e.g., do not write `Proceedings of the 41st International Conference on Machine Learning (ICML)`.
+   - For NeurIPS papers, the booktitle should be `Advances in Neural Information Processing Systems`, not `Proceedings of Neural Information Processing Systems`.
+   - Field order (when present):
+     `author`, `title`, `journal`/`booktitle`, `year`, `volume`, `number`, `pages`.
+   - Indentation similar to typical BibTeX style and no trailing comma on the last field.
+   - ICLR has no formal proceedings. Hence, start the page number from 1, e.g., `1--14`.
+   - If we need to cite a web article/blog, use the access date written in the original bib entry. If the access date is missing in the original bib entry, or if it is written with a placeholder like `Accessed YYYY-MM-DD`, use the date of today.
+4) Fields to OMIT
+   - Do not include: URL, PDF link, DOI, editors, abstract, keywords in `@article` and `@inproceedings`. For other types, use your best judgement.
+   - If only an arXiv version exists, use `@article` and include `title`, `author`, `journal`, and `year`. For the `journal`, write `arXiv preprint arXiv:{ID}`, where you should write the arXiv ID for the placeholder.
+5) If uncertain
+   - Do not guess. Omit fields that cannot be verified from authoritative sources.
+6) Output requirement
+   - Return only a single, valid BibTeX entry. Do not include any explanations, prose, or Markdown code fences.

bibfixer-0.1.0/bibfixer.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,131 @@
+Metadata-Version: 2.4
+Name: bibfixer
+Version: 0.1.0
+Summary: Fixes and standardizes BibTeX using LLM + web search
+Author: Takashi Ishida
+License: MIT License
+        Copyright (c) 2025 Takashi Ishida
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai<2,>=1.107.0
+Requires-Dist: bibtexparser<2,>=1.4.1
+Dynamic: license-file
+<div align="center">
+<img src="logo.png#gh-light-mode-only" alt="" width="450"><img src="logo.png#gh-dark-mode-only" alt="" width="450">
+[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)
+![Model](https://img.shields.io/badge/Model-GPT--5--mini-purple?logo=openai&logoColor=white)
+![Last Commit](https://img.shields.io/github/last-commit/takashiishida/bibfixer)
+</div>
+A Python tool that fixes and standardizes your BibTeX. It not only completes entries with accurate metadata via LLM + web search capabilities, but also enforces a consistent style based on your preferences (e.g., venue naming, title casing, author format, page ranges). This removes the tedious manual work of hunting down sources and cleaning messy entries (like those copied from Google Scholar), producing a clean, uniform bib file. A consistent style improves readability and leaves a stronger impression on readers and reviewers.
+## Installation
+1. Install (from PyPI):
+```bash
+pip install bibfixer
+```
+2. Set up your OpenAI API key:
+```bash
+export OPENAI_API_KEY='your-api-key-here'
+```
+## Usage
+Basic usage (input is required via `-i/--input`):
+```bash
+bibfixer -i sample_input.bib
+```
+With output file:
+```bash
+bibfixer -i sample_input.bib -o corrected.bib
+```
+With additional formatting preferences (`-p`):
+```bash
+bibfixer -i sample_input.bib -p "Use NeurIPS instead of NIPS"
+```
+Use a custom prompt file (defaults to bundled `prompts/default.md`):
+```bash
+bibfixer -i sample_input.bib --prompt-file prompts/default.md
+```
+The complete revision instructions are in `prompts/default.md`. You can edit this file to match your style or point to another file using `--prompt-file`.
+## Examples
+Example (1) Original bib entry. Authors are missing and "ai" is not capitalized.
+```bib
+@article{bai2022constitutional,
+ author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
+ journal = {arXiv preprint arXiv:2212.08073},
+ title = {Constitutional ai: Harmlessness from ai feedback},
+ year = {2022}
+}
+```
+Missing authors are added and title is capitalized properly:
+```bib
+@article{bai2022constitutional,
+  author = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol and Olsson, Catherine and Olah, Christopher and Hernandez, Danny and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova and Lasenby, Robert and Larson, Robin and Ringer, Sam and Johnston, Scott and Kravec, Shauna and El Showk, Sheer and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy and Conerly, Tom and Henighan, Tom and Hume, Tristan and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam and Brown, Tom and Kaplan, Jared},
+  title = {{Constitutional AI: Harmlessness from AI Feedback}},
+  journal = {arXiv preprint arXiv:2212.08073},
+  year = {2022}
+}
+```
+Example (2) Original bib entry. This shows the arXiv version but the paper was published in ICML. "llm" needs to be capitalized.
+```bib
+@article{khan2024debating,
+ author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R and Rockt{\"a}schel, Tim and Perez, Ethan},
+ journal = {arXiv preprint arXiv:2402.06782},
+ title = {Debating with more persuasive llms leads to more truthful answers},
+ year = {2024}
+}
+```
+arXiv is replaced with the conference information and appropriate title:
+```bib
+@inproceedings{khan2024debating,
+  author = {Khan, Akbir and Hughes, John and Valentine, Dan and Ruis, Laura and Sachan, Kshitij and Radhakrishnan, Ansh and Grefenstette, Edward and Bowman, Samuel R. and Rockt{\"a}schel, Tim and Perez, Ethan},
+  title = {{Debating with More Persuasive LLMs Leads to More Truthful Answers}},
+  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
+  year = {2024},
+  volume = {235},
+  pages = {23662--23733}
+}
+```
+> [!WARNING]
+> This tool uses LLM + web search and may occasionally produce incomplete or inaccurate metadata or formatting. Always review the final `.bib` before submission. To quickly compare input and output, you can run:
+>
+> ```bash
+> diff -y --suppress-common-lines input.bib output.bib | less -R
+> ```

bibfixer-0.1.0/bibfixer.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,14 @@
+LICENSE
+MANIFEST.in
+README.md
+pyproject.toml
+bibfixer/__init__.py
+bibfixer/agent.py
+bibfixer/cli.py
+bibfixer.egg-info/PKG-INFO
+bibfixer.egg-info/SOURCES.txt
+bibfixer.egg-info/dependency_links.txt
+bibfixer.egg-info/entry_points.txt
+bibfixer.egg-info/requires.txt
+bibfixer.egg-info/top_level.txt
+bibfixer/prompts/default.md

bibfixer-0.1.0/bibfixer.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

bibfixer-0.1.0/bibfixer.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ bibfixer = bibfixer.cli:main

bibfixer-0.1.0/bibfixer.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ openai<2,>=1.107.0
2	+ bibtexparser<2,>=1.4.1

bibfixer-0.1.0/bibfixer.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ bibfixer

bibfixer-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,27 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "bibfixer"
+version = "0.1.0"
+description = "Fixes and standardizes BibTeX using LLM + web search"
+readme = "README.md"
+license = { file = "LICENSE" }
+authors = [{ name = "Takashi Ishida" }]
+requires-python = ">=3.9"
+dependencies = [
+  "openai>=1.107.0,<2",
+  "bibtexparser>=1.4.1,<2",
+]
+[project.scripts]
+bibfixer = "bibfixer.cli:main"
+[tool.setuptools]
+packages = ["bibfixer"]
+[tool.setuptools.package-data]
+bibfixer = ["prompts/default.md"]

bibfixer-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0