labmate-mcp 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labmate_mcp/writing.py ADDED
@@ -0,0 +1,1488 @@
1
+ """
2
+ labmate-mcp writing & publication support module.
3
+
4
+ Tools for the final phase of the research workflow:
5
+ - Citation formatting via Crossref content negotiation
6
+ - Experimental section templates for common reaction types
7
+ - Journal submission formatting guides
8
+ - Supporting Information (SI) checklists
9
+ - Standard chemistry abbreviations & symbols
10
+ - Thesis writing section guides
11
+ - Molecular formula formatting (LaTeX, HTML, Unicode)
12
+ - IUPAC nomenclature lookup (PubChem)
13
+
14
+ No API keys required for most functions. Crossref and PubChem calls are free.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import re
21
+ from typing import Any
22
+
23
+ import httpx
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ # =============================================================================
29
+ # Citation formatting via Crossref content negotiation
30
+ # =============================================================================
31
+
32
+ # CSL style → Crossref content-negotiation style parameter
33
+ CSL_STYLES: dict[str, str] = {
34
+ "acs": "american-chemical-society",
35
+ "acs-nano": "acs-nano",
36
+ "jacs": "american-chemical-society",
37
+ "apa": "apa",
38
+ "apa7": "apa-7th-edition",
39
+ "rsc": "royal-society-of-chemistry",
40
+ "nature": "nature",
41
+ "science": "science",
42
+ "angew": "angewandte-chemie",
43
+ "angewandte": "angewandte-chemie",
44
+ "ieee": "ieee",
45
+ "vancouver": "vancouver",
46
+ "chicago": "chicago-author-date",
47
+ "harvard": "harvard-cite-them-right",
48
+ "cell": "cell",
49
+ "pnas": "proceedings-of-the-national-academy-of-sciences",
50
+ "elsevier": "elsevier-harvard",
51
+ "springer": "springer-basic-author-date",
52
+ "wiley": "american-chemical-society",
53
+ "mla": "modern-language-association",
54
+ }
55
+
56
+
57
+ async def format_citation(
58
+ doi: str,
59
+ style: str = "acs",
60
+ locale: str = "en-US",
61
+ ) -> dict:
62
+ """
63
+ Format a DOI as a citation in the requested style using Crossref content negotiation.
64
+
65
+ Args:
66
+ doi: DOI string (with or without https://doi.org/ prefix)
67
+ style: Citation style (acs, apa, rsc, nature, angew, ieee, vancouver, etc.)
68
+ locale: Locale for formatting (default: en-US)
69
+
70
+ Returns:
71
+ dict with 'citation', 'style', 'doi' keys.
72
+ """
73
+ doi = doi.strip()
74
+ if doi.startswith("http"):
75
+ doi = doi.split("doi.org/")[-1]
76
+
77
+ csl_style = CSL_STYLES.get(style.lower(), style.lower())
78
+
79
+ url = f"https://api.crossref.org/works/{doi}/transform"
80
+ headers = {
81
+ "Accept": f"text/x-bibliography; style={csl_style}; locale={locale}",
82
+ "User-Agent": "labmate-mcp/7.0.0 (mailto:labmate@scholarly.dev)",
83
+ }
84
+
85
+ try:
86
+ async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
87
+ resp = await client.get(url, headers=headers)
88
+ if resp.status_code == 200:
89
+ citation = resp.text.strip()
90
+ return {
91
+ "doi": doi,
92
+ "style": style,
93
+ "csl_style": csl_style,
94
+ "citation": citation,
95
+ }
96
+ elif resp.status_code == 404:
97
+ return {"error": f"DOI not found: {doi}"}
98
+ else:
99
+ return {"error": f"Crossref returned {resp.status_code}", "doi": doi}
100
+ except Exception as e:
101
+ return {"error": f"Network error: {e}", "doi": doi}
102
+
103
+
104
+ async def build_bibliography(
105
+ dois: list[str],
106
+ style: str = "acs",
107
+ numbered: bool = True,
108
+ locale: str = "en-US",
109
+ ) -> dict:
110
+ """
111
+ Build a formatted bibliography from a list of DOIs.
112
+
113
+ Returns dict with 'bibliography' (formatted string) and 'entries' (individual citations).
114
+ """
115
+ entries = []
116
+ errors = []
117
+
118
+ for i, doi in enumerate(dois[:100]): # cap at 100
119
+ result = await format_citation(doi, style=style, locale=locale)
120
+ if "error" in result:
121
+ errors.append({"doi": doi, "error": result["error"]})
122
+ else:
123
+ entries.append({
124
+ "index": i + 1,
125
+ "doi": doi,
126
+ "citation": result["citation"],
127
+ })
128
+
129
+ # Build formatted bibliography
130
+ lines = []
131
+ for entry in entries:
132
+ if numbered:
133
+ lines.append(f"({entry['index']}) {entry['citation']}")
134
+ else:
135
+ lines.append(entry["citation"])
136
+
137
+ return {
138
+ "style": style,
139
+ "num_entries": len(entries),
140
+ "num_errors": len(errors),
141
+ "bibliography": "\n\n".join(lines),
142
+ "entries": entries,
143
+ "errors": errors if errors else None,
144
+ }
145
+
146
+
147
+ # =============================================================================
148
+ # IUPAC name ↔ SMILES via PubChem PUG-REST
149
+ # =============================================================================
150
+
151
+ PUBCHEM_REST = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
152
+
153
+
154
+ async def iupac_from_smiles(smiles: str) -> dict:
155
+ """Look up IUPAC name from a SMILES string via PubChem."""
156
+ url = f"{PUBCHEM_REST}/compound/smiles/property/IUPACName,MolecularFormula,MolecularWeight,IsomericSMILES/JSON"
157
+ params = {"smiles": smiles}
158
+ try:
159
+ async with httpx.AsyncClient(timeout=15) as client:
160
+ resp = await client.get(url, params=params)
161
+ if resp.status_code == 200:
162
+ data = resp.json()
163
+ props = data.get("PropertyTable", {}).get("Properties", [{}])[0]
164
+ return {
165
+ "input_smiles": smiles,
166
+ "iupac_name": props.get("IUPACName"),
167
+ "formula": props.get("MolecularFormula"),
168
+ "molecular_weight": props.get("MolecularWeight"),
169
+ "canonical_smiles": props.get("IsomericSMILES"),
170
+ }
171
+ else:
172
+ return {"error": f"PubChem returned {resp.status_code}. Compound may not be in database.", "smiles": smiles}
173
+ except Exception as e:
174
+ return {"error": str(e), "smiles": smiles}
175
+
176
+
177
+ async def smiles_from_name(name: str) -> dict:
178
+ """Look up SMILES and properties from a chemical name via PubChem."""
179
+ import urllib.parse
180
+ encoded = urllib.parse.quote(name, safe="")
181
+ url = f"{PUBCHEM_REST}/compound/name/{encoded}/property/IsomericSMILES,CanonicalSMILES,IUPACName,MolecularFormula,MolecularWeight,InChI,InChIKey/JSON"
182
+ try:
183
+ async with httpx.AsyncClient(timeout=15) as client:
184
+ resp = await client.get(url)
185
+ if resp.status_code == 200:
186
+ data = resp.json()
187
+ props = data.get("PropertyTable", {}).get("Properties", [{}])[0]
188
+ return {
189
+ "input_name": name,
190
+ "isomeric_smiles": props.get("IsomericSMILES"),
191
+ "canonical_smiles": props.get("CanonicalSMILES"),
192
+ "iupac_name": props.get("IUPACName"),
193
+ "formula": props.get("MolecularFormula"),
194
+ "molecular_weight": props.get("MolecularWeight"),
195
+ "inchi": props.get("InChI"),
196
+ "inchi_key": props.get("InChIKey"),
197
+ }
198
+ elif resp.status_code == 404:
199
+ return {"error": f"Compound not found: '{name}'"}
200
+ else:
201
+ return {"error": f"PubChem returned {resp.status_code}", "name": name}
202
+ except Exception as e:
203
+ return {"error": str(e), "name": name}
204
+
205
+
206
+ # =============================================================================
207
+ # Molecular formula formatting
208
+ # =============================================================================
209
+
210
+
211
+ def format_molecular_formula(
212
+ formula: str,
213
+ output_format: str = "unicode",
214
+ ) -> dict:
215
+ """
216
+ Format a molecular formula with proper subscripts.
217
+
218
+ Args:
219
+ formula: e.g., "C9H8O4", "Ca(OH)2", "Fe2O3"
220
+ output_format: "unicode", "latex", "html", or "plain"
221
+
222
+ Returns dict with formatted string.
223
+ """
224
+ f = formula.strip()
225
+
226
+ if output_format == "latex":
227
+ # C9H8O4 → C$_{9}$H$_{8}$O$_{4}$
228
+ out = re.sub(r"(\d+)", r"$_{\1}$", f)
229
+ # Also handle in \ce{} notation
230
+ ce = re.sub(r"(\d+)", r"_{\1}", f)
231
+ return {"formula": f, "latex_inline": out, "latex_ce": f"\\ce{{{ce}}}", "format": "latex"}
232
+
233
+ elif output_format == "html":
234
+ out = re.sub(r"(\d+)", r"<sub>\1</sub>", f)
235
+ return {"formula": f, "html": out, "format": "html"}
236
+
237
+ elif output_format == "unicode":
238
+ subscript_map = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
239
+ out = ""
240
+ for ch in f:
241
+ if ch.isdigit():
242
+ out += ch.translate(subscript_map)
243
+ else:
244
+ out += ch
245
+ return {"formula": f, "unicode": out, "format": "unicode"}
246
+
247
+ else: # plain
248
+ return {"formula": f, "plain": f, "format": "plain"}
249
+
250
+
251
+ # =============================================================================
252
+ # Experimental section templates
253
+ # =============================================================================
254
+
255
+ EXPERIMENTAL_TEMPLATES: dict[str, dict] = {}
256
+
257
+
258
+ def _et(name, *, aliases=None, category="", template="", notes="", safety=""):
259
+ key = name.lower()
260
+ EXPERIMENTAL_TEMPLATES[key] = {
261
+ "name": name,
262
+ "aliases": aliases or [],
263
+ "category": category,
264
+ "template": template,
265
+ "notes": notes,
266
+ "safety": safety,
267
+ }
268
+ for a in (aliases or []):
269
+ EXPERIMENTAL_TEMPLATES[a.lower()] = EXPERIMENTAL_TEMPLATES[key]
270
+
271
+
272
+ # --- Cross-Coupling Reactions ---
273
+
274
+ _et("Suzuki Coupling",
275
+ aliases=["suzuki-miyaura", "suzuki"],
276
+ category="Cross-Coupling",
277
+ template="""**{product_name}**
278
+
279
+ A flame-dried round-bottom flask equipped with a magnetic stir bar was charged with {aryl_halide} ({aryl_halide_mass} mg, {aryl_halide_mmol} mmol, {aryl_halide_equiv} equiv), {boronic_acid} ({boronic_acid_mass} mg, {boronic_acid_mmol} mmol, {boronic_acid_equiv} equiv), Pd(PPh₃)₄ ({cat_mass} mg, {cat_mmol} mmol, {cat_mol_percent} mol%), and {base} ({base_mass} mg, {base_mmol} mmol, {base_equiv} equiv). The flask was evacuated and backfilled with N₂ (3×). {solvent} ({solvent_volume} mL) was added, and the reaction mixture was stirred at {temperature} °C for {time} h. The reaction was monitored by TLC ({tlc_system}). Upon completion, the mixture was cooled to room temperature, diluted with EtOAc ({workup_volume} mL), and washed with H₂O ({workup_volume} mL) and brine ({workup_volume} mL). The organic layer was dried over Na₂SO₄, filtered, and concentrated under reduced pressure. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound as a {product_appearance} ({product_mass} mg, {yield_percent}%).
280
+
281
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
282
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
283
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.
284
+ {mp_or_rotation}""",
285
+ notes="Replace placeholders with actual values. For Pd(dppf)Cl₂ catalyst, use degassed dioxane/H₂O (4:1).",
286
+ safety="Pd catalysts: avoid inhalation. Boronic acids: irritants. Use fume hood.")
287
+
288
+ _et("Sonogashira Coupling",
289
+ aliases=["sonogashira"],
290
+ category="Cross-Coupling",
291
+ template="""**{product_name}**
292
+
293
+ A Schlenk flask was charged with {aryl_halide} ({aryl_halide_mass} mg, {aryl_halide_mmol} mmol, {aryl_halide_equiv} equiv), PdCl₂(PPh₃)₂ ({pd_mass} mg, {pd_mmol} mmol, {pd_mol_percent} mol%), and CuI ({cui_mass} mg, {cui_mmol} mmol, {cui_mol_percent} mol%). The flask was evacuated and backfilled with N₂ (3×). Degassed {solvent} ({solvent_volume} mL) and {amine_base} ({amine_volume} mL) were added via syringe. {alkyne} ({alkyne_mass} mg, {alkyne_mmol} mmol, {alkyne_equiv} equiv) was then added dropwise. The mixture was stirred at {temperature} °C for {time} h. The reaction was filtered through Celite, washed with EtOAc, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
294
+
295
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
296
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
297
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
298
+ notes="Use degassed solvents. CuI amount typically 5-10 mol%. Et₃N or iPr₂NH as base.",
299
+ safety="CuI: toxic. Pd compounds: avoid inhalation.")
300
+
301
+ _et("Buchwald-Hartwig Amination",
302
+ aliases=["buchwald-hartwig", "c-n coupling"],
303
+ category="Cross-Coupling",
304
+ template="""**{product_name}**
305
+
306
+ An oven-dried Schlenk tube was charged with Pd₂(dba)₃ ({pd_mass} mg, {pd_mmol} mmol, {pd_mol_percent} mol%), {ligand} ({lig_mass} mg, {lig_mmol} mmol, {lig_equiv} equiv), and {base} ({base_mass} mg, {base_mmol} mmol, {base_equiv} equiv). The tube was evacuated and backfilled with N₂ (3×). {aryl_halide} ({aryl_halide_mass} mg, {aryl_halide_mmol} mmol, {aryl_halide_equiv} equiv), {amine} ({amine_mass} mg, {amine_mmol} mmol, {amine_equiv} equiv), and {solvent} ({solvent_volume} mL) were added. The reaction was heated to {temperature} °C and stirred for {time} h. After cooling, the mixture was diluted with EtOAc, filtered through Celite, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
307
+
308
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
309
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
310
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
311
+ notes="Common ligands: BINAP, XPhos, SPhos, RuPhos, DavePhos, BrettPhos. Base: NaOtBu or Cs₂CO₃.",
312
+ safety="Pd catalysts: avoid inhalation. Strong bases: moisture-sensitive. Use glovebox or Schlenk line.")
313
+
314
+ # --- Reductions ---
315
+
316
+ _et("Sodium Borohydride Reduction",
317
+ aliases=["nabh4 reduction", "nabh4", "borohydride reduction"],
318
+ category="Reduction",
319
+ template="""**{product_name}**
320
+
321
+ To a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, {substrate_equiv} equiv) in {solvent} ({solvent_volume} mL) at {temperature} °C was added NaBH₄ ({nabh4_mass} mg, {nabh4_mmol} mmol, {nabh4_equiv} equiv) portionwise over {addition_time} min. The reaction was stirred at {temperature} °C for {time} h. The reaction was quenched by careful addition of saturated NH₄Cl solution ({quench_volume} mL) at 0 °C and extracted with {extraction_solvent} (3 × {extraction_volume} mL). The combined organic layers were washed with brine, dried over Na₂SO₄, filtered, and concentrated under reduced pressure. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound as a {product_appearance} ({product_mass} mg, {yield_percent}%).
322
+
323
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
324
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
325
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
326
+ notes="Quench carefully — H₂ evolution. For ketone selectivity over ester, use MeOH at −78 °C. CeCl₃ for Luche conditions.",
327
+ safety="NaBH₄: flammable solid, water-reactive. Quench generates H₂ — open vessel, no sparks.")
328
+
329
+ _et("LiAlH4 Reduction",
330
+ aliases=["lah reduction", "lialh4", "lithium aluminium hydride reduction"],
331
+ category="Reduction",
332
+ template="""**{product_name}**
333
+
334
+ To a suspension of LiAlH₄ ({lah_mass} mg, {lah_mmol} mmol, {lah_equiv} equiv) in dry {solvent} ({solvent_volume} mL) at 0 °C under N₂ was added a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, {substrate_equiv} equiv) in dry {solvent} ({substrate_solvent_volume} mL) dropwise via addition funnel over {addition_time} min. The reaction was allowed to warm to {temperature} °C and stirred for {time} h. The reaction was cooled to 0 °C and carefully quenched by sequential addition of H₂O ({fieser_water} mL), 15% NaOH ({fieser_naoh} mL), and H₂O ({fieser_water2} mL) [Fieser workup]. The resulting white precipitate was filtered through Celite and washed with {solvent}. The filtrate was dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
335
+
336
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
337
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
338
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
339
+ notes="Fieser workup: for each gram of LiAlH₄, add x mL H₂O, x mL 15% NaOH, 3x mL H₂O. Alternative: Rochelle's salt (Na/K tartrate) workup.",
340
+ safety="LiAlH₄: pyrophoric, water-reactive. Anhydrous conditions mandatory. Quench under N₂ at 0 °C. Fire extinguisher on hand.")
341
+
342
+ _et("Catalytic Hydrogenation",
343
+ aliases=["hydrogenation", "pd/c hydrogenation", "h2 reduction"],
344
+ category="Reduction",
345
+ template="""**{product_name}**
346
+
347
+ A round-bottom flask was charged with {substrate} ({substrate_mass} mg, {substrate_mmol} mmol), Pd/C (10 wt%, {cat_mass} mg, {cat_loading} mol% Pd), and {solvent} ({solvent_volume} mL). The flask was evacuated and backfilled with H₂ (balloon, 3×). The reaction was stirred at room temperature under H₂ atmosphere (1 atm, balloon) for {time} h. The reaction was filtered through a pad of Celite, washed with {solvent} ({wash_volume} mL), and concentrated under reduced pressure to afford the title compound as a {product_appearance} ({product_mass} mg, {yield_percent}%).
348
+
349
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
350
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
351
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
352
+ notes="Pd/C: 10% w/w typical, can use 5-20%. Solvents: MeOH, EtOAc, EtOH. For debenzylation, use same conditions. Pd(OH)₂/C (Pearlman's catalyst) for stubborn substrates.",
353
+ safety="Pd/C: pyrophoric when dry. Always add solvent first. Never expose dry Pd/C to H₂. Filter away from open flames.")
354
+
355
+ # --- Oxidations ---
356
+
357
+ _et("Dess-Martin Oxidation",
358
+ aliases=["dmp oxidation", "dess-martin"],
359
+ category="Oxidation",
360
+ template="""**{product_name}**
361
+
362
+ To a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, 1.0 equiv) in CH₂Cl₂ ({solvent_volume} mL) at room temperature was added Dess-Martin periodinane ({dmp_mass} mg, {dmp_mmol} mmol, {dmp_equiv} equiv). The reaction was stirred at room temperature for {time} h, then quenched by addition of a 1:1 mixture of saturated NaHCO₃ and saturated Na₂S₂O₃ ({quench_volume} mL). The layers were separated, and the aqueous layer was extracted with CH₂Cl₂ (3 × {extraction_volume} mL). The combined organic layers were washed with brine, dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
363
+
364
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
365
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
366
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
367
+ notes="1.1-1.5 equiv DMP. Na₂S₂O₃ reduces excess periodinane and iodinane byproducts. No over-oxidation to carboxylic acid.",
368
+ safety="DMP: oxidizer, shock-sensitive when dry. Store cold. Waste: treat with Na₂S₂O₃ before disposal.")
369
+
370
+ _et("Swern Oxidation",
371
+ aliases=["swern"],
372
+ category="Oxidation",
373
+ template="""**{product_name}**
374
+
375
+ To a solution of oxalyl chloride ({oxchloride_volume} μL, {oxchloride_mmol} mmol, {oxchloride_equiv} equiv) in CH₂Cl₂ ({solvent_volume} mL) at −78 °C was added DMSO ({dmso_volume} μL, {dmso_mmol} mmol, {dmso_equiv} equiv) dropwise. The mixture was stirred for 15 min, then a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, 1.0 equiv) in CH₂Cl₂ ({substrate_solvent_volume} mL) was added dropwise. The reaction was stirred at −78 °C for {time} min, then Et₃N ({et3n_volume} mL, {et3n_mmol} mmol, {et3n_equiv} equiv) was added. The reaction was allowed to warm to room temperature over 1 h, then diluted with CH₂Cl₂ ({dilute_volume} mL) and washed with H₂O, 1 M HCl, saturated NaHCO₃, and brine. The organic layer was dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
376
+
377
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
378
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
379
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
380
+ notes="Order of addition critical: (COCl)₂ first, then DMSO, then substrate, then Et₃N. Keep at −78 °C until Et₃N added.",
381
+ safety="Oxalyl chloride: highly toxic, corrosive, lachrymator. DMSO/oxalyl chloride: exothermic at >−60 °C. Use dry ice/acetone bath. Fume hood mandatory.")
382
+
383
+ # --- Amide Coupling ---
384
+
385
+ _et("Amide Coupling",
386
+ aliases=["peptide coupling", "hatu coupling", "edc coupling", "amidation"],
387
+ category="Amide Bond Formation",
388
+ template="""**{product_name}**
389
+
390
+ To a solution of {acid} ({acid_mass} mg, {acid_mmol} mmol, {acid_equiv} equiv) in {solvent} ({solvent_volume} mL) at {temperature} °C was added {coupling_reagent} ({coupling_mass} mg, {coupling_mmol} mmol, {coupling_equiv} equiv) and {base} ({base_volume} μL, {base_mmol} mmol, {base_equiv} equiv). The mixture was stirred for {preactivation_time} min, then {amine} ({amine_mass} mg, {amine_mmol} mmol, {amine_equiv} equiv) was added. The reaction was stirred at room temperature for {time} h. The reaction was diluted with EtOAc ({dilute_volume} mL) and washed sequentially with 1 M HCl, saturated NaHCO₃, and brine. The organic layer was dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
391
+
392
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
393
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
394
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
395
+ notes="Coupling reagents: HATU > HBTU > TBTU > EDC·HCl/HOBt. Bases: DIPEA (iPr₂NEt), NMM, Et₃N. Solvents: DMF, CH₂Cl₂, or DMF/CH₂Cl₂ mixtures. Pre-activation with coupling reagent + base for 5-15 min improves yields.",
396
+ safety="HATU/HBTU: skin sensitizers, irritants. DMF: reproductive toxicant — avoid skin contact.")
397
+
398
+ # --- SPPS ---
399
+
400
+ _et("Solid-Phase Peptide Synthesis",
401
+ aliases=["spps", "fmoc spps", "peptide synthesis"],
402
+ category="Peptide Chemistry",
403
+ template="""**Solid-Phase Peptide Synthesis of {peptide_name}**
404
+
405
+ Peptide synthesis was performed manually on a {resin_type} resin (loading: {resin_loading} mmol/g, {resin_mass} mg, {scale} mmol scale) using standard Fmoc/tBu chemistry.
406
+
407
+ *Fmoc Deprotection*: The resin was treated with 20% piperidine in DMF (2 × {deprot_volume} mL, 5 + 15 min) and washed with DMF (5 × {wash_volume} mL).
408
+
409
+ *Amino Acid Coupling*: Fmoc-{aa_name}-OH ({aa_equiv} equiv), {coupling_reagent} ({coupling_equiv} equiv), and {base} ({base_equiv} equiv) were dissolved in DMF ({coupling_volume} mL) and added to the resin. The mixture was agitated for {coupling_time} min at room temperature. Coupling completion was monitored by the Kaiser (ninhydrin) test. If positive, the coupling was repeated.
410
+
411
+ *Wash Cycles*: Between each step: DMF (5×), CH₂Cl₂ (3×), DMF (3×).
412
+
413
+ *Cleavage*: The peptide was cleaved from the resin using TFA/{scavenger_cocktail} ({cleavage_ratio}, {cleavage_volume} mL) for {cleavage_time} h at room temperature. The resin was filtered off, and the filtrate was concentrated under N₂ flow. The crude peptide was precipitated with cold diethyl ether ({ether_volume} mL), centrifuged (4000 rpm, 5 min), and the pellet was washed with cold ether (2×). The crude peptide was dissolved in {dissolve_solvent} and lyophilized to afford a {product_appearance} ({crude_mass} mg).
414
+
415
+ *Purification*: The crude peptide was purified by preparative RP-HPLC (Column: {hplc_column}; gradient: {hplc_gradient}; flow rate: {hplc_flow} mL/min; detection: λ = 220/280 nm). Pure fractions were pooled and lyophilized to afford the title peptide as a {pure_appearance} ({pure_mass} mg, {yield_percent}% overall).
416
+
417
+ Analytical HPLC: tR = {hplc_rt} min (purity: {hplc_purity}%, {hplc_conditions}).
418
+ MALDI-TOF MS: m/z calcd for {ms_formula} [M+H]⁺ {ms_calcd}, found {ms_found}.
419
+ {additional_characterization}""",
420
+ notes="Scavenger cocktails: TFA/TIS/H₂O (95:2.5:2.5) standard. For Cys-containing: TFA/TIS/H₂O/EDT (94:1:2.5:2.5). For Met/Trp-containing: add EDT or DODT. Kaiser test: ninhydrin/pyridine/KCN; blue = free amine (incomplete coupling), yellow = complete.",
421
+ safety="TFA: highly corrosive, strong acid. Piperidine: highly toxic, flammable. EDT: foul-smelling thiol. Use fume hood for all operations.")
422
+
423
+ # --- Click Chemistry ---
424
+
425
+ _et("CuAAC Click Reaction",
426
+ aliases=["click chemistry", "copper click", "cuaac", "azide-alkyne cycloaddition"],
427
+ category="Cycloaddition",
428
+ template="""**{product_name}**
429
+
430
+ To a solution of {azide} ({azide_mass} mg, {azide_mmol} mmol, {azide_equiv} equiv) and {alkyne} ({alkyne_mass} mg, {alkyne_mmol} mmol, {alkyne_equiv} equiv) in {solvent} ({solvent_volume} mL) was added CuSO₄·5H₂O ({cuso4_mass} mg, {cuso4_mmol} mmol, {cu_mol_percent} mol%) and sodium ascorbate ({asc_mass} mg, {asc_mmol} mmol, {asc_equiv} equiv). The mixture was stirred at room temperature for {time} h. The reaction was diluted with EtOAc ({dilute_volume} mL), washed with saturated NH₄Cl ({wash_volume} mL), H₂O, and brine. The organic layer was dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the 1,4-disubstituted 1,2,3-triazole product ({product_mass} mg, {yield_percent}%).
431
+
432
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
433
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
434
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
435
+ notes="CuSO₄/sodium ascorbate (in situ Cu(I)). Typically 5-10 mol% Cu. Solvents: t-BuOH/H₂O (1:1), DMF/H₂O, DMSO. TBTA ligand accelerates. Exclusively 1,4-regioisomer.",
436
+ safety="Organic azides: potentially explosive — never isolate low-MW azides. CuSO₄: irritant. Ascorbate: safe.")
437
+
438
+ # --- Protection/Deprotection ---
439
+
440
+ _et("Boc Deprotection",
441
+ aliases=["boc removal", "tfa deprotection", "boc cleavage"],
442
+ category="Protection/Deprotection",
443
+ template="""**{product_name}**
444
+
445
+ To a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol) in CH₂Cl₂ ({solvent_volume} mL) at 0 °C was added TFA ({tfa_volume} mL, {tfa_equiv} equiv). The reaction was stirred at room temperature for {time} h. The reaction was concentrated under reduced pressure. The residue was azeotroped with toluene (3×) to remove residual TFA, affording the TFA salt of the title compound as a {product_appearance} ({product_mass} mg, {yield_percent}%).
446
+
447
+ {characterization}""",
448
+ notes="Typical: 25-50% TFA in CH₂Cl₂ (v/v). For acid-sensitive substrates: 4 M HCl in dioxane or TMSOTf/2,6-lutidine. Free-base by dissolving in CH₂Cl₂ and washing with saturated NaHCO₃.",
449
+ safety="TFA: highly corrosive. Use fume hood. Neutralize waste with NaHCO₃ before disposal.")
450
+
451
+ _et("TBS Protection",
452
+ aliases=["tbs silylation", "tbdms protection", "silyl protection"],
453
+ category="Protection/Deprotection",
454
+ template="""**{product_name}**
455
+
456
+ To a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, 1.0 equiv) and imidazole ({imid_mass} mg, {imid_mmol} mmol, {imid_equiv} equiv) in {solvent} ({solvent_volume} mL) at {temperature} °C was added TBSCl ({tbs_mass} mg, {tbs_mmol} mmol, {tbs_equiv} equiv). The reaction was stirred at room temperature for {time} h. The reaction was quenched with H₂O ({quench_volume} mL) and extracted with {extraction_solvent} (3 × {extraction_volume} mL). The combined organic layers were washed with brine, dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
457
+
458
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
459
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
460
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
461
+ notes="TBSCl/imidazole in DMF for 1° alcohols. For 2°: TBSOTf/2,6-lutidine in CH₂Cl₂ at −78°C to 0°C. DMAP catalytic amount can accelerate.",
462
+ safety="TBSCl: corrosive. Imidazole: irritant.")
463
+
464
+ _et("TBAF Desilylation",
465
+ aliases=["tbaf deprotection", "tbs removal", "silyl removal"],
466
+ category="Protection/Deprotection",
467
+ template="""**{product_name}**
468
+
469
+ To a solution of {substrate} ({substrate_mass} mg, {substrate_mmol} mmol, 1.0 equiv) in THF ({solvent_volume} mL) at {temperature} °C was added TBAF (1.0 M in THF, {tbaf_volume} mL, {tbaf_mmol} mmol, {tbaf_equiv} equiv). The reaction was stirred at room temperature for {time} h, then quenched with saturated NH₄Cl ({quench_volume} mL). The mixture was extracted with EtOAc (3 × {extraction_volume} mL), washed with brine, dried over Na₂SO₄, filtered, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
470
+
471
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
472
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
473
+ notes="1.0-2.0 equiv TBAF. Add AcOH to buffer if substrate is base-sensitive. Alternative: HF·pyridine (Olah's reagent) for hindered silyl ethers.",
474
+ safety="TBAF: corrosive, hygroscopic. HF·pyridine: extremely dangerous — HF burns are life-threatening.")
475
+
476
+ # --- Recrystallization & Purification ---
477
+
478
+ _et("Recrystallization",
479
+ aliases=["recrystallization", "crystallization"],
480
+ category="Purification",
481
+ template="""**Recrystallization of {compound_name}**
482
+
483
+ Crude {compound_name} ({crude_mass} mg) was dissolved in minimum hot {solvent} (~{dissolve_temp} °C, {solvent_volume} mL). The solution was allowed to cool slowly to room temperature, then placed at {cool_temp} °C for {cool_time} h. The resulting crystals were collected by vacuum filtration, washed with cold {wash_solvent} ({wash_volume} mL), and dried under vacuum to afford pure {compound_name} as {crystal_appearance} ({product_mass} mg, {recovery_percent}% recovery).
484
+
485
+ mp: {melting_point} °C.
486
+ {additional_characterization}""",
487
+ notes="Solvent pairs: EtOAc/hexanes, CH₂Cl₂/hexanes, MeOH/H₂O, acetone/hexanes, EtOH/H₂O. Seed crystals improve nucleation. Slow cooling = larger, purer crystals.",
488
+ safety="Use appropriate solvent safety precautions. Hot solvent: burn risk.")
489
+
490
+ # --- General Procedures ---
491
+
492
+ _et("Wittig Reaction",
493
+ aliases=["wittig olefination", "wittig"],
494
+ category="C-C Bond Formation",
495
+ template="""**{product_name}**
496
+
497
+ To a suspension of {phosphonium_salt} ({salt_mass} mg, {salt_mmol} mmol, {salt_equiv} equiv) in THF ({solvent_volume} mL) at {base_temp} °C was added {base} ({base_volume} mL/mg, {base_mmol} mmol, {base_equiv} equiv) dropwise. The resulting orange/red ylide solution was stirred for {ylide_time} min. A solution of {aldehyde} ({aldehyde_mass} mg, {aldehyde_mmol} mmol, 1.0 equiv) in THF ({aldehyde_solvent_volume} mL) was added dropwise at {addition_temp} °C. The reaction was stirred at {reaction_temp} °C for {time} h. The reaction was quenched with saturated NH₄Cl, extracted with EtOAc (3×), washed with brine, dried over Na₂SO₄, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%, E:Z = {ez_ratio}).
498
+
499
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
500
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
501
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
502
+ notes="Non-stabilized ylides: preferentially Z-alkene. Stabilized ylides: preferentially E-alkene. Bases: n-BuLi, NaHMDS, KHMDS. For E-selective olefination, use HWE (Horner-Wadsworth-Emmons) instead.",
503
+ safety="n-BuLi: pyrophoric. Use syringe techniques under inert atmosphere.")
504
+
505
# Registers the Grignard addition template (category "C-C Bond Formation").
# The text covers both in-situ reagent formation from Mg turnings and the subsequent
# addition to the electrophile, which is the 1.0-equiv reference compound.
# NOTE(review): _et is defined outside this excerpt; placeholder substitution is assumed
# to be done by the caller — confirm.
+ _et("Grignard Reaction",
506
+ aliases=["grignard addition", "grignard"],
507
+ category="C-C Bond Formation",
508
+ template="""**{product_name}**
509
+
510
+ Mg turnings ({mg_mass} mg, {mg_mmol} mmol, {mg_equiv} equiv) were flame-dried under vacuum in a round-bottom flask. Dry THF ({thf_volume} mL) and a crystal of I₂ were added. {halide} ({halide_volume} μL, {halide_mmol} mmol, {halide_equiv} equiv) in THF ({halide_thf_volume} mL) was added dropwise at a rate to maintain gentle reflux. After addition, the mixture was refluxed for {grignard_time} h to ensure complete consumption of Mg. The Grignard reagent was cooled to {addition_temp} °C, and a solution of {electrophile} ({electrophile_mass} mg, {electrophile_mmol} mmol, 1.0 equiv) in THF ({electrophile_thf_volume} mL) was added dropwise. The reaction was stirred at {reaction_temp} °C for {time} h, then quenched with saturated NH₄Cl at 0 °C. The mixture was extracted with EtOAc (3×), washed with brine, dried over Na₂SO₄, and concentrated. Purification by column chromatography (SiO₂, {column_eluent}) afforded the title compound ({product_mass} mg, {yield_percent}%).
511
+
512
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
513
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
514
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
515
+ notes="Anhydrous conditions essential. Activation: I₂, 1,2-dibromoethane, or DIBAL-H. Solvents: THF or Et₂O. Titrate Grignard with menthol/1,10-phenanthroline before use.",
516
+ safety="Grignard formation is exothermic — control addition rate. Mg turnings: flammable. Anhydrous ether/THF: fire hazard.")
517
+
518
# Registers the Mitsunobu reaction template (category "Substitution").
# The alcohol is the 1.0-equiv reference; the template hard-codes DIAD as the
# azodicarboxylate and 0 °C as the addition temperature (no placeholders for those).
# NOTE(review): _et is defined outside this excerpt; placeholder substitution is assumed
# to be done by the caller — confirm.
+ _et("Mitsunobu Reaction",
519
+ aliases=["mitsunobu"],
520
+ category="Substitution",
521
+ template="""**{product_name}**
522
+
523
+ To a solution of {alcohol} ({alcohol_mass} mg, {alcohol_mmol} mmol, 1.0 equiv), {nucleophile} ({nuc_mass} mg, {nuc_mmol} mmol, {nuc_equiv} equiv), and PPh₃ ({pph3_mass} mg, {pph3_mmol} mmol, {pph3_equiv} equiv) in THF ({solvent_volume} mL) at 0 °C was added DIAD ({diad_volume} μL, {diad_mmol} mmol, {diad_equiv} equiv) dropwise. The reaction was allowed to warm to room temperature and stirred for {time} h. The reaction was concentrated and purified directly by column chromatography (SiO₂, {column_eluent}) to afford the title compound ({product_mass} mg, {yield_percent}%).
524
+
525
+ ¹H NMR ({nmr_freq} MHz, {nmr_solvent}): δ {nmr_data_1h}
526
+ ¹³C NMR ({nmr_freq_13c} MHz, {nmr_solvent}): δ {nmr_data_13c}
527
+ HRMS ({ms_method}): m/z calcd for {ms_formula} [M+{ms_ion}]⁺ {ms_calcd}, found {ms_found}.""",
528
+ notes="Nucleophile pKₐ < 11 required. DIAD or DEAD as azodicarboxylate. Inversion of configuration at stereocenter. TPPO byproduct can be problematic — use polymer-bound PPh₃ or Fluorous-PPh₃ for easier removal.",
529
+ safety="DIAD/DEAD: toxic, eye/skin irritant. PPh₃: sensitizer.")
530
+
531
+
532
def lookup_experimental_template(query: str) -> dict | list[dict] | None:
    """
    Find an experimental-section template for a reaction name.

    The query is trimmed and lowercased. An exact hit on a key of
    EXPERIMENTAL_TEMPLATES returns that single template dict. Otherwise every
    template whose key, or whose lowercased "category", contains the query as
    a substring is collected (registry order, no repeats) and returned as a
    list. None means nothing matched.
    """
    needle = query.strip().lower()
    if needle in EXPERIMENTAL_TEMPLATES:
        return EXPERIMENTAL_TEMPLATES[needle]

    # No exact key: fall back to substring matching on key and category.
    partial: list[dict] = []
    for registered_key, template in EXPERIMENTAL_TEMPLATES.items():
        hit = needle in registered_key or needle in template.get("category", "").lower()
        if not hit or template in partial:
            continue
        partial.append(template)
    return partial or None
549
+
550
+
551
+ # =============================================================================
552
+ # Journal formatting guides
553
+ # =============================================================================
554
+
555
+ JOURNAL_GUIDES: dict[str, dict] = {}
556
+
557
+
558
+ def _jg(name, *, aliases=None, publisher="", issn="", citation_style="",
559
+ word_limit="", abstract_limit="", figure_limit="",
560
+ reference_format="", si_allowed=True, graphical_abstract="",
561
+ submission_url="", open_access="", apc="", scope="",
562
+ file_formats="", special_notes=""):
563
+ key = name.lower()
564
+ JOURNAL_GUIDES[key] = {
565
+ "name": name,
566
+ "aliases": aliases or [],
567
+ "publisher": publisher,
568
+ "issn": issn,
569
+ "citation_style": citation_style,
570
+ "word_limit": word_limit,
571
+ "abstract_limit": abstract_limit,
572
+ "figure_limit": figure_limit,
573
+ "reference_format": reference_format,
574
+ "si_allowed": si_allowed,
575
+ "graphical_abstract": graphical_abstract,
576
+ "submission_url": submission_url,
577
+ "open_access": open_access,
578
+ "apc": apc,
579
+ "scope": scope,
580
+ "file_formats": file_formats,
581
+ "special_notes": special_notes,
582
+ }
583
+ for a in (aliases or []):
584
+ JOURNAL_GUIDES[a.lower()] = JOURNAL_GUIDES[key]
585
+
586
+
587
# JACS guide: stored under the lowercased full name plus the aliases "jacs" and
# "j. am. chem. soc." (see _jg). si_allowed is left at its default (True).
# NOTE(review): word limits, APC and graphic sizes are hand-maintained snapshots —
# verify against the journal's current author guidelines before relying on them.
+ _jg("Journal of the American Chemical Society",
588
+ aliases=["jacs", "j. am. chem. soc."],
589
+ publisher="ACS", issn="0002-7863",
590
+ citation_style="ACS (superscript numbered)",
591
+ word_limit="Articles: no strict limit (typically 8000-12000 words). Communications: 4 journal pages (~3500 words + figures).",
592
+ abstract_limit="200 words max",
593
+ figure_limit="No strict limit for articles. Communications: typically 3-4 figures.",
594
+ reference_format="ACS style: superscript numbers, numbered in order of appearance. Format: Author(s). Title. J. Abbrev. Year, Vol, Pages. DOI.",
595
+ graphical_abstract="Required. 3.25 × 1.75 inches, max 600 dpi.",
596
+ submission_url="https://pubs.acs.org/journal/jacsat",
597
+ open_access="Hybrid. ACS AuthorChoice (CC-BY or CC-BY-NC-ND).",
598
+ apc="~$5000 (ACS AuthorChoice)",
599
+ scope="All areas of chemistry — emphasis on significance and broad interest.",
600
+ file_formats="Manuscript: Word or LaTeX. Figures: TIFF, EPS, PDF (300+ dpi). SI: single PDF.",
601
+ special_notes="TOC graphic required. ORCID mandatory for corresponding author. Data availability statement required.")
602
+
603
# Angewandte Chemie Int. Ed. guide: reachable by full name or the aliases "angew",
# "angew. chem. int. ed." and "angewandte" (all lowercased by _jg).
# NOTE(review): acceptance rate, APC and page limits are editorial snapshots — verify.
+ _jg("Angewandte Chemie International Edition",
604
+ aliases=["angew", "angew. chem. int. ed.", "angewandte"],
605
+ publisher="Wiley-VCH", issn="1433-7851",
606
+ citation_style="Angew style: superscript numbered, numbered in order of citation.",
607
+ word_limit="Communications: 4 pages (~2500 words + refs). Reviews: by invitation.",
608
+ abstract_limit="200 words max",
609
+ figure_limit="Communications: 3-5 figures/schemes/tables combined.",
610
+ reference_format="Angew format: [N] a) Author, Journal Year, Vol, Pages; b) ... Multiple sub-references with a), b), c).",
611
+ graphical_abstract="Table of Contents graphic required. Max 5 × 5 cm.",
612
+ submission_url="https://onlinelibrary.wiley.com/journal/15213773",
613
+ open_access="Hybrid. OnlineOpen (CC-BY, CC-BY-NC).",
614
+ apc="~€4500",
615
+ scope="All areas — must be of very high importance and broad interest.",
616
+ file_formats="Word preferred. Figures: TIFF, EPS (300+ dpi).",
617
+ special_notes="Very selective (~15% acceptance). Highlight (5 keywords) and catch-phrase for TOC required.")
618
+
619
# Nature Chemistry guide: reachable by full name or the aliases "nat. chem." and
# "nature chem". Unlike most entries here, graphical_abstract records that none is
# required.
# NOTE(review): word/display-item limits and the APC are editorial snapshots — verify.
+ _jg("Nature Chemistry",
620
+ aliases=["nat. chem.", "nature chem"],
621
+ publisher="Nature Publishing Group / Springer Nature", issn="1755-4330",
622
+ citation_style="Nature style: superscript numbered, numbered in order of citation.",
623
+ word_limit="Articles: ~3000 words (main text, excluding methods, refs, figure legends). Letters: ~1500 words.",
624
+ abstract_limit="150 words max (no references in abstract)",
625
+ figure_limit="Articles: up to 6 display items (figures/tables). Letters: up to 4.",
626
+ reference_format="Nature style: Author(s). Title. Journal Abbrev. Vol, Pages (Year). Max 30 refs for Letters, 50 for Articles.",
627
+ graphical_abstract="Not required (Nature uses own design for TOC).",
628
+ submission_url="https://www.nature.com/nchem/",
629
+ open_access="Hybrid. Gold OA available.",
630
+ apc="~€9500 (Gold OA)",
631
+ scope="All areas of chemistry — emphasis on novelty and broad significance.",
632
+ file_formats="Word or LaTeX. Figures: separate files, 300+ dpi.",
633
+ special_notes="Extremely selective. Methods section separate from main text. Extended Data figures allowed (8 max). Cover letter essential.")
634
+
635
# Chemical Science guide: reachable by full name or the aliases "chem. sci." and
# "chem sci". The only entry in this list whose apc field records no charge.
# NOTE(review): page limits and the no-APC policy are editorial snapshots — verify.
+ _jg("Chemical Science",
636
+ aliases=["chem. sci.", "chem sci"],
637
+ publisher="Royal Society of Chemistry (RSC)", issn="2041-6520",
638
+ citation_style="RSC style: superscript numbered.",
639
+ word_limit="Edge articles: ~5 journal pages. Full articles: ~8 journal pages.",
640
+ abstract_limit="200 words max",
641
+ figure_limit="Reasonable (typically 4-8 figures).",
642
+ reference_format="RSC format: N. Author, J. Abbrev., Year, Vol, Pages. ESI references numbered separately.",
643
+ graphical_abstract="Table of Contents entry: max 8 × 4 cm.",
644
+ submission_url="https://www.rsc.org/journals-books-databases/about-journals/chemical-science/",
645
+ open_access="Full Gold OA (CC-BY). No APC — funded by RSC.",
646
+ apc="Free (no APC)",
647
+ scope="Chemistry of exceptional significance. All areas.",
648
+ file_formats="Word or LaTeX. Figures: TIFF, EPS.",
649
+ special_notes="Flagship RSC journal. Free open access (unique!). ESI (Electronic Supplementary Information) strongly encouraged.")
650
+
651
# Organic Letters guide: reachable by full name or the aliases "org. lett." and
# "orglett".
# NOTE(review): page/word limits and the APC are editorial snapshots — verify.
+ _jg("Organic Letters",
652
+ aliases=["org. lett.", "orglett"],
653
+ publisher="ACS", issn="1523-7060",
654
+ citation_style="ACS style: superscript numbered.",
655
+ word_limit="Communications only: max 4 printed pages (~2500 words).",
656
+ abstract_limit="150 words max",
657
+ figure_limit="Typically 2-4 schemes + 1-2 figures/tables.",
658
+ reference_format="ACS style: superscript numbers.",
659
+ graphical_abstract="TOC graphic required.",
660
+ submission_url="https://pubs.acs.org/journal/orlef7",
661
+ open_access="Hybrid (ACS AuthorChoice).",
662
+ apc="~$5000",
663
+ scope="Organic and bioorganic chemistry — synthesis, mechanisms, theory, new reagents.",
664
+ file_formats="Word or LaTeX. SI: single PDF.",
665
+ special_notes="Short communications only. Highly competitive. Experimental + full characterization in SI.")
666
+
667
# J. Org. Chem. guide: stored under the lowercased full name (which begins with
# "the ") plus the aliases "joc" and "j. org. chem.".
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("The Journal of Organic Chemistry",
668
+ aliases=["joc", "j. org. chem."],
669
+ publisher="ACS", issn="0022-3263",
670
+ citation_style="ACS style: superscript numbered.",
671
+ word_limit="Full articles: no strict limit (typically 6000-12000). Notes: 4 pages.",
672
+ abstract_limit="200 words max",
673
+ figure_limit="Reasonable for full articles.",
674
+ reference_format="ACS style: superscript numbers.",
675
+ graphical_abstract="TOC graphic required.",
676
+ submission_url="https://pubs.acs.org/journal/joceah",
677
+ open_access="Hybrid.",
678
+ apc="~$5000",
679
+ scope="Organic chemistry: synthesis, mechanisms, theory, natural products, methodology.",
680
+ file_formats="Word or LaTeX.",
681
+ special_notes="Full experimental details in main manuscript (not SI). Complete NMR data for all new compounds.")
682
+
683
# ACS Catalysis guide. The alias "acs catalysis" equals the key _jg already derives
# from name.lower(), so it is redundant but harmless (it re-assigns the same dict);
# "acs catal." is the only additional lookup key.
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("ACS Catalysis",
684
+ aliases=["acs catal.", "acs catalysis"],
685
+ publisher="ACS", issn="2155-5435",
686
+ citation_style="ACS style.",
687
+ word_limit="Letters: 3000 words. Articles: no strict limit.",
688
+ abstract_limit="200 words",
689
+ figure_limit="Reasonable.",
690
+ reference_format="ACS style.",
691
+ graphical_abstract="TOC graphic required.",
692
+ submission_url="https://pubs.acs.org/journal/accacs",
693
+ open_access="Hybrid.",
694
+ apc="~$5000",
695
+ scope="Heterogeneous, homogeneous, bio-, and electro-catalysis. Mechanisms and applications.",
696
+ file_formats="Word or LaTeX.",
697
+ special_notes="Scope includes computational catalysis. Turnover numbers/frequencies expected.")
698
+
699
# J. Med. Chem. guide: reachable by full name or the aliases "jmc" and
# "j. med. chem.".
# NOTE(review): word limits and the APC are editorial snapshots — verify.
+ _jg("Journal of Medicinal Chemistry",
700
+ aliases=["jmc", "j. med. chem."],
701
+ publisher="ACS", issn="0022-2623",
702
+ citation_style="ACS style.",
703
+ word_limit="Articles: 7500-10000 words. Letters: 3000 words. Perspectives: 10000 words.",
704
+ abstract_limit="250 words",
705
+ figure_limit="Reasonable.",
706
+ reference_format="ACS style.",
707
+ graphical_abstract="TOC graphic required.",
708
+ submission_url="https://pubs.acs.org/journal/jmcmar",
709
+ open_access="Hybrid.",
710
+ apc="~$5000",
711
+ scope="Drug design, SAR, ADME, computational med chem, chemical biology.",
712
+ file_formats="Word or LaTeX.",
713
+ special_notes="Requires biological data + SAR analysis. SMILES strings for all compounds. Molecular formula strings for HRMS.")
714
+
715
# ChemComm guide: reachable by full name or the aliases "chemcomm", "chem. commun."
# and "chem commun". Note abstract_limit holds explanatory text rather than a word
# count, so consumers must treat it as free-form text.
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("Chemical Communications",
716
+ aliases=["chemcomm", "chem. commun.", "chem commun"],
717
+ publisher="RSC", issn="1359-7345",
718
+ citation_style="RSC style.",
719
+ word_limit="Communications: 3500 words max (including refs and captions).",
720
+ abstract_limit="No formal abstract — use first paragraph as summary.",
721
+ figure_limit="4 figures/tables max.",
722
+ reference_format="RSC format.",
723
+ graphical_abstract="TOC entry + graphical abstract preferred.",
724
+ submission_url="https://www.rsc.org/journals-books-databases/about-journals/chemcomm/",
725
+ open_access="Hybrid. Gold OA available.",
726
+ apc="~£2000",
727
+ scope="All areas of chemistry. Short, urgent communications.",
728
+ file_formats="Word. Figures: TIFF.",
729
+ special_notes="Very short format. All experimental details must go in ESI. Rapid publication.")
730
+
731
# Dalton Transactions guide: reachable by full name or the aliases "dalton" and
# "dalton trans.".
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("Dalton Transactions",
732
+ aliases=["dalton", "dalton trans."],
733
+ publisher="RSC", issn="1477-9226",
734
+ citation_style="RSC style.",
735
+ word_limit="Communications: 4 pages. Full papers: no strict limit.",
736
+ abstract_limit="200 words",
737
+ figure_limit="Reasonable.",
738
+ reference_format="RSC format.",
739
+ graphical_abstract="TOC graphic.",
740
+ submission_url="https://www.rsc.org/journals-books-databases/about-journals/dalton-transactions/",
741
+ open_access="Hybrid.",
742
+ apc="~£2000",
743
+ scope="Inorganic, organometallic, bioinorganic chemistry. Catalysis, materials.",
744
+ file_formats="Word.",
745
+ special_notes="CIF files required for crystal structures. CCDC deposition mandatory.")
746
+
747
# Chem. Eur. J. guide: stored under the lowercased full name (with the ASCII
# " - " separator) plus the aliases "chem. eur. j.", "cej" and "chem eur j".
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("Chemistry - A European Journal",
748
+ aliases=["chem. eur. j.", "cej", "chem eur j"],
749
+ publisher="Wiley-VCH", issn="0947-6539",
750
+ citation_style="Wiley (numbered).",
751
+ word_limit="Communications: 2500 words. Full papers: no strict limit.",
752
+ abstract_limit="200 words",
753
+ figure_limit="Reasonable.",
754
+ reference_format="Wiley format: numbered.",
755
+ graphical_abstract="Required.",
756
+ submission_url="https://chemistry-europe.onlinelibrary.wiley.com/journal/15213765",
757
+ open_access="Hybrid.",
758
+ apc="~€3500",
759
+ scope="All areas of chemistry.",
760
+ file_formats="Word.",
761
+ special_notes="Part of Chemistry Europe family of journals.")
762
+
763
# ACS Nano guide. The single alias "acs nano" equals the key _jg already derives from
# name.lower(), so it adds no extra lookup key (it re-assigns the same dict).
# NOTE(review): limits and the APC are editorial snapshots — verify.
+ _jg("ACS Nano",
764
+ aliases=["acs nano"],
765
+ publisher="ACS", issn="1936-0851",
766
+ citation_style="ACS style.",
767
+ word_limit="Articles: no strict limit. Letters: ~3000 words.",
768
+ abstract_limit="250 words",
769
+ figure_limit="Reasonable.",
770
+ reference_format="ACS style.",
771
+ graphical_abstract="TOC graphic required.",
772
+ submission_url="https://pubs.acs.org/journal/ancac3",
773
+ open_access="Hybrid.",
774
+ apc="~$5000",
775
+ scope="Nanoscience and nanotechnology — synthesis, assembly, properties, applications.",
776
+ file_formats="Word or LaTeX.",
777
+ special_notes="Strong emphasis on characterization (TEM, AFM, DLS, etc.).")
778
+
779
+
780
def lookup_journal_guide(query: str) -> dict | list[dict] | None:
    """Find a journal formatting guide by name, alias, publisher or scope.

    The query is trimmed and lowercased. If it is a key of JOURNAL_GUIDES
    (a lowercased journal name or alias) that single guide dict is returned.
    Otherwise the guides whose key, lowercased publisher or lowercased scope
    contain the query as a substring are returned as a list (registry order,
    each guide once). None means nothing matched.
    """
    needle = query.strip().lower()
    if needle in JOURNAL_GUIDES:
        return JOURNAL_GUIDES[needle]

    hits: list[dict] = []
    for registered_key, guide in JOURNAL_GUIDES.items():
        found = (
            needle in registered_key
            or needle in guide.get("publisher", "").lower()
            or needle in guide.get("scope", "").lower()
        )
        if not found:
            continue
        # The same guide is registered under several keys; list it only once.
        if guide in hits:
            continue
        hits.append(guide)
    return hits or None
792
+
793
+
794
+ # =============================================================================
795
+ # Supporting Information (SI) checklist
796
+ # =============================================================================
797
+
798
# Per-technique reporting requirements for Supporting Information, keyed by the
# lowercase underscore identifier that get_si_checklist looks up (e.g. "1h_nmr").
# Every entry carries "name", "required_info", "spectrum_required" (bool) and
# "checklist" (list of str). "format", "example" and "common_mistakes" are optional:
# e.g. "tga" and "dsc" have no "format", and only "1h_nmr", "13c_nmr" and "hrms"
# define "common_mistakes" — read those keys with .get().
+ SI_REQUIREMENTS: dict[str, dict] = {
799
+ "1h_nmr": {
800
+ "name": "¹H NMR Spectrum",
801
+ "required_info": "Frequency (MHz), solvent, chemical shifts (δ in ppm), multiplicity (s, d, t, q, m, dd, dt, etc.), coupling constants (J in Hz), number of protons, assignment.",
802
+ "format": "δ X.XX (mult, J = X.X Hz, NH, assignment)",
803
+ "example": "δ 7.42 (dd, J = 8.2, 1.5 Hz, 2H, ArH), 3.85 (s, 3H, OCH₃)",
804
+ "common_mistakes": "Missing coupling constants. Wrong multiplicity for overlapping signals. Using 'br s' without explanation. Missing residual solvent peak assignment.",
805
+ "spectrum_required": True,
806
+ "checklist": [
807
+ "All peaks assigned",
808
+ "Coupling constants for non-singlets",
809
+ "Correct integration ratios",
810
+ "Residual solvent peak identified",
811
+ "Spectrum clean (no impurities >5%)",
812
+ "Baseline flat",
813
+ ],
814
+ },
815
+ "13c_nmr": {
816
+ "name": "¹³C NMR Spectrum",
817
+ "required_info": "Frequency (MHz), solvent, chemical shifts (δ in ppm). DEPT or HSQC data for multiplicity assignment recommended.",
818
+ "format": "δ X.X (Cₓ assignment, optional)",
819
+ "example": "δ 170.2 (C=O), 138.5 (C-Ar), 128.3 (CH-Ar), 55.2 (OCH₃)",
820
+ "common_mistakes": "Missing peaks (check molecule has correct number of unique carbons). Quaternary carbons sometimes weak/missing.",
821
+ "spectrum_required": True,
822
+ "checklist": [
823
+ "Number of peaks matches expected unique carbons",
824
+ "All peaks listed in text",
825
+ "Solvent peaks identified (CDCl₃: 77.16; DMSO-d₆: 39.52)",
826
+ "Spectrum shows adequate S/N for all peaks",
827
+ ],
828
+ },
829
+ "19f_nmr": {
830
+ "name": "¹⁹F NMR Spectrum",
831
+ "required_info": "Frequency (MHz), solvent, chemical shifts (δ in ppm referenced to CFCl₃ or internal standard), multiplicity, coupling constants.",
832
+ "format": "δ -X.X (mult, J = X.X Hz)",
833
+ "example": "δ −62.3 (s, 3F, CF₃), −110.5 (dd, J = 10.2, 8.5 Hz, 1F, ArF)",
834
+ "spectrum_required": True,
835
+ "checklist": ["Reference standard stated", "All F-containing groups assigned"],
836
+ },
837
+ "31p_nmr": {
838
+ "name": "³¹P NMR Spectrum",
839
+ "required_info": "Frequency (MHz), solvent, chemical shifts (δ referenced to H₃PO₄ = 0 ppm), multiplicity.",
840
+ "format": "δ X.X (mult)",
841
+ "example": "δ 26.5 (s)",
842
+ "spectrum_required": True,
843
+ "checklist": ["Reference standard stated", "Decoupled and/or coupled spectra"],
844
+ },
845
+ "2d_nmr": {
846
+ "name": "2D NMR (COSY, HSQC, HMBC, NOESY)",
847
+ "required_info": "Type of 2D experiment, frequency, solvent. Cross-peaks should be annotated.",
848
+ "format": "HSQC, HMBC, or NOESY cross-peaks listed or annotated on spectrum.",
849
+ "spectrum_required": True,
850
+ "checklist": [
851
+ "2D spectra included for structure elucidation of novel complex structures",
852
+ "Key cross-peaks labeled",
853
+ "Axis labels (¹H and ¹³C/¹H chemical shift axes)",
854
+ ],
855
+ },
856
+ "ir": {
857
+ "name": "Infrared Spectroscopy",
858
+ "required_info": "Method (KBr pellet, ATR, film), key absorptions in cm⁻¹ with assignment.",
859
+ "format": "IR (ATR): ν̃ = XXXX, XXXX, XXXX cm⁻¹",
860
+ "example": "IR (ATR): ν̃ = 3350 (br, O-H), 1720 (s, C=O), 1600, 1510 (Ar C=C) cm⁻¹",
861
+ "spectrum_required": True,
862
+ "checklist": ["Key functional group absorptions listed", "Method stated"],
863
+ },
864
+ "hrms": {
865
+ "name": "High-Resolution Mass Spectrometry",
866
+ "required_info": "Ionization method (ESI, EI, APCI, MALDI), ion type ([M+H]⁺, [M+Na]⁺, [M-H]⁻), calculated mass, found mass, molecular formula.",
867
+ "format": "HRMS (method): m/z calcd for C₁₂H₁₅NO₃ [M+ion]⁺ XXX.XXXX, found XXX.XXXX.",
868
+ "example": "HRMS (ESI): m/z calcd for C₁₂H₁₆NO₃ [M+H]⁺ 222.1125, found 222.1128.",
869
+ "common_mistakes": "Wrong molecular formula (forgetting to add H for [M+H]⁺). Mass accuracy >5 ppm. Wrong isotope pattern.",
870
+ "spectrum_required": True,
871
+ "checklist": [
872
+ "Correct molecular formula including ion",
873
+ "Mass accuracy within 5 ppm",
874
+ "Ionization method stated",
875
+ "Spectrum shows isotope pattern consistent with formula",
876
+ ],
877
+ },
878
+ "melting_point": {
879
+ "name": "Melting Point",
880
+ "required_info": "Range (onset-endset), method (capillary, DSC), corrected/uncorrected.",
881
+ "format": "mp X-Y °C (uncorrected)",
882
+ "example": "mp 152-154 °C (uncorrected)",
883
+ "spectrum_required": False,
884
+ "checklist": ["Range not too broad (<3 °C for pure compounds)", "Method stated"],
885
+ },
886
+ "optical_rotation": {
887
+ "name": "Optical Rotation",
888
+ "required_info": "Specific rotation [α], wavelength (D-line, 589 nm), temperature, concentration (g/100 mL), solvent.",
889
+ "format": "[α]²⁵_D = +/-X.X (c = Y.Y, solvent)",
890
+ "example": "[α]²⁵_D = −45.2 (c = 1.0, CHCl₃)",
891
+ "spectrum_required": False,
892
+ "checklist": ["Temperature stated", "Concentration and solvent stated", "Sign and magnitude reported"],
893
+ },
894
+ "hplc": {
895
+ "name": "HPLC Chromatogram",
896
+ "required_info": "Column type and dimensions, mobile phase (gradient or isocratic), flow rate, detection wavelength, retention time, purity.",
897
+ "format": "HPLC: tR = X.X min (purity: XX.X%, Column, gradient, λ = XXX nm)",
898
+ "example": "HPLC: tR = 12.3 min (purity: 99.2%, Phenomenex Luna C18 250×4.6 mm, H₂O/MeCN 0.1% TFA, 10-90% over 30 min, 1.0 mL/min, λ = 254 nm)",
899
+ "spectrum_required": True,
900
+ "checklist": [
901
+ "Full method reported (column, eluent, gradient, flow, detector)",
902
+ "Purity stated (≥95% for biological testing)",
903
+ "Retention time stated",
904
+ "Chromatogram included in SI",
905
+ ],
906
+ },
907
+ "chiral_hplc": {
908
+ "name": "Chiral HPLC",
909
+ "required_info": "Chiral column, eluent, flow rate, detection, retention times for both enantiomers, ee%.",
910
+ "format": "Chiral HPLC: tR(major) = X.X min, tR(minor) = X.X min, ee = XX% (column, eluent, flow)",
911
+ "spectrum_required": True,
912
+ "checklist": [
913
+ "Both enantiomers resolved (even if minor not detected)",
914
+ "Racemic reference shown",
915
+ "ee% calculated correctly",
916
+ ],
917
+ },
918
+ "xray": {
919
+ "name": "X-ray Crystallography",
920
+ "required_info": "Crystal data, data collection parameters, structure solution/refinement details, R-factors, CCDC number.",
921
+ "format": "CIF file deposited with CCDC (deposition number XXXXXXX).",
922
+ "spectrum_required": False,
923
+ "checklist": [
924
+ "CIF file deposited with CCDC",
925
+ "CCDC deposition number stated in manuscript",
926
+ "ORTEP or ellipsoid plot included",
927
+ "Crystal data table (a, b, c, α, β, γ, space group, Z, R₁, wR₂)",
928
+ "CheckCIF alerts addressed",
929
+ "Hydrogen atoms located or placed in calculated positions",
930
+ ],
931
+ },
932
+ "elemental_analysis": {
933
+ "name": "Elemental Analysis (CHN)",
934
+ "required_info": "Calculated and found percentages for C, H, N (and other elements). Must agree within 0.4%.",
935
+ "format": "Anal. Calcd for C₁₂H₁₅NO₃: C, 65.14; H, 6.83; N, 6.33. Found: C, 65.02; H, 6.79; N, 6.28.",
936
+ "spectrum_required": False,
937
+ "checklist": [
938
+ "Calcd and found within 0.4% for each element",
939
+ "Molecular formula includes any solvate/salt",
940
+ ],
941
+ },
942
+ "uv_vis": {
943
+ "name": "UV-Vis Spectroscopy",
944
+ "required_info": "Solvent, wavelength of maxima (λmax in nm), extinction coefficients (ε in M⁻¹cm⁻¹ or L mol⁻¹ cm⁻¹).",
945
+ "format": "UV-Vis (solvent): λmax (ε) = XXX nm (XXXX)",
946
+ "example": "UV-Vis (MeCN): λmax (ε) = 345 nm (12,500 M⁻¹cm⁻¹), 420 nm (8,200)",
947
+ "spectrum_required": True,
948
+ "checklist": ["Solvent stated", "ε values calculated from Beer-Lambert", "Concentration stated"],
949
+ },
950
+ "fluorescence": {
951
+ "name": "Fluorescence Spectroscopy",
952
+ "required_info": "Excitation wavelength, emission wavelength, quantum yield (Φ), solvent, concentration.",
953
+ "format": "Fluorescence (solvent): λex = XXX nm, λem = XXX nm, Φ = X.XX",
954
+ "spectrum_required": True,
955
+ "checklist": ["Reference standard for Φ stated", "Excitation and emission wavelengths"],
956
+ },
957
+ "tga": {
958
+ "name": "Thermogravimetric Analysis (TGA)",
959
+ "required_info": "Heating rate, atmosphere (N₂ or air), temperature range, mass loss events.",
960
+ "spectrum_required": True,
961
+ "checklist": ["Heating rate stated", "Atmosphere stated", "Onset temperatures identified"],
962
+ },
963
+ "dsc": {
964
+ "name": "Differential Scanning Calorimetry (DSC)",
965
+ "required_info": "Heating/cooling rate, temperature range, Tg, Tm, Tc values.",
966
+ "spectrum_required": True,
967
+ "checklist": ["Heating/cooling rates stated", "Cycle number stated (1st, 2nd heating)"],
968
+ },
969
+ "gc_ms": {
970
+ "name": "GC-MS",
971
+ "required_info": "Column, temperature program, carrier gas, injection mode, MS ionization.",
972
+ "format": "GC-MS: tR = X.X min, m/z (% relative intensity) = XXX [M]⁺ (XX), XXX (100).",
973
+ "spectrum_required": True,
974
+ "checklist": ["Method details stated", "Major fragments assigned", "M⁺ peak identified"],
975
+ },
976
+ }
977
+
978
+
979
def get_si_checklist(
    content_types: list[str] | None = None,
    compound_type: str = "small molecule",
    *,
    requirements: dict[str, dict] | None = None,
) -> dict:
    """
    Generate a Supporting Information checklist.

    Args:
        content_types: Analytical methods to include, e.g.
            ['1h_nmr', '13c_nmr', 'hrms', 'hplc']. Entries are matched
            case-insensitively, with spaces and hyphens treated as
            underscores, first against the requirement keys and then as a
            substring of a key or of the (equally normalized) display name,
            so 'X-ray' or 'mass spectrometry' resolve too. A single
            comma-separated string is accepted as well. If None, the
            standard minimum for ``compound_type`` is used.
        compound_type: 'small molecule', 'peptide', 'polymer', 'material' or
            'natural product'. Unknown values fall back to 'small molecule'.
        requirements: Optional requirement table to use instead of the
            module-level SI_REQUIREMENTS. Keys are expected to be lowercase
            underscore identifiers; each value should carry a 'name'.

    Returns:
        dict with 'compound_type', 'requested_content', 'num_items',
        'checklist' (matched requirement dicts, each listed once, in request
        order), 'unmatched' (requested entries that resolved to nothing) and
        'general_tips'.
    """

    def _normalize(text: str) -> str:
        # One normalization for both queries and display names, so that
        # multi-word queries can actually be found inside a name.
        return text.strip().lower().replace(" ", "_").replace("-", "_")

    table = SI_REQUIREMENTS if requirements is None else requirements

    # Standard minimums by compound type
    standard_minimums = {
        "small molecule": ["1h_nmr", "13c_nmr", "hrms", "melting_point"],
        "peptide": ["hplc", "hrms", "1h_nmr"],
        "polymer": ["1h_nmr", "13c_nmr", "tga", "dsc"],
        "material": ["1h_nmr", "tga", "dsc", "uv_vis"],
        "natural product": ["1h_nmr", "13c_nmr", "2d_nmr", "hrms", "optical_rotation", "ir"],
    }

    if content_types is None:
        content_types = standard_minimums.get(
            compound_type.strip().lower(), standard_minimums["small molecule"]
        )
    elif isinstance(content_types, str):
        # A bare string would otherwise be iterated character by character.
        content_types = [part.strip() for part in content_types.split(",")]

    items: list[dict] = []
    unmatched: list[str] = []
    for ct in content_types:
        wanted = _normalize(ct)
        entry = None
        # A blank query is a substring of everything; never let it match.
        if wanted:
            entry = table.get(wanted)
            if entry is None:
                # Fuzzy match: first requirement whose key or name contains it.
                for key, val in table.items():
                    if wanted in key or wanted in _normalize(val.get("name", "")):
                        entry = val
                        break
        if entry is None:
            unmatched.append(ct)
        elif not any(entry is seen for seen in items):
            items.append(entry)

    return {
        "compound_type": compound_type,
        "requested_content": content_types,
        "num_items": len(items),
        "checklist": items,
        "unmatched": unmatched,
        "general_tips": [
            "Number compounds sequentially (1, 2a, 2b, 3, ...)",
            "Include General Information section (reagent sources, instrument models)",
            "Provide spectra as images (not raw data) unless journal requests FIDs",
            "Label all spectra with compound number and solvent",
            "Include a table of contents for the SI document",
            "Purity: ≥95% for compounds submitted for biological testing",
        ],
    }
1031
+
1032
+
1033
+ # =============================================================================
1034
+ # Standard chemistry abbreviations
1035
+ # =============================================================================
1036
+
1037
+ ABBREVIATIONS: dict[str, dict[str, str]] = {
1038
+ "solvents": {
1039
+ "ACN": "acetonitrile (MeCN)",
1040
+ "DCE": "1,2-dichloroethane",
1041
+ "DCM": "dichloromethane (CH₂Cl₂)",
1042
+ "DMA": "N,N-dimethylacetamide",
1043
+ "DME": "1,2-dimethoxyethane",
1044
+ "DMF": "N,N-dimethylformamide",
1045
+ "DMSO": "dimethyl sulfoxide",
1046
+ "EtOAc": "ethyl acetate",
1047
+ "EtOH": "ethanol",
1048
+ "Et₂O": "diethyl ether",
1049
+ "Hex": "hexane(s)",
1050
+ "MeCN": "acetonitrile",
1051
+ "MeOH": "methanol",
1052
+ "NMP": "N-methyl-2-pyrrolidone",
1053
+ "PE": "petroleum ether",
1054
+ "iPrOH": "2-propanol (isopropanol)",
1055
+ "THF": "tetrahydrofuran",
1056
+ "TFE": "2,2,2-trifluoroethanol",
1057
+ "tol": "toluene",
1058
+ },
1059
+ "reagents": {
1060
+ "AIBN": "azobisisobutyronitrile",
1061
+ "BHT": "butylated hydroxytoluene",
1062
+ "Boc": "tert-butyloxycarbonyl",
1063
+ "Boc₂O": "di-tert-butyl dicarbonate",
1064
+ "BPO": "benzoyl peroxide",
1065
+ "Cbz": "benzyloxycarbonyl",
1066
+ "CSA": "camphorsulfonic acid",
1067
+ "DABCO": "1,4-diazabicyclo[2.2.2]octane",
1068
+ "DBU": "1,8-diazabicyclo[5.4.0]undec-7-ene",
1069
+ "DCC": "N,N′-dicyclohexylcarbodiimide",
1070
+ "DDQ": "2,3-dichloro-5,6-dicyano-1,4-benzoquinone",
1071
+ "DEAD": "diethyl azodicarboxylate",
1072
+ "DIAD": "diisopropyl azodicarboxylate",
1073
+ "DIBAL-H": "diisobutylaluminium hydride",
1074
+ "DIPEA": "N,N-diisopropylethylamine (Hünig's base)",
1075
+ "DMAP": "4-(dimethylamino)pyridine",
1076
+ "DMP": "Dess-Martin periodinane",
1077
+ "EDC": "1-ethyl-3-(3-dimethylaminopropyl)carbodiimide",
1078
+ "Fmoc": "9-fluorenylmethyloxycarbonyl",
1079
+ "HATU": "hexafluorophosphate azabenzotriazole tetramethyl uronium",
1080
+ "HBTU": "hexafluorophosphate benzotriazole tetramethyl uronium",
1081
+ "HOBt": "1-hydroxybenzotriazole",
1082
+ "IBX": "2-iodoxybenzoic acid",
1083
+ "LAH": "lithium aluminium hydride (LiAlH₄)",
1084
+ "LDA": "lithium diisopropylamide",
1085
+ "LiHMDS": "lithium bis(trimethylsilyl)amide",
1086
+ "mCPBA": "meta-chloroperoxybenzoic acid",
1087
+ "MOM": "methoxymethyl",
1088
+ "Ms": "methanesulfonyl (mesyl)",
1089
+ "NaHMDS": "sodium bis(trimethylsilyl)amide",
1090
+ "NBS": "N-bromosuccinimide",
1091
+ "NCS": "N-chlorosuccinimide",
1092
+ "NMO": "N-methylmorpholine N-oxide",
1093
+ "NOBIN": "2-amino-2′-hydroxy-1,1′-binaphthyl",
1094
+ "PDC": "pyridinium dichromate",
1095
+ "PCC": "pyridinium chlorochromate",
1096
+ "PMB": "para-methoxybenzyl",
1097
+ "PTSA": "para-toluenesulfonic acid",
1098
+ "SEM": "2-(trimethylsilyl)ethoxymethyl",
1099
+ "TBAF": "tetrabutylammonium fluoride",
1100
+ "TBAI": "tetrabutylammonium iodide",
1101
+ "TBDMS": "tert-butyldimethylsilyl (= TBS)",
1102
+ "TBS": "tert-butyldimethylsilyl",
1103
+ "TBDPS": "tert-butyldiphenylsilyl",
1104
+ "TCA": "trichloroacetic acid",
1105
+ "TEMPO": "(2,2,6,6-tetramethylpiperidin-1-yl)oxyl",
1106
+ "TES": "triethylsilyl",
1107
+ "Tf": "trifluoromethanesulfonyl (triflyl)",
1108
+ "TFA": "trifluoroacetic acid",
1109
+ "TfOH": "triflic acid",
1110
+ "TIPS": "triisopropylsilyl",
1111
+ "TMS": "trimethylsilyl",
1112
+ "Ts": "para-toluenesulfonyl (tosyl)",
1113
+ "Xantphos": "4,5-bis(diphenylphosphino)-9,9-dimethylxanthene",
1114
+ },
1115
+ "catalysts_ligands": {
1116
+ "BINAP": "2,2′-bis(diphenylphosphino)-1,1′-binaphthyl",
1117
+ "BrettPhos": "2-(dicyclohexylphosphino)-3,6-dimethoxy-2′,4′,6′-triisopropyl-1,1′-biphenyl",
1118
+ "cod": "1,5-cyclooctadiene",
1119
+ "Cy": "cyclohexyl",
1120
+ "dba": "dibenzylideneacetone",
1121
+ "DavePhos": "2-dicyclohexylphosphino-2′-(N,N-dimethylamino)biphenyl",
1122
+ "dppf": "1,1′-bis(diphenylphosphino)ferrocene",
1123
+ "dppe": "1,2-bis(diphenylphosphino)ethane",
1124
+ "dppp": "1,3-bis(diphenylphosphino)propane",
1125
+ "JohnPhos": "2-(di-tert-butylphosphino)biphenyl",
1126
+ "P(o-tol)₃": "tri(ortho-tolyl)phosphine",
1127
+ "PCy₃": "tricyclohexylphosphine",
1128
+ "Pd/C": "palladium on carbon",
1129
+ "Pd₂(dba)₃": "tris(dibenzylideneacetone)dipalladium(0)",
1130
+ "Pd(OAc)₂": "palladium(II) acetate",
1131
+ "Pd(PPh₃)₄": "tetrakis(triphenylphosphine)palladium(0)",
1132
+ "PPh₃": "triphenylphosphine",
1133
+ "RuPhos": "2-dicyclohexylphosphino-2′,6′-diisopropoxybiphenyl",
1134
+ "SPhos": "2-dicyclohexylphosphino-2′,6′-dimethoxybiphenyl",
1135
+ "XPhos": "2-dicyclohexylphosphino-2′,4′,6′-triisopropylbiphenyl",
1136
+ "IPr": "1,3-bis(2,6-diisopropylphenyl)imidazol-2-ylidene (NHC ligand)",
1137
+ },
1138
+ "spectroscopy": {
1139
+ "ATR": "attenuated total reflectance",
1140
+ "COSY": "correlated spectroscopy",
1141
+ "DEPT": "distortionless enhancement by polarization transfer",
1142
+ "DQF-COSY": "double quantum filtered COSY",
1143
+ "EI": "electron ionization",
1144
+ "ESI": "electrospray ionization",
1145
+ "FAB": "fast atom bombardment",
1146
+ "FID": "free induction decay",
1147
+ "HMBC": "heteronuclear multiple bond correlation",
1148
+ "HSQC": "heteronuclear single quantum coherence",
1149
+ "HRMS": "high-resolution mass spectrometry",
1150
+ "LRMS": "low-resolution mass spectrometry",
1151
+ "MALDI": "matrix-assisted laser desorption/ionization",
1152
+ "MS": "mass spectrometry",
1153
+ "NMR": "nuclear magnetic resonance",
1154
+ "NOESY": "nuclear Overhauser effect spectroscopy",
1155
+ "ROESY": "rotating frame Overhauser effect spectroscopy",
1156
+ "TOCSY": "total correlation spectroscopy",
1157
+ "TOF": "time-of-flight",
1158
+ "UV-Vis": "ultraviolet-visible spectroscopy",
1159
+ },
1160
+ "analytical": {
1161
+ "CD": "circular dichroism",
1162
+ "CE": "capillary electrophoresis",
1163
+ "DLS": "dynamic light scattering",
1164
+ "DSC": "differential scanning calorimetry",
1165
+ "FPLC": "fast protein liquid chromatography",
1166
+ "GC": "gas chromatography",
1167
+ "GPC": "gel permeation chromatography",
1168
+ "HPLC": "high-performance liquid chromatography",
1169
+ "ICP-MS": "inductively coupled plasma mass spectrometry",
1170
+ "ITC": "isothermal titration calorimetry",
1171
+ "LC-MS": "liquid chromatography-mass spectrometry",
1172
+ "MPLC": "medium-pressure liquid chromatography",
1173
+ "ORD": "optical rotatory dispersion",
1174
+ "PAGE": "polyacrylamide gel electrophoresis",
1175
+ "RP-HPLC": "reversed-phase HPLC",
1176
+ "SEC": "size exclusion chromatography",
1177
+ "SEM": "scanning electron microscopy",
1178
+ "SFC": "supercritical fluid chromatography",
1179
+ "SPR": "surface plasmon resonance",
1180
+ "TEM": "transmission electron microscopy",
1181
+ "TGA": "thermogravimetric analysis",
1182
+ "TLC": "thin-layer chromatography",
1183
+ "UPLC": "ultra-performance liquid chromatography",
1184
+ "XPS": "X-ray photoelectron spectroscopy",
1185
+ "XRD": "X-ray diffraction",
1186
+ },
1187
+ "general": {
1188
+ "aq": "aqueous",
1189
+ "br": "broad (NMR)",
1190
+ "cat.": "catalytic",
1191
+ "conc.": "concentrated",
1192
+ "d": "doublet (NMR)",
1193
+ "dd": "doublet of doublets (NMR)",
1194
+ "dt": "doublet of triplets (NMR)",
1195
+ "ee": "enantiomeric excess",
1196
+ "er": "enantiomeric ratio",
1197
+ "dr": "diastereomeric ratio",
1198
+ "equiv": "equivalent(s)",
1199
+ "m": "multiplet (NMR)",
1200
+ "M": "molar (mol/L)",
1201
+ "mp": "melting point",
1202
+ "MW": "molecular weight",
1203
+ "ppm": "parts per million",
1204
+ "q": "quartet (NMR)",
1205
+ "quant.": "quantitative",
1206
+ "Rf": "retention factor (TLC)",
1207
+ "RT": "room temperature",
1208
+ "s": "singlet (NMR)",
1209
+ "sat.": "saturated",
1210
+ "t": "triplet (NMR)",
1211
+ "tR": "retention time",
1212
+ "v/v": "volume per volume",
1213
+ "w/w": "weight per weight",
1214
+ },
1215
+ "biochemistry": {
1216
+ "ATP": "adenosine triphosphate",
1217
+ "BSA": "bovine serum albumin",
1218
+ "cDNA": "complementary DNA",
1219
+ "DMEM": "Dulbecco's modified Eagle medium",
1220
+ "DNA": "deoxyribonucleic acid",
1221
+ "DTT": "dithiothreitol",
1222
+ "EDTA": "ethylenediaminetetraacetic acid",
1223
+ "ELISA": "enzyme-linked immunosorbent assay",
1224
+ "FBS": "fetal bovine serum",
1225
+ "FRET": "Förster resonance energy transfer",
1226
+ "GSH": "glutathione (reduced)",
1227
+ "GSSG": "glutathione (oxidized)",
1228
+ "IC₅₀": "half-maximal inhibitory concentration",
1229
+ "Kd": "dissociation constant",
1230
+ "Ki": "inhibition constant",
1231
+ "Km": "Michaelis constant",
1232
+ "mRNA": "messenger RNA",
1233
+ "NAD⁺/NADH": "nicotinamide adenine dinucleotide (oxidized/reduced)",
1234
+ "PBS": "phosphate-buffered saline",
1235
+ "PCR": "polymerase chain reaction",
1236
+ "PDB": "Protein Data Bank",
1237
+ "PI": "propidium iodide",
1238
+ "RNA": "ribonucleic acid",
1239
+ "SAR": "structure-activity relationship",
1240
+ "SDS": "sodium dodecyl sulfate",
1241
+ "siRNA": "small interfering RNA",
1242
+ "TRIS": "tris(hydroxymethyl)aminomethane",
1243
+ "WT": "wild-type",
1244
+ },
1245
+ }
1246
+
1247
+
1248
def get_abbreviations(category: str = "all") -> dict:
    """
    Get standard chemistry abbreviations.

    Args:
        category: 'solvents', 'reagents', 'catalysts_ligands', 'spectroscopy',
            'analytical', 'general', 'biochemistry', or 'all'.
            Matching is case-insensitive; spaces and hyphens are treated as
            underscores, and a partial name (e.g. 'ligands') selects the
            first category whose key contains it.

    Returns:
        On success, a dict with 'category', 'num_abbreviations' and
        'abbreviations' (abbreviation → full name). For 'all', every full
        name is suffixed with ' [<category>]'; an abbreviation that is
        defined in more than one category keeps all of its meanings, joined
        with '; '. When nothing matches (including an empty category), a
        dict with a single 'error' key is returned.
    """
    # Normalise so that "Catalysts Ligands" / "catalysts-ligands" hit the
    # underscore-separated category keys.
    cat = re.sub(r"[\s\-]+", "_", category.strip().lower())

    if cat == "all":
        combined: dict[str, str] = {}
        for cat_name, abbrevs in ABBREVIATIONS.items():
            for abbr, meaning in abbrevs.items():
                tagged = f"{meaning} [{cat_name}]"
                # The same abbreviation can live in several categories
                # (e.g. SEM); keep every meaning instead of overwriting.
                combined[abbr] = f"{combined[abbr]}; {tagged}" if abbr in combined else tagged
        return {
            "category": "all",
            "num_abbreviations": len(combined),
            "abbreviations": combined,
        }

    matched_key = None
    if cat in ABBREVIATIONS:
        # Exact category name always wins over a fuzzy hit.
        matched_key = cat
    elif cat:
        # Fuzzy match. Guarded by `cat` being non-empty: "" is a substring of
        # every key and would otherwise silently select the first category.
        matched_key = next(
            (key for key in ABBREVIATIONS if cat in key or key in cat),
            None,
        )

    if matched_key is None:
        return {"error": f"Unknown category: {category}. Available: {', '.join(ABBREVIATIONS.keys())}"}

    data = ABBREVIATIONS[matched_key]
    return {
        "category": matched_key,
        "num_abbreviations": len(data),
        "abbreviations": data,
    }
1280
+
1281
+
1282
def lookup_abbreviation(query: str) -> dict:
    """
    Look up what a specific abbreviation means.

    The query is first matched case-insensitively as a substring of every
    abbreviation. Only if that yields nothing is it matched as a substring
    of the full names (reverse search).

    Args:
        query: Abbreviation (or part of one) to look up, or a fragment of a
            full name for the reverse search.

    Returns:
        Dict with 'query' (stripped input), 'num_results' and 'results'.
        'results' maps each matching abbreviation to
        {'meaning': ..., 'category': ...}. When the same abbreviation is
        defined in more than one category, the first hit keeps the plain
        abbreviation as its key and later hits are keyed
        '<abbr> [<category>]' so that no meaning is dropped. An empty query
        yields zero results.
    """
    q = query.strip()
    needle = q.lower()
    results: dict[str, dict[str, str]] = {}

    def _record(abbr: str, full: str, cat: str) -> None:
        # Disambiguate instead of overwriting when an abbreviation such as
        # SEM appears in several categories.
        key = abbr if abbr not in results else f"{abbr} [{cat}]"
        results[key] = {"meaning": full, "category": cat}

    # "" is a substring of everything; treat an empty query as "no match"
    # rather than returning the whole table.
    if needle:
        for cat, abbrevs in ABBREVIATIONS.items():
            for abbr, full in abbrevs.items():
                if needle in abbr.lower():
                    _record(abbr, full, cat)

        if not results:
            # Reverse search: look in full names
            for cat, abbrevs in ABBREVIATIONS.items():
                for abbr, full in abbrevs.items():
                    if needle in full.lower():
                        _record(abbr, full, cat)

    return {
        "query": q,
        "num_results": len(results),
        "results": results,
    }
1302
+
1303
+
1304
+ # =============================================================================
1305
+ # Thesis section writing guide
1306
+ # =============================================================================
1307
+
1308
# Static writing guidance, keyed by a lowercase, underscore-separated section
# identifier. Every entry is a dict with:
#   "name"            – display name of the section
#   "purpose"         – one-line statement of what the section is for
#   "structure"       – ordered list of the parts the section should contain
#   "tips"            – list of writing recommendations
#   "common_mistakes" – list of pitfalls to avoid
# The "abstract" entry additionally carries a "word_limit" string.
THESIS_GUIDES: dict[str, dict] = {
    # --- Abstract -----------------------------------------------------------
    "abstract": {
        "name": "Abstract",
        "purpose": "Concise summary of the entire thesis/paper. Should stand alone.",
        "structure": [
            "Background / Context (1-2 sentences)",
            "Research gap / Objective (1 sentence)",
            "Methods / Approach (1-2 sentences)",
            "Key results (2-3 sentences)",
            "Significance / Conclusions (1 sentence)",
        ],
        "word_limit": "Thesis: 300-500 words. Papers: 150-250 words (journal-specific).",
        "tips": [
            "Write LAST, after all other sections",
            "No references in the abstract",
            "No abbreviations without definition (or avoid altogether)",
            "Every sentence should convey essential information",
            "Include key numerical results (yields, ee's, Ki values)",
            "Use past tense for results, present tense for conclusions",
        ],
        "common_mistakes": [
            "Too vague — no specific results",
            "Too detailed — reads like a methods section",
            "Including information not in the main text",
            "Using jargon without definition",
        ],
    },
    # --- Introduction -------------------------------------------------------
    "introduction": {
        "name": "Introduction",
        "purpose": "Establish context, identify the gap, state your contribution.",
        "structure": [
            "Broad context: What is the field? Why does it matter? (1-2 paragraphs)",
            "Literature review: What has been done? Key developments. (3-5 paragraphs)",
            "The gap: What remains unknown or unsolved? (1 paragraph)",
            "Your contribution: What did you do and why? (1 paragraph)",
            "Outline: Brief overview of thesis structure (for thesis only)",
        ],
        "tips": [
            "Funnel structure: broad → specific → your work",
            "Cite primary literature, not just reviews",
            "Be objective about others' work — acknowledge strengths and limitations",
            "Don't bury your contribution at the very end",
            "State your hypothesis clearly if applicable",
            "Use present tense for established facts, past tense for specific studies",
        ],
        "common_mistakes": [
            "Too broad introduction that reads like a textbook",
            "Literature review without clear narrative thread",
            "Gap statement is too vague ('little is known about...')",
            "Missing connection between gap and your work",
            "Citing too many reviews, not enough primary literature",
        ],
    },
    # --- Results and Discussion ---------------------------------------------
    "results_discussion": {
        "name": "Results and Discussion",
        "purpose": "Present findings with interpretation. Can be combined or separate sections.",
        "structure": [
            "Organize by logical theme, not chronological order",
            "Each subsection: aim → approach → results → interpretation",
            "Figures and schemes should drive the narrative",
            "Compare with literature — agree? disagree? why?",
            "Address unexpected results honestly",
        ],
        "tips": [
            "Lead with your strongest result",
            "Every figure/table should be referenced and discussed in text",
            "Don't just describe data — interpret it",
            "Use schemes for reaction development, figures for data",
            "Discuss selectivity, scope, and limitations",
            "Compare yields/selectivities with literature benchmarks",
            "Use 'we observed' not 'it was observed' (active voice)",
        ],
        "common_mistakes": [
            "Chronological 'lab diary' organization instead of logical narrative",
            "Figures not discussed in text",
            "Over-interpreting insignificant differences",
            "Not acknowledging limitations",
            "Repeating numbers from tables in running text",
        ],
    },
    # --- Experimental Section -----------------------------------------------
    "experimental": {
        "name": "Experimental Section",
        "purpose": "Enable exact reproduction by a competent chemist.",
        "structure": [
            "General Information: instruments, reagent sources, purification of solvents",
            "General Procedures (if applicable): used for repetitive reactions",
            "Specific Compound Procedures: one per new compound",
            "Each procedure: substrate, reagent amounts (mass, mmol, equiv), conditions, workup, purification, yield",
            "Full characterization data for each new compound",
        ],
        "tips": [
            "Be specific: '5 mL' not 'some', '80 °C' not 'heated'",
            "Report actual amounts used, not theoretical",
            "Include equiv for each reagent",
            "State how reaction progress was monitored (TLC, LC-MS)",
            "Report yield as mass and percentage",
            "Use consistent formatting for all procedures",
            "Known compounds: cite literature preparation, provide ¹H NMR to confirm identity",
        ],
        "common_mistakes": [
            "Missing equivalents for reagents",
            "Vague: 'worked up in the usual manner'",
            "Inconsistent formatting between procedures",
            "Missing characterization for new compounds",
            "Not specifying which NMR solvent was used",
            "Reporting yields > 100% without explanation",
        ],
    },
    # --- Conclusion ---------------------------------------------------------
    "conclusion": {
        "name": "Conclusion",
        "purpose": "Summarize key findings and their significance. Look forward.",
        "structure": [
            "Restate the problem / objective (1 sentence)",
            "Key findings — what was accomplished (2-3 paragraphs)",
            "Significance — why this matters (1 paragraph)",
            "Future work — what comes next (1 paragraph)",
        ],
        "tips": [
            "Don't just repeat the abstract",
            "Be specific about what was achieved",
            "Be honest about limitations",
            "Future work should be realistic and specific",
            "End on a forward-looking, positive note",
        ],
        "common_mistakes": [
            "Simply repeating the abstract",
            "Introducing new results not in the main text",
            "Overly speculative future work",
            "Too brief — dismissive of own work",
        ],
    },
    # --- Supporting Information ---------------------------------------------
    "supporting_information": {
        "name": "Supporting Information",
        "purpose": "Provide complete analytical data, additional experiments, and full characterization.",
        "structure": [
            "Table of Contents",
            "General Information",
            "Synthetic Procedures (if not in main text)",
            "Characterization Data (organized by compound number)",
            "NMR Spectra (¹H, ¹³C, 2D if applicable)",
            "HPLC/GC traces",
            "HRMS data",
            "X-ray crystallographic data (CIF reference)",
            "Computational details (if applicable)",
            "Additional tables and figures",
        ],
        "tips": [
            "Number compounds consistently with main text",
            "Include a table of contents for SI > 20 pages",
            "Label every spectrum with compound number and conditions",
            "Include full-page spectra, not cropped fragments",
            "Arrange spectra in compound number order",
        ],
        "common_mistakes": [
            "Unlabeled spectra",
            "Missing ¹³C NMR for new compounds",
            "HRMS without isotope pattern",
            "NMR spectra with obvious impurities not acknowledged",
        ],
    },
}
1469
+
1470
+
1471
def get_thesis_guide(section: str) -> dict | None:
    """
    Get writing guidance for a thesis/paper section.

    Args:
        section: 'abstract', 'introduction', 'results_discussion', 'experimental',
            'conclusion', 'supporting_information'. A guide's display name
            (e.g. 'Experimental Section', 'Results and Discussion'), the
            shorthand 'SI', or a fragment of a key/name is accepted as well.
            Matching ignores case and treats spaces and hyphens as
            underscores.

    Returns:
        The matching entry of THESIS_GUIDES, or a dict with a single 'error'
        key when nothing matches (including empty input). None is never
        returned; the annotation is kept for interface compatibility.
    """

    def _normalise(text: str) -> str:
        # Same canonical form for the query and for guide names, so that a
        # multi-word display name can actually match itself.
        return re.sub(r"[\s\-]+", "_", text.strip().lower())

    s = _normalise(section)
    unknown = {"error": f"Unknown section: {section}. Available: {', '.join(THESIS_GUIDES.keys())}"}

    # "" is a substring of every key and would otherwise select the first guide.
    if not s:
        return unknown

    if s in THESIS_GUIDES:
        return THESIS_GUIDES[s]

    # "si" is also a substring of "discussion"/"conclusion", so without an
    # explicit alias it would resolve to the wrong guide in the fuzzy pass.
    aliases = {"si": "supporting_information"}
    alias_target = aliases.get(s)
    if alias_target in THESIS_GUIDES:
        return THESIS_GUIDES[alias_target]

    names = {key: _normalise(guide.get("name", "")) for key, guide in THESIS_GUIDES.items()}

    # An exact display-name hit beats any substring hit.
    for key, name in names.items():
        if name and s == name:
            return THESIS_GUIDES[key]

    # Fuzzy match, in both directions so that e.g. "conclusions" or
    # "experimental_part" still resolve.
    for key, name in names.items():
        if s in key or s in name or key in s or (name and name in s):
            return THESIS_GUIDES[key]

    return unknown