opencode-skills-antigravity 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +10 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.csv +34 -0
  6. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.md +2 -0
  7. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  8. package/bundled-skills/docs/sources/sources.md +2 -2
  9. package/bundled-skills/docs/users/bundles.md +1 -1
  10. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  11. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  12. package/bundled-skills/docs/users/getting-started.md +1 -1
  13. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  14. package/bundled-skills/docs/users/usage.md +4 -4
  15. package/bundled-skills/docs/users/visual-guide.md +4 -4
  16. package/bundled-skills/hugging-face-cli/SKILL.md +192 -195
  17. package/bundled-skills/hugging-face-community-evals/SKILL.md +213 -0
  18. package/bundled-skills/hugging-face-community-evals/examples/.env.example +3 -0
  19. package/bundled-skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md +101 -0
  20. package/bundled-skills/hugging-face-community-evals/scripts/inspect_eval_uv.py +104 -0
  21. package/bundled-skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py +306 -0
  22. package/bundled-skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py +297 -0
  23. package/bundled-skills/hugging-face-dataset-viewer/SKILL.md +120 -120
  24. package/bundled-skills/hugging-face-gradio/SKILL.md +304 -0
  25. package/bundled-skills/hugging-face-gradio/examples.md +613 -0
  26. package/bundled-skills/hugging-face-jobs/SKILL.md +25 -18
  27. package/bundled-skills/hugging-face-jobs/index.html +216 -0
  28. package/bundled-skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  29. package/bundled-skills/hugging-face-jobs/references/hub_saving.md +352 -0
  30. package/bundled-skills/hugging-face-jobs/references/token_usage.md +570 -0
  31. package/bundled-skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  32. package/bundled-skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  33. package/bundled-skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  34. package/bundled-skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  35. package/bundled-skills/hugging-face-model-trainer/SKILL.md +11 -12
  36. package/bundled-skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  37. package/bundled-skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  38. package/bundled-skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  39. package/bundled-skills/hugging-face-model-trainer/references/local_training_macos.md +231 -0
  40. package/bundled-skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  41. package/bundled-skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  42. package/bundled-skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  43. package/bundled-skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  44. package/bundled-skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  45. package/bundled-skills/hugging-face-model-trainer/references/unsloth.md +313 -0
  46. package/bundled-skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  47. package/bundled-skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  48. package/bundled-skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  49. package/bundled-skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  50. package/bundled-skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  51. package/bundled-skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  52. package/bundled-skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py +512 -0
  53. package/bundled-skills/hugging-face-paper-publisher/SKILL.md +11 -4
  54. package/bundled-skills/hugging-face-paper-publisher/examples/example_usage.md +326 -0
  55. package/bundled-skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  56. package/bundled-skills/hugging-face-paper-publisher/scripts/paper_manager.py +606 -0
  57. package/bundled-skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  58. package/bundled-skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  59. package/bundled-skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  60. package/bundled-skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  61. package/bundled-skills/hugging-face-papers/SKILL.md +241 -0
  62. package/bundled-skills/hugging-face-trackio/.claude-plugin/plugin.json +19 -0
  63. package/bundled-skills/hugging-face-trackio/SKILL.md +117 -0
  64. package/bundled-skills/hugging-face-trackio/references/alerts.md +196 -0
  65. package/bundled-skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  66. package/bundled-skills/hugging-face-trackio/references/retrieving_metrics.md +251 -0
  67. package/bundled-skills/hugging-face-vision-trainer/SKILL.md +595 -0
  68. package/bundled-skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md +254 -0
  69. package/bundled-skills/hugging-face-vision-trainer/references/hub_saving.md +618 -0
  70. package/bundled-skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md +279 -0
  71. package/bundled-skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md +700 -0
  72. package/bundled-skills/hugging-face-vision-trainer/references/reliability_principles.md +310 -0
  73. package/bundled-skills/hugging-face-vision-trainer/references/timm_trainer.md +91 -0
  74. package/bundled-skills/hugging-face-vision-trainer/scripts/dataset_inspector.py +814 -0
  75. package/bundled-skills/hugging-face-vision-trainer/scripts/estimate_cost.py +217 -0
  76. package/bundled-skills/hugging-face-vision-trainer/scripts/image_classification_training.py +383 -0
  77. package/bundled-skills/hugging-face-vision-trainer/scripts/object_detection_training.py +710 -0
  78. package/bundled-skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py +382 -0
  79. package/bundled-skills/jq/SKILL.md +273 -0
  80. package/bundled-skills/odoo-edi-connector/SKILL.md +32 -10
  81. package/bundled-skills/odoo-woocommerce-bridge/SKILL.md +9 -5
  82. package/bundled-skills/tmux/SKILL.md +370 -0
  83. package/bundled-skills/transformers-js/SKILL.md +639 -0
  84. package/bundled-skills/transformers-js/references/CACHE.md +339 -0
  85. package/bundled-skills/transformers-js/references/CONFIGURATION.md +390 -0
  86. package/bundled-skills/transformers-js/references/EXAMPLES.md +605 -0
  87. package/bundled-skills/transformers-js/references/MODEL_ARCHITECTURES.md +167 -0
  88. package/bundled-skills/transformers-js/references/PIPELINE_OPTIONS.md +545 -0
  89. package/bundled-skills/transformers-js/references/TEXT_GENERATION.md +315 -0
  90. package/bundled-skills/viboscope/SKILL.md +64 -0
  91. package/package.json +1 -1
@@ -0,0 +1,606 @@
1
+ #!/usr/bin/env -S uv run
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = [
5
+ # "huggingface_hub",
6
+ # "pyyaml",
7
+ # "requests",
8
+ # "python-dotenv",
9
+ # ]
10
+ # ///
11
+ """
12
+ Paper Manager for Hugging Face Hub
13
+ Manages paper indexing, linking, authorship, and article creation.
14
+ """
15
+
16
+ import argparse
17
+ import os
18
+ import sys
19
+ import re
20
+ import json
21
+ from pathlib import Path
22
+ from typing import Optional, List, Dict, Any
23
+ from datetime import datetime
24
+
25
+ try:
26
+ from huggingface_hub import HfApi, hf_hub_download, get_token
27
+ import yaml
28
+ import requests
29
+ from dotenv import load_dotenv
30
+ except ImportError as e:
31
+ print(f"Error: Missing required dependency: {e}")
32
+ print("Tip: run this script with `uv run scripts/paper_manager.py ...`.")
33
+ sys.exit(1)
34
+
35
+ # Load environment variables
36
+ load_dotenv()
37
+
38
+
39
class PaperManager:
    """Manage paper publishing operations on the Hugging Face Hub.

    Covers checking whether an arXiv paper has a Hugging Face paper page,
    linking papers into repository READMEs, rendering article templates, and
    fetching metadata / BibTeX citations from the arXiv export API.
    """

    # Patterns for valid arXiv IDs. Anchored with \A/\Z (not ^/$) because `$`
    # also matches just before a trailing newline, which would let an ID such
    # as "2301.12345\n" pass validation and leak a newline into URLs/README.
    _ARXIV_ID_MODERN = re.compile(r'\A\d{4}\.\d{4,5}(v\d+)?\Z')
    # Legacy IDs may carry a two-letter subject class, e.g. "math.GT/0309136".
    _ARXIV_ID_LEGACY = re.compile(r'\A[a-zA-Z\-]+(\.[A-Z]{2})?/\d{7}(v\d+)?\Z')

    # Leading YAML frontmatter, including the empty "---\n---\n" form that
    # _add_paper_to_readme itself writes (the lazy optional group allows it).
    _FRONTMATTER = re.compile(r'^---\s*\n(?:.*?\n)??---\s*\n', re.DOTALL)

    # Template placeholders understood by create_research_article.
    _PLACEHOLDER = re.compile(r'\{\{(TITLE|AUTHORS|ABSTRACT|DATE)\}\}')

    # Hub web URLs put datasets and spaces under a type-specific path prefix.
    _REPO_URL_PREFIX = {"model": "", "dataset": "datasets/", "space": "spaces/"}

    def __init__(self, hf_token: Optional[str] = None):
        """Initialize Paper Manager with HF token.

        Args:
            hf_token: Explicit token. When omitted, falls back to the
                ``HF_TOKEN`` environment variable, then to ``get_token()``.
        """
        self.token = hf_token or os.getenv("HF_TOKEN") or get_token()
        if not self.token:
            print("Warning: No HF_TOKEN found. Some operations will fail.")
        self.api = HfApi(token=self.token)

    @staticmethod
    def _paper_is_indexed(paper_url: str) -> bool:
        """Return True when the Hugging Face paper page answers HTTP 200.

        Raises:
            requests.RequestException: If the request itself fails.
        """
        return requests.get(paper_url, timeout=10).status_code == 200

    def index_paper(self, arxiv_id: str) -> Dict[str, Any]:
        """
        Index a paper on Hugging Face from arXiv.

        Args:
            arxiv_id: arXiv identifier (e.g., "2301.12345")

        Returns:
            dict: Status information. ``status`` is "exists", "not_indexed"
            or "error".
        """
        # Clean and validate arXiv ID
        try:
            arxiv_id = self._clean_arxiv_id(arxiv_id)
        except ValueError as e:
            print(f"Error: {e}")
            return {"status": "error", "message": str(e)}

        print(f"Indexing paper {arxiv_id} on Hugging Face...")
        paper_url = f"https://huggingface.co/papers/{arxiv_id}"

        try:
            indexed = self._paper_is_indexed(paper_url)
        except requests.RequestException as e:
            print(f"Error checking paper status: {e}")
            return {"status": "error", "message": str(e)}

        if indexed:
            print(f"✓ Paper already indexed at {paper_url}")
            return {"status": "exists", "url": paper_url}

        print(f"Paper not indexed. Visit {paper_url} to trigger indexing.")
        print("The paper will be automatically indexed when you first visit the URL.")
        return {"status": "not_indexed", "url": paper_url, "action": "visit_url"}

    def check_paper(self, arxiv_id: str) -> Dict[str, Any]:
        """
        Check if a paper exists on Hugging Face.

        Args:
            arxiv_id: arXiv identifier

        Returns:
            dict: Paper status and metadata. Always contains ``exists``;
            contains ``error`` when validation or the request failed.
        """
        try:
            arxiv_id = self._clean_arxiv_id(arxiv_id)
        except ValueError as e:
            return {"exists": False, "error": str(e)}
        paper_url = f"https://huggingface.co/papers/{arxiv_id}"

        try:
            indexed = self._paper_is_indexed(paper_url)
        except requests.RequestException as e:
            return {"exists": False, "error": str(e)}

        if indexed:
            return {
                "exists": True,
                "url": paper_url,
                "arxiv_id": arxiv_id,
                "arxiv_url": f"https://arxiv.org/abs/{arxiv_id}",
            }
        return {
            "exists": False,
            "arxiv_id": arxiv_id,
            "index_url": paper_url,
            "message": f"Visit {paper_url} to index this paper",
        }

    def link_paper_to_repo(
        self,
        repo_id: str,
        arxiv_id: str,
        repo_type: str = "model",
        citation: Optional[str] = None,
        create_pr: bool = False
    ) -> Dict[str, Any]:
        """
        Link a paper to a model/dataset/space repository.

        Downloads the repository README, inserts a paper section, and uploads
        the result. Nothing is uploaded when the README already references
        the paper.

        Args:
            repo_id: Repository identifier (e.g., "username/repo-name")
            arxiv_id: arXiv identifier
            repo_type: Type of repository ("model", "dataset", or "space")
            citation: Optional full citation text
            create_pr: Open a pull request instead of committing directly

        Returns:
            dict: Operation status. On success includes ``paper_url``,
            ``repo_url``, ``arxiv_id`` and ``changed``; ``pr_url`` is added
            when a pull request was opened and the Hub reported its URL.
        """
        try:
            arxiv_id = self._clean_arxiv_id(arxiv_id)
        except ValueError as e:
            print(f"Error: {e}")
            return {"status": "error", "message": str(e)}

        print(f"Linking paper {arxiv_id} to {repo_type} {repo_id}...")

        # Top-level boundary: any Hub/IO failure is reported, never raised.
        try:
            # Download current README
            readme_path = hf_hub_download(
                repo_id=repo_id,
                filename="README.md",
                repo_type=repo_type,
                token=self.token
            )
            with open(readme_path, 'r', encoding='utf-8') as f:
                content = f.read()

            updated_content = self._add_paper_to_readme(content, arxiv_id, citation)

            paper_url = f"https://huggingface.co/papers/{arxiv_id}"
            prefix = self._REPO_URL_PREFIX.get(repo_type, "")
            repo_url = f"https://huggingface.co/{prefix}{repo_id}"
            result: Dict[str, Any] = {
                "status": "success",
                "paper_url": paper_url,
                "repo_url": repo_url,
                "arxiv_id": arxiv_id,
                "changed": updated_content != content,
            }

            if not result["changed"]:
                # Already referenced: avoid pushing a no-op commit.
                return result

            commit_info = self.api.upload_file(
                path_or_fileobj=updated_content.encode('utf-8'),
                path_in_repo="README.md",
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=f"Add paper reference: arXiv:{arxiv_id}",
                token=self.token,
                create_pr=create_pr
            )

            print("✓ Successfully linked paper to repository")
            print(f"  Paper: {paper_url}")
            print(f"  Repo: {repo_url}")

            # NOTE(review): relies on upload_file returning an object with a
            # `pr_url` attribute when create_pr=True — confirm against the
            # installed huggingface_hub version.
            pr_url = getattr(commit_info, "pr_url", None)
            if create_pr and pr_url:
                print(f"  Pull request: {pr_url}")
                result["pr_url"] = str(pr_url)

            return result

        except Exception as e:
            print(f"Error linking paper: {e}")
            return {"status": "error", "message": str(e)}

    def _add_paper_to_readme(
        self,
        content: str,
        arxiv_id: str,
        citation: Optional[str] = None
    ) -> str:
        """
        Add paper reference to README content.

        The operation is idempotent: if ``arxiv_id`` already appears anywhere
        in ``content`` the text is returned unchanged, whether or not the
        README has YAML frontmatter.

        Args:
            content: Current README content
            arxiv_id: arXiv identifier
            citation: Optional citation text

        Returns:
            str: Updated README content
        """
        if arxiv_id in content:
            print(f"Paper {arxiv_id} already referenced in README")
            return content

        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        hf_paper_url = f"https://huggingface.co/papers/{arxiv_id}"

        match = PaperManager._FRONTMATTER.match(content)
        if match:
            # Insert right after the existing frontmatter.
            before, after = content[:match.end()], content[match.end():]
        else:
            # No YAML, add minimal frontmatter
            before, after = "---\n---\n\n", content

        subject = 'model' if 'model' in content.lower() else 'work'

        # Paper reference section, wrapped in boundary markers.
        parts = [
            "\n<!-- paper-manager:start -->\n",
            "## Paper\n\n",
            f"This {subject} is based on research presented in:\n\n",
            f"**[View on arXiv]({arxiv_url})** | ",
            f"**[View on Hugging Face]({hf_paper_url})**\n\n",
        ]
        if citation:
            safe_citation = self._sanitize_text(citation)
            parts.append(f"### Citation\n\n```bibtex\n{safe_citation}\n```\n\n")
        parts.append("<!-- paper-manager:end -->\n")

        return before + "".join(parts) + after

    def create_research_article(
        self,
        template: str,
        title: str,
        output: str,
        authors: Optional[str] = None,
        abstract: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Create a research article from template.

        Args:
            template: Template name ("standard", "modern", "arxiv", "ml-report")
            title: Paper title
            output: Output filename
            authors: Comma-separated author names
            abstract: Abstract text

        Returns:
            dict: Creation status. ``status`` is "error" when the template
            name is invalid or the template file does not exist.
        """
        print(f"Creating research article with '{template}' template...")

        # A template is a bare file stem; anything carrying a directory
        # component could escape the templates directory.
        if not template or Path(template).name != template:
            return {
                "status": "error",
                "message": f"Invalid template name: {template!r}"
            }

        # Load template
        template_dir = Path(__file__).parent.parent / "templates"
        template_file = template_dir / f"{template}.md"

        if not template_file.exists():
            return {
                "status": "error",
                "message": f"Template '{template}' not found at {template_file}"
            }

        with open(template_file, 'r', encoding='utf-8') as f:
            template_content = f.read()

        content = self._render_template(
            template_content,
            title=title,
            authors=authors if authors else "Your Name",
            abstract=abstract if abstract else "Abstract to be written...",
            date_str=datetime.now().strftime("%Y-%m-%d"),
        )

        # Write output
        with open(output, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"✓ Research article created at {output}")

        return {
            "status": "success",
            "output": output,
            "template": template
        }

    @staticmethod
    def _render_template(
        template_content: str,
        title: str,
        authors: str,
        abstract: str,
        date_str: str
    ) -> str:
        """Fill ``{{TITLE}}``/``{{AUTHORS}}``/``{{ABSTRACT}}``/``{{DATE}}``.

        Values placed in YAML frontmatter are YAML-escaped; values placed in
        the body are sanitized as Markdown text. Substitution is a single
        pass, so placeholder-looking text inside a user value is never
        expanded a second time.
        """
        body_values = {
            "TITLE": PaperManager._sanitize_text(title),
            "AUTHORS": PaperManager._sanitize_text(authors),
            "ABSTRACT": PaperManager._sanitize_text(abstract),
            "DATE": date_str,
        }

        def fill(text: str, values: Dict[str, str]) -> str:
            # Unknown-for-this-context placeholders are left verbatim.
            return PaperManager._PLACEHOLDER.sub(
                lambda m: values.get(m.group(1), m.group(0)), text
            )

        # Split frontmatter from body for context-aware escaping
        fm_match = re.match(r'^(---\s*\n)(.*?\n)(---\s*\n)', template_content, re.DOTALL)
        if not fm_match:
            # No frontmatter — sanitize everything
            return fill(template_content, body_values)

        fm_open, fm_body, fm_close = fm_match.groups()
        fm_values = {
            "TITLE": PaperManager._escape_yaml_value(title),
            "AUTHORS": PaperManager._escape_yaml_value(authors),
            "DATE": date_str,
        }
        body = template_content[fm_match.end():]
        return fm_open + fill(fm_body, fm_values) + fm_close + fill(body, body_values)

    def get_arxiv_info(self, arxiv_id: str) -> Dict[str, Any]:
        """
        Fetch paper information from arXiv API.

        Args:
            arxiv_id: arXiv identifier

        Returns:
            dict: Paper metadata (``arxiv_id``, ``title``, ``authors``,
            ``abstract``, ``arxiv_url``, ``pdf_url``), or ``{"error": ...}``
            when the ID is invalid, the request fails, or the response holds
            no entry.
        """
        try:
            arxiv_id = self._clean_arxiv_id(arxiv_id)
        except ValueError as e:
            return {"error": str(e)}
        api_url = f"https://export.arxiv.org/api/query?id_list={arxiv_id}"

        try:
            response = requests.get(api_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            return {"error": str(e)}

        parsed = self._parse_arxiv_feed(response.text)
        if parsed is None:
            return {"error": f"No arXiv entry found for {arxiv_id}"}

        return {
            "arxiv_id": arxiv_id,
            "title": parsed["title"],
            "authors": parsed["authors"],
            "abstract": parsed["abstract"],
            "arxiv_url": f"https://arxiv.org/abs/{arxiv_id}",
            "pdf_url": f"https://arxiv.org/pdf/{arxiv_id}.pdf"
        }

    @staticmethod
    def _parse_arxiv_feed(feed_xml: str) -> Optional[Dict[str, Any]]:
        """Extract title, authors and abstract from the first ``<entry>``.

        Matching is restricted to the entry element so feed-level elements
        (such as the feed's own ``<title>``) are never mistaken for paper
        metadata and no author is skipped. All extracted text is
        entity-unescaped and sanitized because it comes from an external API.

        Returns:
            dict with ``title``, ``authors`` and ``abstract``, or ``None``
            when the feed contains no entry.
        """
        # Local import keeps this block self-contained.
        from html import unescape

        entry = re.search(r'<entry\b[^>]*>(.*?)</entry>', feed_xml, re.DOTALL)
        if entry is None:
            return None
        entry_xml = entry.group(1)

        def field(tag: str) -> Optional[str]:
            found = re.search(rf'<{tag}\b[^>]*>(.*?)</{tag}>', entry_xml, re.DOTALL)
            if not found:
                return None
            raw = unescape(found.group(1)).strip()
            return PaperManager._sanitize_text(raw) if raw else None

        names: List[str] = re.findall(r'<name\b[^>]*>(.*?)</name>', entry_xml, re.DOTALL)
        return {
            "title": field("title"),
            "authors": [PaperManager._sanitize_text(unescape(n)) for n in names],
            "abstract": field("summary"),
        }

    def generate_citation(
        self,
        arxiv_id: str,
        format: str = "bibtex"
    ) -> str:
        """
        Generate citation for a paper.

        Args:
            arxiv_id: arXiv identifier
            format: Citation format ("bibtex", "apa", "mla"); only "bibtex"
                is implemented.

        Returns:
            str: Formatted citation, or a human-readable error message.
        """
        try:
            arxiv_id = self._clean_arxiv_id(arxiv_id)
        except ValueError as e:
            return f"Error: {e}"

        info = self.get_arxiv_info(arxiv_id)

        if "error" in info:
            return f"Error fetching paper info: {info['error']}"

        if format == "bibtex":
            return self._format_bibtex(arxiv_id, info)

        return f"Format '{format}' not yet implemented"

    @staticmethod
    def _year_from_arxiv_id(arxiv_id: str) -> str:
        """Derive the four-digit year from a validated arXiv ID.

        Both ID schemes start their numeric part with YY ("2301.12345",
        "hep-th/9901001"); two-digit years below 50 are read as 20YY.
        """
        numeric = arxiv_id.rsplit("/", 1)[-1]
        yy = int(numeric[:2])
        return f"20{yy:02d}" if yy < 50 else f"19{yy:02d}"

    @staticmethod
    def _format_bibtex(arxiv_id: str, info: Dict[str, Any]) -> str:
        """Build a BibTeX ``@article`` entry from arXiv metadata.

        Missing or empty title/authors fall back to "Untitled"/"Unknown".
        """
        # Citation keys must not contain '.', '/' or similar punctuation.
        key = "arxiv" + re.sub(r'[^0-9A-Za-z]+', '_', arxiv_id)
        raw_authors = " and ".join(info.get("authors") or ["Unknown"])
        # Feed titles are often wrapped across lines; keep them on one line.
        raw_title = " ".join((info.get("title") or "Untitled").split())
        year = PaperManager._year_from_arxiv_id(arxiv_id)

        # Escape BibTeX structural characters in untrusted values
        safe_title = raw_title.replace('{', r'\{').replace('}', r'\}')
        safe_authors = raw_authors.replace('{', r'\{').replace('}', r'\}')

        return (
            f"@article{{{key},\n"
            f"  title={{{safe_title}}},\n"
            f"  author={{{safe_authors}}},\n"
            f"  journal={{arXiv preprint arXiv:{arxiv_id}}},\n"
            f"  year={{{year}}}\n"
            "}"
        )

    @staticmethod
    def _clean_arxiv_id(arxiv_id: str) -> str:
        """Clean, normalize, and validate arXiv ID.

        Accepts bare IDs, ``arXiv:``-prefixed IDs, and arxiv.org abs/pdf
        URLs (with an optional ``.pdf`` suffix).

        Raises:
            ValueError: If the cleaned ID does not match a valid arXiv format.
        """
        # Remove common prefixes and whitespace
        cleaned = arxiv_id.strip()
        cleaned = re.sub(r'^arxiv:\s*', '', cleaned, flags=re.IGNORECASE)
        cleaned = re.sub(
            r'^https?://(?:www\.|export\.)?arxiv\.org/(?:abs|pdf)/',
            '',
            cleaned,
            flags=re.IGNORECASE,
        )
        cleaned = cleaned.removesuffix('.pdf')

        # Validate format (whole string; no trailing newline tolerated)
        if not (PaperManager._ARXIV_ID_MODERN.fullmatch(cleaned)
                or PaperManager._ARXIV_ID_LEGACY.fullmatch(cleaned)):
            raise ValueError(
                f"Invalid arXiv ID: {cleaned!r}. "
                "Expected format: YYMM.NNNNN[vN] or category/YYMMNNN[vN]"
            )

        return cleaned

    @staticmethod
    def _escape_yaml_value(value: str) -> str:
        """Escape a string for safe use as a YAML scalar value.

        Wraps in double quotes and escapes backslashes, quotes and line
        breaks so a crafted title/author can neither close the scalar nor
        start a new line (e.g. a ``---`` document marker) in the frontmatter.
        Remaining control characters are dropped.
        """
        escaped = (
            value.replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
            .replace('\t', '\\t')
        )
        escaped = re.sub(r'[\x00-\x1f\x7f]', '', escaped)
        return f'"{escaped}"'

    @staticmethod
    def _sanitize_text(text: str) -> str:
        """Sanitize untrusted text for safe inclusion in Markdown/YAML output.

        Normalizes whitespace, strips control characters, and neutralizes
        markdown code-fence breakout and YAML document delimiters.
        """
        # Remove control characters (keep newlines and tabs)
        text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
        # Normalize whitespace runs (collapse multiple spaces/tabs, preserve single newlines)
        text = re.sub(r'[^\S\n]+', ' ', text)
        text = re.sub(r'\n{3,}', '\n\n', text)
        # Neutralize markdown code fence breakout
        text = text.replace('```', r'\`\`\`')
        # Neutralize YAML document delimiters at line start
        text = re.sub(r'^---', r'\\---', text, flags=re.MULTILINE)
        return text.strip()
483
+
484
+
485
def _build_parser():
    """Build the argparse parser with one sub-command per operation."""
    parser = argparse.ArgumentParser(
        description="Paper Manager for Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Index command
    index_parser = subparsers.add_parser("index", help="Index a paper from arXiv")
    index_parser.add_argument("--arxiv-id", required=True, help="arXiv paper ID")

    # Check command
    check_parser = subparsers.add_parser("check", help="Check if paper exists")
    check_parser.add_argument("--arxiv-id", required=True, help="arXiv paper ID")

    # Link command
    link_parser = subparsers.add_parser("link", help="Link paper to repository")
    link_parser.add_argument("--repo-id", required=True, help="Repository ID")
    link_parser.add_argument("--repo-type", default="model", choices=["model", "dataset", "space"])
    link_parser.add_argument("--arxiv-id", help="Single arXiv ID")
    link_parser.add_argument("--arxiv-ids", help="Comma-separated arXiv IDs")
    link_parser.add_argument("--citation", help="Full citation text")
    link_parser.add_argument("--create-pr", action="store_true", help="Create PR instead of direct commit")

    # Create command
    create_parser = subparsers.add_parser("create", help="Create research article")
    create_parser.add_argument("--template", required=True, help="Template name")
    create_parser.add_argument("--title", required=True, help="Paper title")
    create_parser.add_argument("--output", required=True, help="Output filename")
    create_parser.add_argument("--authors", help="Comma-separated authors")
    create_parser.add_argument("--abstract", help="Abstract text")

    # Info command
    info_parser = subparsers.add_parser("info", help="Get paper information")
    info_parser.add_argument("--arxiv-id", required=True, help="arXiv paper ID")
    info_parser.add_argument("--format", default="json", choices=["json", "text"])

    # Citation command
    citation_parser = subparsers.add_parser("citation", help="Generate citation")
    citation_parser.add_argument("--arxiv-id", required=True, help="arXiv paper ID")
    citation_parser.add_argument("--format", default="bibtex", choices=["bibtex", "apa", "mla"])

    # Search command
    search_parser = subparsers.add_parser("search", help="Search papers")
    search_parser.add_argument("--query", required=True, help="Search query")

    return parser


def main():
    """Main CLI entry point.

    Parses ``sys.argv``, dispatches to the matching ``PaperManager``
    operation and prints its result. Exits with status 1 when no command is
    given or when ``link`` is called without any arXiv ID.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    if args.command == "search":
        # Handled before the manager is built: search needs no Hub client,
        # so it should not trigger token lookup or the missing-token warning.
        from urllib.parse import quote_plus

        print(f"Searching for: {args.query}")
        print("Search functionality coming soon!")
        # Encode the query so spaces, '&', '#', ... cannot corrupt the URL.
        print(f"Visit: https://huggingface.co/papers?search={quote_plus(args.query)}")
        return

    # Initialize manager
    manager = PaperManager()

    # Execute command
    if args.command == "index":
        result = manager.index_paper(args.arxiv_id)
        print(json.dumps(result, indent=2))

    elif args.command == "check":
        result = manager.check_paper(args.arxiv_id)
        print(json.dumps(result, indent=2))

    elif args.command == "link":
        arxiv_ids = []
        if args.arxiv_id:
            arxiv_ids.append(args.arxiv_id)
        if args.arxiv_ids:
            # Drop empty items produced by stray commas ("a,,b,").
            arxiv_ids.extend(
                item.strip() for item in args.arxiv_ids.split(",") if item.strip()
            )

        if not arxiv_ids:
            print("Error: Must provide --arxiv-id or --arxiv-ids")
            sys.exit(1)

        for arxiv_id in arxiv_ids:
            result = manager.link_paper_to_repo(
                repo_id=args.repo_id,
                arxiv_id=arxiv_id,
                repo_type=args.repo_type,
                citation=args.citation,
                create_pr=args.create_pr
            )
            print(json.dumps(result, indent=2))

    elif args.command == "create":
        result = manager.create_research_article(
            template=args.template,
            title=args.title,
            output=args.output,
            authors=args.authors,
            abstract=args.abstract
        )
        print(json.dumps(result, indent=2))

    elif args.command == "info":
        result = manager.get_arxiv_info(args.arxiv_id)
        if args.format == "json":
            print(json.dumps(result, indent=2))
        elif "error" in result:
            print(f"Error: {result['error']}")
        else:
            print(f"Title: {result.get('title')}")
            print(f"Authors: {', '.join(result.get('authors', []))}")
            print(f"arXiv URL: {result.get('arxiv_url')}")
            print(f"\nAbstract:\n{result.get('abstract')}")

    elif args.command == "citation":
        citation = manager.generate_citation(args.arxiv_id, args.format)
        print(citation)


if __name__ == "__main__":
    main()