cnhkmcp 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/METADATA +1 -1
  2. cnhkmcp-2.1.3.dist-info/RECORD +6 -0
  3. cnhkmcp-2.1.3.dist-info/top_level.txt +1 -0
  4. cnhkmcp/__init__.py +0 -125
  5. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +0 -38
  6. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
  7. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +0 -6
  8. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +0 -1510
  9. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +0 -157
  10. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +0 -132
  11. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +0 -99
  12. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +0 -180
  13. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
  14. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
  15. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +0 -1
  16. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +0 -576
  17. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +0 -280
  18. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +0 -356
  19. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +0 -7
  20. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +0 -3
  21. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_manifest.json +0 -326
  22. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_meta.json +0 -1
  23. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/be5d957c-b724-46e3-91d1-999e9f5f7d28/index_metadata.pickle +0 -0
  24. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/chroma.sqlite3 +0 -0
  25. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -265
  26. cnhkmcp/untracked/APP/.gitignore +0 -32
  27. cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -112
  28. cnhkmcp/untracked/APP/README.md +0 -309
  29. cnhkmcp/untracked/APP/Tranformer/Transformer.py +0 -4985
  30. cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  31. cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -1510
  32. cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +0 -180
  33. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -2421
  34. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +0 -654
  35. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -1034
  36. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -444
  37. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +0 -22
  38. cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -60
  39. cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -3182
  40. cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -7
  41. cnhkmcp/untracked/APP/Tranformer/validator.py +0 -889
  42. cnhkmcp/untracked/APP/ace.log +0 -69
  43. cnhkmcp/untracked/APP/ace_lib.py +0 -1510
  44. cnhkmcp/untracked/APP/blueprints/__init__.py +0 -6
  45. cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -347
  46. cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -221
  47. cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -432
  48. cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -570
  49. cnhkmcp/untracked/APP/custom_templates/templates.json +0 -1257
  50. cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -400
  51. cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -1510
  52. cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -252
  53. cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -157
  54. cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -99
  55. cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +0 -180
  56. cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -11
  57. cnhkmcp/untracked/APP/helpful_functions.py +0 -180
  58. cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +0 -1497
  59. cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +0 -447
  60. cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
  61. cnhkmcp/untracked/APP/mirror_config.txt +0 -20
  62. cnhkmcp/untracked/APP/operaters.csv +0 -129
  63. cnhkmcp/untracked/APP/requirements.txt +0 -53
  64. cnhkmcp/untracked/APP/run_app.bat +0 -28
  65. cnhkmcp/untracked/APP/run_app.sh +0 -34
  66. cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -39
  67. cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -43
  68. cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -404
  69. cnhkmcp/untracked/APP/simulator/simulator_wqb.py +0 -618
  70. cnhkmcp/untracked/APP/ssrn-3332513.pdf +6 -109201
  71. cnhkmcp/untracked/APP/static/brain.js +0 -589
  72. cnhkmcp/untracked/APP/static/decoder.js +0 -1540
  73. cnhkmcp/untracked/APP/static/feature_engineering.js +0 -1729
  74. cnhkmcp/untracked/APP/static/idea_house.js +0 -937
  75. cnhkmcp/untracked/APP/static/inspiration.js +0 -465
  76. cnhkmcp/untracked/APP/static/inspiration_house.js +0 -868
  77. cnhkmcp/untracked/APP/static/paper_analysis.js +0 -390
  78. cnhkmcp/untracked/APP/static/script.js +0 -3082
  79. cnhkmcp/untracked/APP/static/simulator.js +0 -597
  80. cnhkmcp/untracked/APP/static/styles.css +0 -3127
  81. cnhkmcp/untracked/APP/static/usage_widget.js +0 -508
  82. cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -511
  83. cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -960
  84. cnhkmcp/untracked/APP/templates/idea_house.html +0 -564
  85. cnhkmcp/untracked/APP/templates/index.html +0 -932
  86. cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -861
  87. cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -91
  88. cnhkmcp/untracked/APP/templates/simulator.html +0 -343
  89. cnhkmcp/untracked/APP/templates/transformer_web.html +0 -580
  90. cnhkmcp/untracked/APP/usage.md +0 -351
  91. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +0 -1510
  92. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +0 -712
  93. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +0 -180
  94. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -2456
  95. cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -490
  96. cnhkmcp/untracked/arxiv_api.py +0 -229
  97. cnhkmcp/untracked/forum_functions.py +0 -998
  98. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +0 -407
  99. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +0 -2415
  100. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +0 -31
  101. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +0 -101
  102. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  103. cnhkmcp/untracked/platform_functions.py +0 -2886
  104. cnhkmcp/untracked/sample_mcp_config.json +0 -11
  105. cnhkmcp/untracked/user_config.json +0 -31
  106. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -202
  107. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +0 -56
  108. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -194
  109. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +0 -101
  110. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +0 -436
  111. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +0 -128
  112. cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  113. cnhkmcp-2.1.1.dist-info/RECORD +0 -115
  114. cnhkmcp-2.1.1.dist-info/top_level.txt +0 -1
  115. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/WHEEL +0 -0
  116. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/entry_points.txt +0 -0
  117. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,490 +0,0 @@
1
- # 🔍 arXiv Paper Search & Download Tool
2
-
3
- A comprehensive Python tool for searching, analyzing, and downloading research papers from arXiv using their public API. Perfect for researchers, students, and anyone interested in academic papers.
4
-
5
- ## 📋 Table of Contents
6
-
7
- - [Features](#-features)
8
- - [Installation](#-installation)
9
- - [Quick Start](#-quick-start)
10
- - [Usage Modes](#-usage-modes)
11
- - [API Functions](#-api-functions)
12
- - [Examples](#-examples)
13
- - [Advanced Usage](#-advanced-usage)
14
- - [Troubleshooting](#-troubleshooting)
15
-
16
- ## ✨ Features
17
-
18
- - **🔍 Smart Search**: Search arXiv papers by title, author, abstract, or any keyword
19
- - **📥 Smart Download**: Download PDFs with automatic filename renaming to paper titles
20
- - **📊 Result Parsing**: Automatically extract structured information (title, authors, abstract, ID)
21
- - **🖥️ Interactive Mode**: Command-line interface for easy searching and downloading
22
- - **⚡ Batch Operations**: Search multiple papers and download in sequence
23
- - **📈 Academic Research**: Perfect for literature reviews and research discovery
24
- - **🔄 Auto-Rename**: Downloaded files are automatically named using paper titles instead of cryptic IDs
25
-
26
- ## 🚀 Installation
27
-
28
- ### Prerequisites
29
- - Python 3.6 or higher
30
- - Internet connection for API access
31
-
32
- ### Install Dependencies
33
- ```bash
34
- pip install requests
35
- ```
36
-
37
- ### Download the Script
38
- ```bash
39
- # Clone or download arxiv_api.py to your working directory
40
- ```
41
-
42
- ## 🎯 Quick Start
43
-
44
- ### Basic Search
45
- ```bash
46
- python arxiv_api.py "machine learning"
47
- ```
48
-
49
- ### Search with Custom Results
50
- ```bash
51
- python arxiv_api.py "quantum computing" -n 10
52
- ```
53
-
54
- ### Search and Download First Result
55
- ```bash
56
- python arxiv_api.py "deep learning" -d
57
- ```
58
-
59
- ### Interactive Mode
60
- ```bash
61
- python arxiv_api.py -i
62
- ```
63
-
64
- ### Download Paper by ID (with auto-rename)
65
- ```bash
66
- # In interactive mode:
67
- # 📚 arxiv> download 2502.05218v1
68
- # This will automatically rename the file to the paper's title
69
- ```
70
-
71
- ## 🎮 Usage Modes
72
-
73
- ### 1. Command Line Mode
74
- Direct search queries from the command line.
75
-
76
- **Syntax:**
77
- ```bash
78
- python arxiv_api.py [query] [options]
79
- ```
80
-
81
- **Options:**
82
- - `-n, --max_results`: Maximum number of results (default: 5)
83
- - `-d, --download`: Download the first result automatically
84
- - `-i, --interactive`: Start interactive mode
85
- - `-h, --help`: Show help message
86
-
87
- ### 2. Interactive Mode
88
- Interactive command-line interface for multiple operations.
89
-
90
- **Commands:**
91
- - `search <query> [max_results]`: Search for papers
92
- - `download <paper_id>`: Download a specific paper (with auto-rename)
93
- - `help`: Show available commands
94
- - `quit/exit`: Exit the program
95
-
96
- ## 🔧 API Functions
97
-
98
- ### Core Functions
99
-
100
- #### `search_arxiv(query, max_results=10)`
101
- Searches arXiv for papers using the public API.
102
-
103
- **Parameters:**
104
- - `query` (str): Search query string
105
- - `max_results` (int): Maximum number of results (default: 10)
106
-
107
- **Returns:**
108
- - `str`: XML response from arXiv API
109
-
110
- **Example:**
111
- ```python
112
- from arxiv_api import search_arxiv
113
-
114
- results = search_arxiv("artificial intelligence", max_results=5)
115
- ```
116
-
117
- #### `get_paper_metadata(paper_id)`
118
- Fetches paper metadata directly from arXiv API using paper ID.
119
-
120
- **Parameters:**
121
- - `paper_id` (str): arXiv paper ID (e.g., "2502.05218v1")
122
-
123
- **Returns:**
124
- - `dict`: Paper information dictionary, or `None` if not found
125
-
126
- **Example:**
127
- ```python
128
- from arxiv_api import get_paper_metadata
129
-
130
- paper_info = get_paper_metadata("2502.05218v1")
131
- if paper_info:
132
- print(f"Title: {paper_info['title']}")
133
- print(f"Authors: {', '.join(paper_info['authors'])}")
134
- ```
135
-
136
- #### `download_paper(paper_id, output_dir=".", paper_title=None)`
137
- Downloads a specific paper by its arXiv ID and automatically renames it to the paper title.
138
-
139
- **Parameters:**
140
- - `paper_id` (str): arXiv paper ID (e.g., "2502.05218v1")
141
- - `output_dir` (str): Output directory (default: current directory)
142
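- - `paper_title` (str): Paper title used to build the filename (optional; if omitted, `download_paper` itself saves the file as `{paper_id}.pdf` — look the title up first with `get_paper_metadata` if you want a title-based name)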
143
-
144
- **Returns:**
145
- - `str`: File path of downloaded PDF, or `None` if failed
146
-
147
- **Features:**
148
- - **Auto-rename**: Automatically renames downloaded files to paper titles
149
- - **Smart cleaning**: Removes special characters and limits filename length
150
- - **Fallback**: Uses paper ID if title is unavailable
151
-
152
- **Example:**
153
- ```python
154
- from arxiv_api import download_paper
155
-
156
- # Download by ID only (no title is passed, so the file is saved as 2502.05218v1.pdf)
157
- filepath = download_paper("2502.05218v1")
158
-
159
- # Download with custom title
160
- filepath = download_paper("2502.05218v1", paper_title="My Custom Title")
161
- ```
162
-
163
- #### `parse_search_results(xml_content)`
164
- Parses XML search results and extracts structured paper information.
165
-
166
- **Parameters:**
167
- - `xml_content` (str): XML response from arXiv API
168
-
169
- **Returns:**
170
- - `list`: List of dictionaries containing paper information
171
-
172
- **Paper Information Structure:**
173
- ```python
174
- {
175
- 'title': 'Paper Title',
176
- 'authors': ['Author 1', 'Author 2'],
177
- 'abstract': 'Paper abstract...',
178
- 'paper_id': '2502.05218v1',
179
- 'published': '2025-02-05T12:37:15Z'
180
- }
181
- ```
182
-
183
- #### `search_and_download(query, max_results=5, download_first=False)`
184
- Combined function that searches for papers and optionally downloads the first result.
185
-
186
- **Parameters:**
187
- - `query` (str): Search query string
188
- - `max_results` (int): Maximum number of results (default: 5)
189
- - `download_first` (bool): Whether to download first result (default: False)
190
-
191
- **Example:**
192
- ```python
193
- from arxiv_api import search_and_download
194
-
195
- # Search and display results only
196
- search_and_download("machine learning", max_results=3)
197
-
198
- # Search and download first result (with auto-rename)
199
- search_and_download("deep learning", max_results=5, download_first=True)
200
- ```
201
-
202
- ### Interactive Mode Functions
203
-
204
- #### `interactive_mode()`
205
- Starts the interactive command-line interface.
206
-
207
- **Features:**
208
- - Command history
209
- - Error handling
210
- - User-friendly prompts
211
- - Multiple search sessions
212
- - **Smart download with auto-rename**
213
-
214
- ## 📚 Examples
215
-
216
- ### Example 1: Basic Paper Search
217
- ```bash
218
- # Search for machine learning papers
219
- python arxiv_api.py "machine learning"
220
-
221
- # Output:
222
- # Searching arXiv for: 'machine learning'
223
- # --------------------------------------------------
224
- # Found 5 papers:
225
- #
226
- # 1. Title: Introduction to Machine Learning
227
- # Authors: John Doe, Jane Smith
228
- # Paper ID: 2103.12345
229
- # Published: 2021-03-15T10:30:00Z
230
- # Abstract: This paper introduces...
231
- ```
232
-
233
- ### Example 2: Search with Custom Results
234
- ```bash
235
- # Get 10 results for quantum computing
236
- python arxiv_api.py "quantum computing" -n 10
237
- ```
238
-
239
- ### Example 3: Search and Download (with auto-rename)
240
- ```bash
241
- # Search for papers and download the first one
242
- python arxiv_api.py "artificial intelligence" -d
243
- # Downloaded file will be automatically renamed to the paper title
244
- ```
245
-
246
- ### Example 4: Interactive Mode with Smart Download
247
- ```bash
248
- python arxiv_api.py -i
249
-
250
- # 📚 arxiv> search blockchain finance 5
251
- # 📚 arxiv> download 2502.05218v1
252
- # Fetching paper information for 2502.05218v1...
253
- # Found paper: FactorGCL: A Hypergraph-Based Factor Model...
254
- # Downloaded: .\FactorGCL_A_Hypergraph-Based_Factor_Model...pdf
255
- # 📚 arxiv> help
256
- # 📚 arxiv> quit
257
- ```
258
-
259
- ### Example 5: Python Script Integration
260
- ```python
261
- from arxiv_api import search_and_download, download_paper, get_paper_metadata
262
-
263
- # Search for papers on a specific topic
264
- search_and_download("quantitative finance China", max_results=3)
265
-
266
- # Download a specific paper with auto-rename
267
- download_paper("2502.05218v1")
268
-
269
- # Get paper metadata
270
- paper_info = get_paper_metadata("2502.05218v1")
271
- if paper_info:
272
- print(f"Title: {paper_info['title']}")
273
- ```
274
-
275
- ## 🔍 Advanced Usage
276
-
277
- ### Smart Download Features
278
-
279
- #### Automatic Filename Generation
280
- ```python
281
- from arxiv_api import download_paper
282
-
283
- # The tool automatically:
284
- # 1. Fetches paper metadata
285
- # 2. Extracts the title
286
- # 3. Cleans the title for filename use
287
- # 4. Downloads and renames the file
288
-
289
- # Example output filename:
290
- # "FactorGCL_A_Hypergraph-Based_Factor_Model_with_Temporal_Residual_Contrastive_Learning_for_Stock_Returns_Prediction.pdf"
291
- ```
292
-
293
- #### Custom Search Queries
294
-
295
- ##### Field-Specific Searches
296
- ```bash
297
- # Search by author
298
- python arxiv_api.py "au:Yann LeCun"
299
-
300
- # Search by title
301
- python arxiv_api.py "ti:deep learning"
302
-
303
- # Search by abstract
304
- python arxiv_api.py "abs:neural networks"
305
-
306
- # Search by category
307
- python arxiv_api.py "cat:cs.AI"
308
- ```
309
-
310
- ##### Complex Queries
311
- ```bash
312
- # Multiple terms
313
- python arxiv_api.py "machine learning AND neural networks"
314
-
315
- # Exclude terms
316
- python arxiv_api.py "deep learning NOT reinforcement"
317
-
318
- # Date range
319
- python arxiv_api.py "machine learning AND submittedDate:[20230101 TO 20231231]"
320
- ```
321
-
322
- ### Batch Operations
323
-
324
- #### Download Multiple Papers with Auto-Rename
325
- ```python
326
- from arxiv_api import search_arxiv, parse_search_results, download_paper
327
-
328
- # Search for papers
329
- query = "quantum computing"
330
- results = search_arxiv(query, max_results=10)
331
- papers = parse_search_results(results)
332
-
333
- # Download all papers (each will be automatically renamed)
334
- for paper in papers:
335
- paper_id = paper.get('paper_id')
336
- if paper_id:
337
- download_paper(paper_id, output_dir="./quantum_papers")
338
- ```
339
-
340
- #### Custom Output Formatting
341
- ```python
342
- from arxiv_api import search_and_download
343
-
344
- # Custom display function
345
- def custom_display(papers):
346
- for i, paper in enumerate(papers, 1):
347
- print(f"📄 Paper {i}: {paper['title']}")
348
- print(f"👥 Authors: {', '.join(paper['authors'])}")
349
- print(f"🆔 ID: {paper['paper_id']}")
350
- print(f"📅 Date: {paper['published']}")
351
- print(f"📝 Abstract: {paper['abstract'][:150]}...")
352
- print("-" * 80)
353
-
354
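- # NOTE: search_and_download() takes no display callback and never calls custom_display; to use it, pass the list returned by parse_search_results(search_arxiv(...)) to custom_display()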
355
- search_and_download("blockchain", max_results=3)
356
- ```
357
-
358
- ## 🛠️ Troubleshooting
359
-
360
- ### Common Issues
361
-
362
- #### 1. No Results Found
363
- **Problem:** Search returns no papers
364
- **Solution:**
365
- - Check spelling and use broader terms
366
- - Try different keyword combinations
367
- - Verify internet connection
368
-
369
- #### 2. Download Failed
370
- **Problem:** Paper download fails
371
- **Solution:**
372
- - Verify paper ID is correct
373
- - Check if paper exists on arXiv
374
- - Ensure write permissions in output directory
375
-
376
- #### 3. API Rate Limiting
377
- **Problem:** Too many requests
378
- **Solution:**
379
- - Wait between requests
380
- - Reduce batch size
381
- - Use interactive mode for multiple searches
382
-
383
- #### 4. XML Parsing Errors
384
- **Problem:** Error parsing search results
385
- **Solution:**
386
- - Check internet connection
387
- - Verify API response format
388
- - Update the script if needed
389
-
390
- #### 5. Filename Too Long
391
- **Problem:** Generated filename exceeds system limits
392
- **Solution:**
393
- - The tool automatically limits the title part of the filename to 100 characters (the `.pdf` extension is added after that)
394
- - Special characters are automatically cleaned
395
- - Fallback to paper ID if title is unavailable
396
-
397
- ### Error Messages
398
-
399
- ```
400
- Error: Failed to download paper 2502.05218v1
401
- ```
402
- - Paper ID may not exist
403
- - Network connection issue
404
- - arXiv server problem
405
-
406
- ```
407
- Error parsing XML: ...
408
- ```
409
- - Malformed API response
410
- - Network interruption
411
- - API format change
412
-
413
- ```
414
- Could not find paper information for 2502.05218v1
415
- ```
416
- - Paper ID may be invalid
417
- - arXiv API issue
418
- - Network connectivity problem
419
-
420
- ## 📖 API Reference
421
-
422
- ### arXiv API Endpoints
423
- - **Search API**: `http://export.arxiv.org/api/query`
424
- - **Metadata API**: `http://export.arxiv.org/api/query?id_list={paper_id}`
425
- - **Documentation**: https://arxiv.org/help/api
426
- - **Rate Limits**: Be respectful, avoid excessive requests
427
-
428
- ### Data Fields Available
429
- - **Title**: Paper title
430
- - **Authors**: List of author names
431
- - **Abstract**: Paper abstract
432
- - **Paper ID**: Unique arXiv identifier
433
- - **Published Date**: Publication timestamp
434
- - **Categories**: arXiv subject categories
435
-
436
- ### Paper ID Format
437
- - **Format**: `YYMM.NNNNNvN`
438
- - **Example**: `2502.05218v1`
439
- - **Download URL**: `https://arxiv.org/pdf/{paper_id}.pdf`
440
-
441
- ### Smart Download Features
442
- - **Automatic Metadata Fetching**: Gets paper information before download
443
- - **Intelligent Filename Generation**: Converts paper titles to valid filenames
444
- - **Character Cleaning**: Removes special characters and spaces
445
- - **Length Limiting**: Ensures filenames don't exceed system limits
446
- - **Fallback Naming**: Uses paper ID if title is unavailable
447
-
448
- ## 🤝 Contributing
449
-
450
- ### Adding New Features
451
- 1. Fork the repository
452
- 2. Create a feature branch
453
- 3. Implement your changes
454
- 4. Add tests and documentation
455
- 5. Submit a pull request
456
-
457
- ### Reporting Issues
458
- - Check existing issues first
459
- - Provide detailed error messages
460
- - Include system information
461
- - Describe steps to reproduce
462
-
463
- ## 📄 License
464
-
465
- This project is open source and available under the MIT License.
466
-
467
- ## 🙏 Acknowledgments
468
-
469
- - **arXiv**: For providing the public API
470
- - **Python Community**: For excellent libraries and tools
471
- - **Researchers**: For contributing to open science
472
-
473
- ## 📞 Support
474
-
475
- ### Getting Help
476
- - Check this documentation first
477
- - Review the examples section
478
- - Search existing issues
479
- - Create a new issue for bugs
480
-
481
- ### Useful Links
482
- - [arXiv Official Site](https://arxiv.org/)
483
- - [arXiv API Documentation](https://arxiv.org/help/api)
484
- - [Python Requests Library](https://requests.readthedocs.io/)
485
-
486
- ---
487
-
488
- **Happy Researching! 🎓📚**
489
-
490
- *This tool makes academic research more accessible and efficient. Use it responsibly and respect arXiv's terms of service.*
@@ -1,229 +0,0 @@
1
- import requests
2
- import xml.etree.ElementTree as ET
3
- import os
4
- import sys
5
- import argparse
6
-
7
def search_arxiv(query, max_results=10, timeout=30):
    """Query the arXiv export API and return the raw response body.

    Args:
        query: Search expression sent as the ``search_query`` parameter.
        max_results: Maximum number of entries to request.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The response body as text, regardless of the HTTP status code.

    Raises:
        requests.RequestException: On connection errors or when the timeout
            expires.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        'search_query': query,
        'start': 0,
        'max_results': max_results,
    }

    response = requests.get(base_url, params=params, timeout=timeout)
    return response.text
18
-
19
def get_paper_metadata(paper_id, timeout=30):
    """Fetch the metadata of a single paper from the arXiv API.

    Args:
        paper_id: arXiv identifier placed in the ``id_list`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller.

    Returns:
        The first parsed paper dict from ``parse_search_results``, or ``None``
        when the request fails, the status is not 200, or no entry is found.
    """
    try:
        metadata_url = f"http://export.arxiv.org/api/query?id_list={paper_id}"
        response = requests.get(metadata_url, timeout=timeout)

        if response.status_code != 200:
            return None

        papers = parse_search_results(response.text)
        return papers[0] if papers else None
    except Exception as e:
        # Deliberately best-effort: callers fall back to using the paper ID,
        # so any failure is reported and turned into None.
        print(f"Error fetching paper metadata: {e}")
        return None
34
-
35
def download_paper(paper_id, output_dir=".", paper_title=None, timeout=60):
    """Download a paper's PDF and save it under a title-based filename.

    Args:
        paper_id: arXiv identifier used to build the PDF URL.
        output_dir: Directory to write into; created if it does not exist.
        paper_title: Optional title used to derive the filename. When it is
            missing, or nothing usable remains after cleaning, the paper ID
            is used instead.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the written file, or ``None`` if the server did not
        answer with HTTP 200.
    """
    pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
    response = requests.get(pdf_url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to download paper {paper_id}")
        return None

    clean_title = ""
    if paper_title:
        # Keep only filename-safe characters, then cap the length at 100.
        clean_title = "".join(
            c for c in paper_title if c.isalnum() or c in (' ', '-', '_')
        ).rstrip()
        clean_title = clean_title.replace(' ', '_')[:100]

    if clean_title:
        filename = f"{clean_title}.pdf"
    else:
        # A title made only of stripped characters would otherwise yield
        # ".pdf". Old-style IDs ("hep-th/9901001") contain a path separator
        # that must not leak into the filename.
        safe_id = paper_id.replace('/', '_')
        filename = f"{safe_id}.pdf"

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)

    with open(filepath, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filepath}")
    return filepath
59
-
60
def _atom_text(parent, tag):
    """Return the stripped text of the first Atom ``tag`` below ``parent``, or None."""
    elem = parent.find('.//{http://www.w3.org/2005/Atom}' + tag)
    if elem is None or elem.text is None:
        # Empty elements such as <title/> have text None; treat as absent.
        return None
    return elem.text.strip()


def parse_search_results(xml_content):
    """Parse an arXiv Atom feed and extract per-paper information.

    Args:
        xml_content: The Atom XML document returned by the arXiv API.

    Returns:
        A list with one dict per ``entry`` element. ``authors`` is always
        present (possibly empty). ``title``, ``abstract``, ``paper_id`` and
        ``published`` are present only when the corresponding element exists
        and has text. Returns an empty list if the XML cannot be parsed.
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return []

    papers = []
    for entry in root.findall('.//{http://www.w3.org/2005/Atom}entry'):
        paper_info = {}

        title = _atom_text(entry, 'title')
        if title is not None:
            paper_info['title'] = title

        author_names = (
            _atom_text(author, 'name')
            for author in entry.findall('.//{http://www.w3.org/2005/Atom}author')
        )
        paper_info['authors'] = [name for name in author_names if name is not None]

        abstract = _atom_text(entry, 'summary')
        if abstract is not None:
            paper_info['abstract'] = abstract

        raw_id = _atom_text(entry, 'id')
        if raw_id is not None:
            # The id is a URL like "http://arxiv.org/abs/2103.12345v1".
            # Old-style IDs ("hep-th/9901001v1") contain a slash, so take
            # everything after "/abs/" rather than only the last segment.
            _, sep, tail = raw_id.partition('/abs/')
            paper_info['paper_id'] = tail if sep else raw_id.split('/')[-1]

        published = _atom_text(entry, 'published')
        if published is not None:
            paper_info['published'] = published

        papers.append(paper_info)

    return papers
106
-
107
def search_and_download(query, max_results=5, download_first=False):
    """Run an arXiv search, print the hits, and optionally fetch the top one.

    Args:
        query: Search expression forwarded to ``search_arxiv``.
        max_results: Maximum number of results to request.
        download_first: When true, download the first listed paper.
    """
    print(f"Searching arXiv for: '{query}'")
    print("-" * 50)

    # Fetch the feed and turn it into a list of paper dicts.
    found = parse_search_results(search_arxiv(query, max_results))
    if not found:
        print("No papers found.")
        return

    # List every hit with a truncated abstract.
    print(f"Found {len(found)} papers:\n")
    for position, entry in enumerate(found, 1):
        print(f"{position}. Title: {entry.get('title', 'N/A')}")
        author_line = ', '.join(entry.get('authors', ['N/A']))
        print(f" Authors: {author_line}")
        print(f" Paper ID: {entry.get('paper_id', 'N/A')}")
        print(f" Published: {entry.get('published', 'N/A')}")
        summary = entry.get('abstract', 'N/A')[:200]
        print(f" Abstract: {summary}...")
        print()

    if not download_first:
        return

    # Download the top hit, named after its title when one is available.
    top_hit = found[0]
    top_id = top_hit.get('paper_id')
    top_title = top_hit.get('title')
    if not top_id:
        print("Could not extract paper ID for download")
        return

    print(f"Downloading first paper: {top_id}")
    download_paper(top_id, paper_title=top_title)
140
-
141
def _print_interactive_help():
    """Print the list of commands understood by the interactive prompt."""
    print("Commands:")
    print(" search <query> [max_results] - Search for papers")
    print(" download <paper_id> - Download a specific paper")
    print(" help - Show this help message")
    print(" quit/exit - Exit the program")
    print()


def interactive_mode():
    """Run a read-eval loop for searching and downloading arXiv papers.

    Reads commands from standard input until ``quit``/``exit``/``q`` is
    entered, the user presses Ctrl-C, or the input stream ends. Errors raised
    while executing a single command are printed and the loop continues.
    """
    print("🔍 arXiv Paper Search Tool")
    print("=" * 40)
    _print_interactive_help()

    while True:
        try:
            command = input("📚 arxiv> ").strip()

            if not command:
                continue

            parts = command.split()
            cmd = parts[0].lower()

            if cmd in ['quit', 'exit', 'q']:
                print("Goodbye! 👋")
                break

            elif cmd == 'help':
                _print_interactive_help()

            elif cmd == 'search':
                search_args = parts[1:]
                if not search_args:
                    print("Usage: search <query> [max_results]")
                    continue

                # Only a trailing number after at least one query word is a
                # result limit; otherwise every word belongs to the query.
                max_results = 5
                if len(search_args) > 1 and search_args[-1].isdecimal():
                    max_results = int(search_args.pop())
                query = ' '.join(search_args)

                search_and_download(query, max_results, download_first=False)

            elif cmd == 'download':
                if len(parts) < 2:
                    print("Usage: download <paper_id>")
                    continue

                paper_id = parts[1]
                # Look up the title first so the file can be named after it.
                print(f"Fetching paper information for {paper_id}...")
                paper_info = get_paper_metadata(paper_id)

                if paper_info and paper_info.get('title'):
                    paper_title = paper_info['title']
                    print(f"Found paper: {paper_title}")
                    download_paper(paper_id, paper_title=paper_title)
                else:
                    print(f"Could not find paper information for {paper_id}")
                    print("Downloading with paper ID as filename...")
                    download_paper(paper_id)

            else:
                print(f"Unknown command: {cmd}")
                print("Type 'help' for available commands")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is exhausted (Ctrl-D or a closed pipe).
            # Treating it as a generic error would repeat the failing
            # input() call forever.
            print("\nGoodbye! 👋")
            break
        except Exception as e:
            print(f"Error: {e}")
212
-
213
# Command-line entry point: a one-shot search when a query is given,
# otherwise the interactive prompt.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description='Search and download papers from arXiv')
    cli.add_argument('query', nargs='?', help='Search query')
    cli.add_argument('-n', '--max_results', type=int, default=5, help='Maximum number of results (default: 5)')
    cli.add_argument('-d', '--download', action='store_true', help='Download the first result')
    cli.add_argument('-i', '--interactive', action='store_true', help='Start interactive mode')

    options = cli.parse_args()

    # The interactive flag wins over a query; with neither, the prompt is
    # also the default behavior.
    one_shot_search = bool(options.query) and not options.interactive
    if one_shot_search:
        search_and_download(options.query, options.max_results, options.download)
    else:
        interactive_mode()