arxiv-pulse 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,5 +29,5 @@ try:
29
29
 
30
30
  except importlib.metadata.PackageNotFoundError:
31
31
  # 包未安装时使用默认版本
32
- __version__ = "0.6.0"
33
- __version_info__ = (0, 6, 0)
32
+ __version__ = "0.6.1"
33
+ __version_info__ = (0, 6, 1)
arxiv_pulse/cli.py CHANGED
@@ -811,10 +811,10 @@ def interactive_configuration():
811
811
  click.echo("\n📊 爬虫配置")
812
812
  click.echo("-" * 40)
813
813
 
814
- max_results_initial = click.prompt("初始同步每个查询的最大论文数", default=100, type=int, show_default=True)
814
+ max_results_initial = click.prompt("初始同步每个查询的最大论文数", default=10000, type=int, show_default=True)
815
815
  config["MAX_RESULTS_INITIAL"] = str(max_results_initial)
816
816
 
817
- max_results_daily = click.prompt("每日同步每个查询的最大论文数", default=20, type=int, show_default=True)
817
+ max_results_daily = click.prompt("每日同步每个查询的最大论文数", default=500, type=int, show_default=True)
818
818
  config["MAX_RESULTS_DAILY"] = str(max_results_daily)
819
819
 
820
820
  years_back = click.prompt("初始同步回溯的年数", default=5, type=int, show_default=True)
@@ -899,27 +899,24 @@ def interactive_configuration():
899
899
  click.echo("-" * 40)
900
900
 
901
901
  # 根据领域数量提供建议
902
- recommended_initial = 100
903
- recommended_daily = 20
904
-
905
- if num_selected_fields <= 3:
902
+ if num_selected_fields <= 6:
906
903
  click.echo("✅ 您选择了少量领域,保持默认配置即可。")
907
- elif num_selected_fields <= 6:
908
- recommended_initial = 70
909
- recommended_daily = 15
904
+ elif num_selected_fields <= 10:
905
+ recommended_initial = 4000
906
+ recommended_daily = 200
910
907
  click.echo(f"⚠️ 您选择了中等数量领域,建议调整爬虫配置以避免过多论文:")
911
- click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial} (原默认: 100)")
912
- click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily} (原默认: 20)")
908
+ click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}")
909
+ click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}")
913
910
  else:
914
- recommended_initial = 50
915
- recommended_daily = 10
911
+ recommended_initial = 1000
912
+ recommended_daily = 50
916
913
  click.echo(f"⚠️ 您选择了大量领域 ({num_selected_fields}个),强烈建议调整爬虫配置:")
917
- click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial} (原默认: 100)")
918
- click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily} (原默认: 20)")
914
+ click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}")
915
+ click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}")
919
916
  click.echo(f" - 注意:同步大量领域可能需要较长时间和更多存储空间。")
920
917
 
921
918
  # 询问用户是否应用建议
922
- if num_selected_fields > 3:
919
+ if num_selected_fields > 6:
923
920
  if click.confirm("\n💡 是否应用上述建议调整爬虫配置?", default=True):
924
921
  config["MAX_RESULTS_INITIAL"] = str(recommended_initial)
925
922
  config["MAX_RESULTS_DAILY"] = str(recommended_daily)
@@ -966,58 +963,77 @@ def init(directory, years_back):
966
963
  if years_back is None:
967
964
  years_back = interactive_years_back
968
965
 
969
- # 生成 .env 文件内容
970
- env_content = f"""# arXiv Pulse 配置文件
971
- # 由交互式配置向导于 {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} 生成
972
-
973
- # ========================
974
- # AI API 配置 (支持 OpenAI 格式)
975
- # ========================
976
- AI_API_KEY={config.get("AI_API_KEY", "your_api_key_here")}
977
- AI_MODEL={config.get("AI_MODEL", "DeepSeek-V3.2-Thinking")}
978
- AI_BASE_URL={config.get("AI_BASE_URL", "https://llmapi.paratera.com")}
979
-
980
- # ========================
981
- # 数据库配置
982
- # ========================
983
- DATABASE_URL=sqlite:///data/arxiv_papers.db
984
-
985
- # ========================
986
- # 爬虫配置
987
- # ========================
988
- MAX_RESULTS_INITIAL={config.get("MAX_RESULTS_INITIAL", "100")} # init命令每个查询的论文数
989
- MAX_RESULTS_DAILY={config.get("MAX_RESULTS_DAILY", "20")} # sync命令每个查询的论文数
990
-
991
- # ========================
992
- # 搜索查询配置
993
- # ========================
994
- # 分号分隔,允许查询中包含逗号
995
- # 根据您的选择生成的研究领域查询
996
- SEARCH_QUERIES={config.get("SEARCH_QUERIES", 'condensed matter physics AND cat:cond-mat.*; (ti:"density functional" OR abs:"density functional") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph); (ti:"machine learning" OR abs:"machine learning") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph)')}
997
-
998
- # ========================
999
- # 报告配置
1000
- # ========================
1001
- REPORT_DIR=reports
1002
- SUMMARY_MAX_TOKENS=2000 # 总结和翻译的最大token数
1003
- TOKEN_PRICE_PER_MILLION=3.0
1004
- REPORT_MAX_PAPERS={config.get("REPORT_MAX_PAPERS", "50")}
1005
-
1006
- # ========================
1007
- # 同步配置
1008
- # ========================
1009
- YEARS_BACK={config.get("YEARS_BACK", "3")} # 同步回溯的年数
1010
- IMPORTANT_PAPERS_FILE=important_papers.txt
1011
-
1012
- # ========================
1013
- # 可选配置
1014
- # ========================
1015
- # 日志级别: DEBUG, INFO, WARNING, ERROR (默认: INFO)
1016
- LOG_LEVEL=INFO
1017
-
1018
- # 爬虫延迟(秒,避免频繁请求 arXiv API)
1019
- CRAWL_DELAY=1.0
1020
- """
966
+ # 读取 .ENV.TEMPLATE 文件作为基础模板
967
+ template_file = Path(__file__).parent / ".ENV.TEMPLATE"
968
+ if not template_file.exists():
969
+ click.echo(f"❌ 找不到模板文件: {template_file}")
970
+ click.echo("请确保 .ENV.TEMPLATE 文件存在于 arxiv_pulse 目录中")
971
+ return
972
+
973
+ env_content = template_file.read_text(encoding="utf-8")
974
+
975
+ # 添加生成时间戳注释(插入到第一行之后)
976
+ timestamp_comment = f"# 由交互式配置向导于 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} 生成\n"
977
+ lines = env_content.split("\n")
978
+ if lines and lines[0].startswith("#"):
979
+ # 在第一行注释后插入时间戳
980
+ lines.insert(1, timestamp_comment)
981
+ else:
982
+ # 如果没有注释行,添加到开头
983
+ lines.insert(0, timestamp_comment)
984
+ env_content = "\n".join(lines)
985
+
986
+ # 替换配置项(使用 config 字典中的值)
987
+ # 统一替换逻辑:搜索以键名开头的行,替换整行
988
+ lines = env_content.split("\n")
989
+
990
+ # AI API 配置
991
+ for i, line in enumerate(lines):
992
+ if line.strip().startswith("AI_API_KEY="):
993
+ lines[i] = f"AI_API_KEY={config.get('AI_API_KEY', 'your_api_key_here')}"
994
+ break
995
+
996
+ for i, line in enumerate(lines):
997
+ if line.strip().startswith("AI_MODEL="):
998
+ lines[i] = f"AI_MODEL={config.get('AI_MODEL', 'DeepSeek-V3.2-Thinking')}"
999
+ break
1000
+
1001
+ for i, line in enumerate(lines):
1002
+ if line.strip().startswith("AI_BASE_URL="):
1003
+ lines[i] = f"AI_BASE_URL={config.get('AI_BASE_URL', 'https://llmapi.paratera.com')}"
1004
+ break
1005
+
1006
+ # 爬虫配置
1007
+ for i, line in enumerate(lines):
1008
+ if line.strip().startswith("MAX_RESULTS_INITIAL="):
1009
+ lines[i] = f"MAX_RESULTS_INITIAL={config.get('MAX_RESULTS_INITIAL', '10000')}"
1010
+ break
1011
+
1012
+ for i, line in enumerate(lines):
1013
+ if line.strip().startswith("MAX_RESULTS_DAILY="):
1014
+ lines[i] = f"MAX_RESULTS_DAILY={config.get('MAX_RESULTS_DAILY', '500')}"
1015
+ break
1016
+
1017
+ # 搜索查询配置
1018
+ default_search_queries = 'condensed matter physics AND cat:cond-mat.*; (ti:"density functional" OR abs:"density functional") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph); (ti:"machine learning" OR abs:"machine learning") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph)'
1019
+ for i, line in enumerate(lines):
1020
+ if line.strip().startswith("SEARCH_QUERIES="):
1021
+ lines[i] = f"SEARCH_QUERIES={config.get('SEARCH_QUERIES', default_search_queries)}"
1022
+ break
1023
+
1024
+ # 报告配置
1025
+ for i, line in enumerate(lines):
1026
+ if line.strip().startswith("REPORT_MAX_PAPERS="):
1027
+ lines[i] = f"REPORT_MAX_PAPERS={config.get('REPORT_MAX_PAPERS', '50')}"
1028
+ break
1029
+
1030
+ # 同步配置
1031
+ for i, line in enumerate(lines):
1032
+ if line.strip().startswith("YEARS_BACK="):
1033
+ lines[i] = f"YEARS_BACK={config.get('YEARS_BACK', '5')}"
1034
+ break
1035
+
1036
+ env_content = "\n".join(lines)
1021
1037
 
1022
1038
  env_file.write_text(env_content)
1023
1039
  click.echo(f"\n✅ 已在 {directory} 创建 .env 配置文件")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arxiv-pulse
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: An intelligent arXiv literature crawler and analyzer for physics research
5
5
  Author-email: Yang Li <lyang.1915@gmail.com>
6
6
  License-Expression: GPL-3.0-or-later
@@ -1,17 +1,17 @@
1
1
  arxiv_pulse/.ENV.TEMPLATE,sha256=-dG9dl1Cnb7Xjq30HZ7v4__ArtPewdjDtzgpJmBNjYE,5525
2
2
  arxiv_pulse/__init__.py,sha256=BBNwoE-pmiSqc3_X_Sr3Ao9bceygkhsDof1QuXCEfHs,608
3
- arxiv_pulse/__version__.py,sha256=2i17GI0eh-9H1KYRHgcksotLUY2PwD7DSzuEswUbnXg,961
3
+ arxiv_pulse/__version__.py,sha256=q6GkywvRWb4klaJxfZzJnixMfPZhLCV4pKiL_QrBX_Q,961
4
4
  arxiv_pulse/arxiv_crawler.py,sha256=krUY_SCk2bFl9QidDbHt_qEMfO__KK_lDF_QekhDCSc,16623
5
- arxiv_pulse/cli.py,sha256=N6molcFwe4gRlYXQ8r1a3TnWEdX8mejT0ea3RZVLcNc,55994
5
+ arxiv_pulse/cli.py,sha256=-XvHVOZAszLxwjxwMzmI84eIHaPR4IQ91oaWYnfJiDk,57158
6
6
  arxiv_pulse/config.py,sha256=Hcb6zcEjkO0y6QZ0AhqV2YAnKRnM3E6fNIyuGaXL0V0,2503
7
7
  arxiv_pulse/models.py,sha256=pvdd_bfDJcqenVNoCmdnU2049zucA0H8ERHqDa88MbI,10112
8
8
  arxiv_pulse/output_manager.py,sha256=WqYSGWX7cEzqPKCGCp6zevX99D7TYVcWJYTWL_za2XU,7139
9
9
  arxiv_pulse/report_generator.py,sha256=O6AmlHPeLI_0hyFo-5B0HpFP3P287ZfzGk4y_2mFziA,30333
10
10
  arxiv_pulse/search_engine.py,sha256=mEC63uOEawr3-I9zTscbbm2e_opaYkyf4-ihkFK5Who,16045
11
11
  arxiv_pulse/summarizer.py,sha256=60JgAR2bdwmc0V8lou8OVuEmZXWYbfbI-P4EbEdT5D0,12853
12
- arxiv_pulse-0.6.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
13
- arxiv_pulse-0.6.0.dist-info/METADATA,sha256=DGRCVAMQ8H0TD3BTrX0x5jS8DQDZmAb9n7faRkXKO6g,18493
14
- arxiv_pulse-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
- arxiv_pulse-0.6.0.dist-info/entry_points.txt,sha256=ld4XAcWJDyHM1i5RKs0r22LdzhgFcNilNoU1dbf5r6E,46
16
- arxiv_pulse-0.6.0.dist-info/top_level.txt,sha256=CKAnBbV76S-15CP-m3rfia1VNSdZOocr5MhVxXzH-Vw,12
17
- arxiv_pulse-0.6.0.dist-info/RECORD,,
12
+ arxiv_pulse-0.6.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
13
+ arxiv_pulse-0.6.1.dist-info/METADATA,sha256=mPJ3M01dKRsGBqHPql8ZCDqgJvqC7LJ28JKzF05wsV8,18493
14
+ arxiv_pulse-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
+ arxiv_pulse-0.6.1.dist-info/entry_points.txt,sha256=ld4XAcWJDyHM1i5RKs0r22LdzhgFcNilNoU1dbf5r6E,46
16
+ arxiv_pulse-0.6.1.dist-info/top_level.txt,sha256=CKAnBbV76S-15CP-m3rfia1VNSdZOocr5MhVxXzH-Vw,12
17
+ arxiv_pulse-0.6.1.dist-info/RECORD,,