arxiv-pulse 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arxiv_pulse/__version__.py +2 -2
- arxiv_pulse/cli.py +84 -68
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/METADATA +1 -1
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/RECORD +8 -8
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/WHEEL +0 -0
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/entry_points.txt +0 -0
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {arxiv_pulse-0.6.0.dist-info → arxiv_pulse-0.6.1.dist-info}/top_level.txt +0 -0
arxiv_pulse/__version__.py
CHANGED
arxiv_pulse/cli.py
CHANGED
|
@@ -811,10 +811,10 @@ def interactive_configuration():
|
|
|
811
811
|
click.echo("\n📊 爬虫配置")
|
|
812
812
|
click.echo("-" * 40)
|
|
813
813
|
|
|
814
|
-
max_results_initial = click.prompt("初始同步每个查询的最大论文数", default=
|
|
814
|
+
max_results_initial = click.prompt("初始同步每个查询的最大论文数", default=10000, type=int, show_default=True)
|
|
815
815
|
config["MAX_RESULTS_INITIAL"] = str(max_results_initial)
|
|
816
816
|
|
|
817
|
-
max_results_daily = click.prompt("每日同步每个查询的最大论文数", default=
|
|
817
|
+
max_results_daily = click.prompt("每日同步每个查询的最大论文数", default=500, type=int, show_default=True)
|
|
818
818
|
config["MAX_RESULTS_DAILY"] = str(max_results_daily)
|
|
819
819
|
|
|
820
820
|
years_back = click.prompt("初始同步回溯的年数", default=5, type=int, show_default=True)
|
|
@@ -899,27 +899,24 @@ def interactive_configuration():
|
|
|
899
899
|
click.echo("-" * 40)
|
|
900
900
|
|
|
901
901
|
# 根据领域数量提供建议
|
|
902
|
-
|
|
903
|
-
recommended_daily = 20
|
|
904
|
-
|
|
905
|
-
if num_selected_fields <= 3:
|
|
902
|
+
if num_selected_fields <= 6:
|
|
906
903
|
click.echo("✅ 您选择了少量领域,保持默认配置即可。")
|
|
907
|
-
elif num_selected_fields <=
|
|
908
|
-
recommended_initial =
|
|
909
|
-
recommended_daily =
|
|
904
|
+
elif num_selected_fields <= 10:
|
|
905
|
+
recommended_initial = 4000
|
|
906
|
+
recommended_daily = 200
|
|
910
907
|
click.echo(f"⚠️ 您选择了中等数量领域,建议调整爬虫配置以避免过多论文:")
|
|
911
|
-
click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}
|
|
912
|
-
click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}
|
|
908
|
+
click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}")
|
|
909
|
+
click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}")
|
|
913
910
|
else:
|
|
914
|
-
recommended_initial =
|
|
915
|
-
recommended_daily =
|
|
911
|
+
recommended_initial = 1000
|
|
912
|
+
recommended_daily = 50
|
|
916
913
|
click.echo(f"⚠️ 您选择了大量领域 ({num_selected_fields}个),强烈建议调整爬虫配置:")
|
|
917
|
-
click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}
|
|
918
|
-
click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}
|
|
914
|
+
click.echo(f" - 初始同步每个查询最大论文数: {recommended_initial}")
|
|
915
|
+
click.echo(f" - 每日同步每个查询最大论文数: {recommended_daily}")
|
|
919
916
|
click.echo(f" - 注意:同步大量领域可能需要较长时间和更多存储空间。")
|
|
920
917
|
|
|
921
918
|
# 询问用户是否应用建议
|
|
922
|
-
if num_selected_fields >
|
|
919
|
+
if num_selected_fields > 6:
|
|
923
920
|
if click.confirm("\n💡 是否应用上述建议调整爬虫配置?", default=True):
|
|
924
921
|
config["MAX_RESULTS_INITIAL"] = str(recommended_initial)
|
|
925
922
|
config["MAX_RESULTS_DAILY"] = str(recommended_daily)
|
|
@@ -966,58 +963,77 @@ def init(directory, years_back):
|
|
|
966
963
|
if years_back is None:
|
|
967
964
|
years_back = interactive_years_back
|
|
968
965
|
|
|
969
|
-
#
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
#
|
|
982
|
-
#
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
#
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
#
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
966
|
+
# 读取 .ENV.TEMPLATE 文件作为基础模板
|
|
967
|
+
template_file = Path(__file__).parent / ".ENV.TEMPLATE"
|
|
968
|
+
if not template_file.exists():
|
|
969
|
+
click.echo(f"❌ 找不到模板文件: {template_file}")
|
|
970
|
+
click.echo("请确保 .ENV.TEMPLATE 文件存在于 arxiv_pulse 目录中")
|
|
971
|
+
return
|
|
972
|
+
|
|
973
|
+
env_content = template_file.read_text(encoding="utf-8")
|
|
974
|
+
|
|
975
|
+
# 添加生成时间戳注释(插入到第一行之后)
|
|
976
|
+
timestamp_comment = f"# 由交互式配置向导于 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} 生成\n"
|
|
977
|
+
lines = env_content.split("\n")
|
|
978
|
+
if lines and lines[0].startswith("#"):
|
|
979
|
+
# 在第一行注释后插入时间戳
|
|
980
|
+
lines.insert(1, timestamp_comment)
|
|
981
|
+
else:
|
|
982
|
+
# 如果没有注释行,添加到开头
|
|
983
|
+
lines.insert(0, timestamp_comment)
|
|
984
|
+
env_content = "\n".join(lines)
|
|
985
|
+
|
|
986
|
+
# 替换配置项(使用 config 字典中的值)
|
|
987
|
+
# 统一替换逻辑:搜索以键名开头的行,替换整行
|
|
988
|
+
lines = env_content.split("\n")
|
|
989
|
+
|
|
990
|
+
# AI API 配置
|
|
991
|
+
for i, line in enumerate(lines):
|
|
992
|
+
if line.strip().startswith("AI_API_KEY="):
|
|
993
|
+
lines[i] = f"AI_API_KEY={config.get('AI_API_KEY', 'your_api_key_here')}"
|
|
994
|
+
break
|
|
995
|
+
|
|
996
|
+
for i, line in enumerate(lines):
|
|
997
|
+
if line.strip().startswith("AI_MODEL="):
|
|
998
|
+
lines[i] = f"AI_MODEL={config.get('AI_MODEL', 'DeepSeek-V3.2-Thinking')}"
|
|
999
|
+
break
|
|
1000
|
+
|
|
1001
|
+
for i, line in enumerate(lines):
|
|
1002
|
+
if line.strip().startswith("AI_BASE_URL="):
|
|
1003
|
+
lines[i] = f"AI_BASE_URL={config.get('AI_BASE_URL', 'https://llmapi.paratera.com')}"
|
|
1004
|
+
break
|
|
1005
|
+
|
|
1006
|
+
# 爬虫配置
|
|
1007
|
+
for i, line in enumerate(lines):
|
|
1008
|
+
if line.strip().startswith("MAX_RESULTS_INITIAL="):
|
|
1009
|
+
lines[i] = f"MAX_RESULTS_INITIAL={config.get('MAX_RESULTS_INITIAL', '10000')}"
|
|
1010
|
+
break
|
|
1011
|
+
|
|
1012
|
+
for i, line in enumerate(lines):
|
|
1013
|
+
if line.strip().startswith("MAX_RESULTS_DAILY="):
|
|
1014
|
+
lines[i] = f"MAX_RESULTS_DAILY={config.get('MAX_RESULTS_DAILY', '500')}"
|
|
1015
|
+
break
|
|
1016
|
+
|
|
1017
|
+
# 搜索查询配置
|
|
1018
|
+
default_search_queries = 'condensed matter physics AND cat:cond-mat.*; (ti:"density functional" OR abs:"density functional") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph); (ti:"machine learning" OR abs:"machine learning") AND (cat:physics.comp-ph OR cat:cond-mat.mtrl-sci OR cat:physics.chem-ph)'
|
|
1019
|
+
for i, line in enumerate(lines):
|
|
1020
|
+
if line.strip().startswith("SEARCH_QUERIES="):
|
|
1021
|
+
lines[i] = f"SEARCH_QUERIES={config.get('SEARCH_QUERIES', default_search_queries)}"
|
|
1022
|
+
break
|
|
1023
|
+
|
|
1024
|
+
# 报告配置
|
|
1025
|
+
for i, line in enumerate(lines):
|
|
1026
|
+
if line.strip().startswith("REPORT_MAX_PAPERS="):
|
|
1027
|
+
lines[i] = f"REPORT_MAX_PAPERS={config.get('REPORT_MAX_PAPERS', '50')}"
|
|
1028
|
+
break
|
|
1029
|
+
|
|
1030
|
+
# 同步配置
|
|
1031
|
+
for i, line in enumerate(lines):
|
|
1032
|
+
if line.strip().startswith("YEARS_BACK="):
|
|
1033
|
+
lines[i] = f"YEARS_BACK={config.get('YEARS_BACK', '5')}"
|
|
1034
|
+
break
|
|
1035
|
+
|
|
1036
|
+
env_content = "\n".join(lines)
|
|
1021
1037
|
|
|
1022
1038
|
env_file.write_text(env_content)
|
|
1023
1039
|
click.echo(f"\n✅ 已在 {directory} 创建 .env 配置文件")
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
arxiv_pulse/.ENV.TEMPLATE,sha256=-dG9dl1Cnb7Xjq30HZ7v4__ArtPewdjDtzgpJmBNjYE,5525
|
|
2
2
|
arxiv_pulse/__init__.py,sha256=BBNwoE-pmiSqc3_X_Sr3Ao9bceygkhsDof1QuXCEfHs,608
|
|
3
|
-
arxiv_pulse/__version__.py,sha256=
|
|
3
|
+
arxiv_pulse/__version__.py,sha256=q6GkywvRWb4klaJxfZzJnixMfPZhLCV4pKiL_QrBX_Q,961
|
|
4
4
|
arxiv_pulse/arxiv_crawler.py,sha256=krUY_SCk2bFl9QidDbHt_qEMfO__KK_lDF_QekhDCSc,16623
|
|
5
|
-
arxiv_pulse/cli.py,sha256
|
|
5
|
+
arxiv_pulse/cli.py,sha256=-XvHVOZAszLxwjxwMzmI84eIHaPR4IQ91oaWYnfJiDk,57158
|
|
6
6
|
arxiv_pulse/config.py,sha256=Hcb6zcEjkO0y6QZ0AhqV2YAnKRnM3E6fNIyuGaXL0V0,2503
|
|
7
7
|
arxiv_pulse/models.py,sha256=pvdd_bfDJcqenVNoCmdnU2049zucA0H8ERHqDa88MbI,10112
|
|
8
8
|
arxiv_pulse/output_manager.py,sha256=WqYSGWX7cEzqPKCGCp6zevX99D7TYVcWJYTWL_za2XU,7139
|
|
9
9
|
arxiv_pulse/report_generator.py,sha256=O6AmlHPeLI_0hyFo-5B0HpFP3P287ZfzGk4y_2mFziA,30333
|
|
10
10
|
arxiv_pulse/search_engine.py,sha256=mEC63uOEawr3-I9zTscbbm2e_opaYkyf4-ihkFK5Who,16045
|
|
11
11
|
arxiv_pulse/summarizer.py,sha256=60JgAR2bdwmc0V8lou8OVuEmZXWYbfbI-P4EbEdT5D0,12853
|
|
12
|
-
arxiv_pulse-0.6.
|
|
13
|
-
arxiv_pulse-0.6.
|
|
14
|
-
arxiv_pulse-0.6.
|
|
15
|
-
arxiv_pulse-0.6.
|
|
16
|
-
arxiv_pulse-0.6.
|
|
17
|
-
arxiv_pulse-0.6.
|
|
12
|
+
arxiv_pulse-0.6.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
13
|
+
arxiv_pulse-0.6.1.dist-info/METADATA,sha256=mPJ3M01dKRsGBqHPql8ZCDqgJvqC7LJ28JKzF05wsV8,18493
|
|
14
|
+
arxiv_pulse-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
15
|
+
arxiv_pulse-0.6.1.dist-info/entry_points.txt,sha256=ld4XAcWJDyHM1i5RKs0r22LdzhgFcNilNoU1dbf5r6E,46
|
|
16
|
+
arxiv_pulse-0.6.1.dist-info/top_level.txt,sha256=CKAnBbV76S-15CP-m3rfia1VNSdZOocr5MhVxXzH-Vw,12
|
|
17
|
+
arxiv_pulse-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|