nber-cli 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nber_cli-0.1.0 → nber_cli-0.1.2}/PKG-INFO +2 -1
- {nber_cli-0.1.0 → nber_cli-0.1.2}/pyproject.toml +3 -4
- {nber_cli-0.1.0 → nber_cli-0.1.2}/src/nber_cli/downloader.py +14 -7
- {nber_cli-0.1.0 → nber_cli-0.1.2}/uv.lock +12 -1
- {nber_cli-0.1.0 → nber_cli-0.1.2}/.github/workflows/publish.yml +0 -0
- {nber_cli-0.1.0 → nber_cli-0.1.2}/.gitignore +0 -0
- {nber_cli-0.1.0 → nber_cli-0.1.2}/.python-version +0 -0
- {nber_cli-0.1.0 → nber_cli-0.1.2}/LICENSE +0 -0
- {nber_cli-0.1.0 → nber_cli-0.1.2}/README.md +0 -0
- {nber_cli-0.1.0 → nber_cli-0.1.2}/src/nber_cli/__init__.py +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nber-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A command-line tool to download NBER papers.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.11
|
|
7
7
|
Requires-Dist: aiohttp
|
|
8
8
|
Requires-Dist: aiohttp-retry
|
|
9
9
|
Requires-Dist: aiosqlite
|
|
10
|
+
Requires-Dist: certifi
|
|
10
11
|
Requires-Dist: fake-useragent
|
|
11
12
|
Requires-Dist: orjson
|
|
12
13
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nber-cli"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
description = "A command-line tool to download NBER papers."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|
|
@@ -9,7 +9,8 @@ dependencies = [
|
|
|
9
9
|
"aiosqlite",
|
|
10
10
|
"orjson",
|
|
11
11
|
"fake_useragent",
|
|
12
|
-
"aiohttp-retry"
|
|
12
|
+
"aiohttp-retry",
|
|
13
|
+
"certifi"
|
|
13
14
|
]
|
|
14
15
|
|
|
15
16
|
[project.scripts]
|
|
@@ -18,5 +19,3 @@ nber-cli = "nber_cli:main"
|
|
|
18
19
|
[build-system]
|
|
19
20
|
requires = ["hatchling"]
|
|
20
21
|
build-backend = "hatchling.build"
|
|
21
|
-
|
|
22
|
-
|
|
@@ -13,10 +13,12 @@ import aiosqlite
|
|
|
13
13
|
import os
|
|
14
14
|
import time
|
|
15
15
|
import random
|
|
16
|
-
import orjson
|
|
16
|
+
import orjson # 比json更快的JSON库
|
|
17
17
|
from fake_useragent import UserAgent
|
|
18
18
|
from typing import List, Set, Dict, Any
|
|
19
19
|
import logging
|
|
20
|
+
import ssl
|
|
21
|
+
import certifi
|
|
20
22
|
from aiohttp_retry import RetryClient, ExponentialRetry
|
|
21
23
|
import functools
|
|
22
24
|
import concurrent.futures
|
|
@@ -123,23 +125,28 @@ async def update_paper_state(paper_id: str, status: str):
|
|
|
123
125
|
|
|
124
126
|
async def download_paper(paper_id: str, save_path: str):
|
|
125
127
|
"""下载单个NBER论文"""
|
|
126
|
-
if await get_paper_state(paper_id) == 'ok':
|
|
127
|
-
logger.info(f"Skipping {paper_id}, already downloaded.")
|
|
128
|
-
return
|
|
129
|
-
|
|
130
|
-
url = f"https://www.nber.org/papers/{paper_id}.pdf"
|
|
131
128
|
filepath = os.path.join(save_path, f"{paper_id}.pdf")
|
|
132
129
|
|
|
133
130
|
# 确保保存路径存在
|
|
134
131
|
os.makedirs(save_path, exist_ok=True)
|
|
135
132
|
|
|
133
|
+
state = await get_paper_state(paper_id)
|
|
134
|
+
if state == 'ok' and os.path.exists(filepath):
|
|
135
|
+
logger.info(f"Skipping {paper_id}, already downloaded.")
|
|
136
|
+
return
|
|
137
|
+
if state == 'ok' and not os.path.exists(filepath):
|
|
138
|
+
logger.info(f"{paper_id} marked as downloaded but file missing, re-downloading.")
|
|
139
|
+
|
|
140
|
+
url = f"https://www.nber.org/papers/{paper_id}.pdf"
|
|
141
|
+
|
|
136
142
|
ua = UserAgent()
|
|
137
143
|
headers = {'User-Agent': ua.random}
|
|
138
144
|
retry_options = ExponentialRetry(attempts=MAX_RETRIES)
|
|
145
|
+
ssl_context = ssl.create_default_context(cafile=certifi.where())
|
|
139
146
|
|
|
140
147
|
try:
|
|
141
148
|
async with RetryClient(retry_options=retry_options, headers=headers) as session:
|
|
142
|
-
async with session.get(url, timeout=30) as response:
|
|
149
|
+
async with session.get(url, timeout=30, ssl=ssl_context) as response:
|
|
143
150
|
if response.status == 200:
|
|
144
151
|
content = await response.read()
|
|
145
152
|
with open(filepath, 'wb') as f:
|
|
@@ -124,6 +124,15 @@ wheels = [
|
|
|
124
124
|
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
|
|
125
125
|
]
|
|
126
126
|
|
|
127
|
+
[[package]]
|
|
128
|
+
name = "certifi"
|
|
129
|
+
version = "2025.6.15"
|
|
130
|
+
source = { registry = "https://pypi.org/simple" }
|
|
131
|
+
sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" }
|
|
132
|
+
wheels = [
|
|
133
|
+
{ url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" },
|
|
134
|
+
]
|
|
135
|
+
|
|
127
136
|
[[package]]
|
|
128
137
|
name = "fake-useragent"
|
|
129
138
|
version = "2.2.0"
|
|
@@ -302,12 +311,13 @@ wheels = [
|
|
|
302
311
|
|
|
303
312
|
[[package]]
|
|
304
313
|
name = "nber-cli"
|
|
305
|
-
version = "0.1.0"
|
|
314
|
+
version = "0.1.2"
|
|
306
315
|
source = { editable = "." }
|
|
307
316
|
dependencies = [
|
|
308
317
|
{ name = "aiohttp" },
|
|
309
318
|
{ name = "aiohttp-retry" },
|
|
310
319
|
{ name = "aiosqlite" },
|
|
320
|
+
{ name = "certifi" },
|
|
311
321
|
{ name = "fake-useragent" },
|
|
312
322
|
{ name = "orjson" },
|
|
313
323
|
]
|
|
@@ -317,6 +327,7 @@ requires-dist = [
|
|
|
317
327
|
{ name = "aiohttp" },
|
|
318
328
|
{ name = "aiohttp-retry" },
|
|
319
329
|
{ name = "aiosqlite" },
|
|
330
|
+
{ name = "certifi" },
|
|
320
331
|
{ name = "fake-useragent" },
|
|
321
332
|
{ name = "orjson" },
|
|
322
333
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|