nber-cli 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nber-cli
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: A command-line tool to download NBER papers.
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.11
7
7
  Requires-Dist: aiohttp
8
8
  Requires-Dist: aiohttp-retry
9
9
  Requires-Dist: aiosqlite
10
+ Requires-Dist: certifi
10
11
  Requires-Dist: fake-useragent
11
12
  Requires-Dist: orjson
12
13
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nber-cli"
3
- version = "0.1.0"
3
+ version = "0.1.2"
4
4
  description = "A command-line tool to download NBER papers."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -9,7 +9,8 @@ dependencies = [
9
9
  "aiosqlite",
10
10
  "orjson",
11
11
  "fake_useragent",
12
- "aiohttp-retry"
12
+ "aiohttp-retry",
13
+ "certifi"
13
14
  ]
14
15
 
15
16
  [project.scripts]
@@ -18,5 +19,3 @@ nber-cli = "nber_cli:main"
18
19
  [build-system]
19
20
  requires = ["hatchling"]
20
21
  build-backend = "hatchling.build"
21
-
22
-
@@ -13,10 +13,12 @@ import aiosqlite
13
13
  import os
14
14
  import time
15
15
  import random
16
- import orjson # 比json更快的JSON库
16
+ import orjson # 比json更快的JSON库
17
17
  from fake_useragent import UserAgent
18
18
  from typing import List, Set, Dict, Any
19
19
  import logging
20
+ import ssl
21
+ import certifi
20
22
  from aiohttp_retry import RetryClient, ExponentialRetry
21
23
  import functools
22
24
  import concurrent.futures
@@ -123,23 +125,28 @@ async def update_paper_state(paper_id: str, status: str):
123
125
 
124
126
  async def download_paper(paper_id: str, save_path: str):
125
127
  """下载单个NBER论文"""
126
- if await get_paper_state(paper_id) == 'ok':
127
- logger.info(f"Skipping {paper_id}, already downloaded.")
128
- return
129
-
130
- url = f"https://www.nber.org/papers/{paper_id}.pdf"
131
128
  filepath = os.path.join(save_path, f"{paper_id}.pdf")
132
129
 
133
130
  # 确保保存路径存在
134
131
  os.makedirs(save_path, exist_ok=True)
135
132
 
133
+ state = await get_paper_state(paper_id)
134
+ if state == 'ok' and os.path.exists(filepath):
135
+ logger.info(f"Skipping {paper_id}, already downloaded.")
136
+ return
137
+ if state == 'ok' and not os.path.exists(filepath):
138
+ logger.info(f"{paper_id} marked as downloaded but file missing, re-downloading.")
139
+
140
+ url = f"https://www.nber.org/papers/{paper_id}.pdf"
141
+
136
142
  ua = UserAgent()
137
143
  headers = {'User-Agent': ua.random}
138
144
  retry_options = ExponentialRetry(attempts=MAX_RETRIES)
145
+ ssl_context = ssl.create_default_context(cafile=certifi.where())
139
146
 
140
147
  try:
141
148
  async with RetryClient(retry_options=retry_options, headers=headers) as session:
142
- async with session.get(url, timeout=30) as response:
149
+ async with session.get(url, timeout=30, ssl=ssl_context) as response:
143
150
  if response.status == 200:
144
151
  content = await response.read()
145
152
  with open(filepath, 'wb') as f:
@@ -124,6 +124,15 @@ wheels = [
124
124
  { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
125
125
  ]
126
126
 
127
+ [[package]]
128
+ name = "certifi"
129
+ version = "2025.6.15"
130
+ source = { registry = "https://pypi.org/simple" }
131
+ sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" }
132
+ wheels = [
133
+ { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" },
134
+ ]
135
+
127
136
  [[package]]
128
137
  name = "fake-useragent"
129
138
  version = "2.2.0"
@@ -302,12 +311,13 @@ wheels = [
302
311
 
303
312
  [[package]]
304
313
  name = "nber-cli"
305
- version = "0.1.0"
314
+ version = "0.1.2"
306
315
  source = { editable = "." }
307
316
  dependencies = [
308
317
  { name = "aiohttp" },
309
318
  { name = "aiohttp-retry" },
310
319
  { name = "aiosqlite" },
320
+ { name = "certifi" },
311
321
  { name = "fake-useragent" },
312
322
  { name = "orjson" },
313
323
  ]
@@ -317,6 +327,7 @@ requires-dist = [
317
327
  { name = "aiohttp" },
318
328
  { name = "aiohttp-retry" },
319
329
  { name = "aiosqlite" },
330
+ { name = "certifi" },
320
331
  { name = "fake-useragent" },
321
332
  { name = "orjson" },
322
333
  ]
File without changes
File without changes
File without changes
File without changes