crawlo 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic; see the diff details below for the specific changes between versions.

crawlo/utils/project.py CHANGED
@@ -19,8 +19,7 @@ from typing import Callable, Optional
19
19
  from crawlo.utils.log import get_logger
20
20
  from crawlo.settings.setting_manager import SettingManager
21
21
 
22
-
23
- logger =get_logger(__name__)
22
+ logger = get_logger(__name__)
24
23
 
25
24
 
26
25
  def _find_project_root(start_path: str = '.') -> Optional[str]:
@@ -37,13 +36,11 @@ def _find_project_root(start_path: str = '.') -> Optional[str]:
37
36
  Optional[str]: 找到的项目根目录的绝对路径,如果未找到则返回 None。
38
37
  """
39
38
  path = os.path.abspath(start_path)
40
- logger.info(f"开始向上搜索项目根目录,起始路径: {path}")
41
39
 
42
40
  while True:
43
41
  # 1. 检查是否存在 crawlo.cfg 文件
44
42
  cfg_file = os.path.join(path, 'crawlo.cfg')
45
43
  if os.path.isfile(cfg_file):
46
- logger.info(f"在路径 {path} 找到 'crawlo.cfg' 文件,确定为项目根目录。")
47
44
  return path
48
45
 
49
46
  # 2. 检查是否存在 settings.py 文件,并且它位于一个 Python 包中
@@ -51,7 +48,6 @@ def _find_project_root(start_path: str = '.') -> Optional[str]:
51
48
  if os.path.isfile(settings_file):
52
49
  init_file = os.path.join(path, '__init__.py')
53
50
  if os.path.isfile(init_file):
54
- logger.info(f"在路径 {path} 找到 'settings.py' 文件,确定为项目根目录。")
55
51
  return path
56
52
  else:
57
53
  logger.debug(f"在路径 {path} 找到 'settings.py',但缺少 '__init__.py',忽略。")
@@ -86,7 +82,7 @@ def _get_settings_module_from_cfg(cfg_path: str) -> str:
86
82
  config.read(cfg_path, encoding='utf-8')
87
83
  if config.has_section('settings') and config.has_option('settings', 'default'):
88
84
  module_path = config.get('settings', 'default')
89
- logger.info(f"从 'crawlo.cfg' 中读取到 settings 模块路径: {module_path}")
85
+ logger.debug(f"从 'crawlo.cfg' 中读取到 settings 模块路径: {module_path}")
90
86
  return module_path
91
87
  else:
92
88
  error_msg = f"配置文件 '{cfg_path}' 缺少 '[settings]' 或 'default' 配置项。"
@@ -113,7 +109,7 @@ def get_settings(custom_settings=None):
113
109
  RuntimeError: 当无法找到项目或配置文件时。
114
110
  ImportError: 当无法导入指定的 settings 模块时。
115
111
  """
116
- logger.info("正在初始化配置管理器...")
112
+ logger.debug("正在初始化配置管理器...")
117
113
 
118
114
  # 1. 发现项目根目录
119
115
  project_root = _find_project_root()
@@ -122,7 +118,7 @@ def get_settings(custom_settings=None):
122
118
  logger.error(error_msg)
123
119
  raise RuntimeError(error_msg)
124
120
 
125
- logger.info(f"项目根目录已确定: {project_root}")
121
+ logger.debug(f"项目根目录已确定: {project_root}")
126
122
 
127
123
  # 2. 确定 settings 模块的导入路径
128
124
  settings_module_path = None
@@ -132,27 +128,27 @@ def get_settings(custom_settings=None):
132
128
  if os.path.isfile(cfg_file):
133
129
  settings_module_path = _get_settings_module_from_cfg(cfg_file)
134
130
  else:
135
- logger.info("未找到 'crawlo.cfg',尝试推断 settings 模块路径...")
131
+ logger.debug("未找到 'crawlo.cfg',尝试推断 settings 模块路径...")
136
132
  # 推断:项目目录名.settings
137
133
  project_name = os.path.basename(project_root)
138
134
  settings_module_path = f"{project_name}.settings"
139
- logger.info(f"推断 settings 模块路径为: {settings_module_path}")
135
+ logger.debug(f"推断 settings 模块路径为: {settings_module_path}")
140
136
 
141
137
  # 3. 将项目根目录添加到 Python 路径,确保可以成功导入
142
138
  if project_root not in sys.path:
143
139
  sys.path.insert(0, project_root)
144
- logger.info(f"已将项目根目录 '{project_root}' 添加到 Python 路径。")
140
+ logger.debug(f"已将项目根目录 '{project_root}' 添加到 Python 路径。")
145
141
  else:
146
142
  logger.debug(f"项目根目录 '{project_root}' 已在 Python 路径中。")
147
143
 
148
144
  # 4. 创建 SettingManager 并加载配置
149
- logger.info(f"正在加载 settings 模块: {settings_module_path}")
145
+ logger.debug(f"正在加载 settings 模块: {settings_module_path}")
150
146
  settings = SettingManager()
151
147
 
152
148
  try:
153
149
  # 这会触发 SettingManager.set_settings(),从模块中加载所有大写常量
154
150
  settings.set_settings(settings_module_path)
155
- logger.info("settings 模块加载成功。")
151
+ logger.debug("settings 模块加载成功。")
156
152
  except Exception as e:
157
153
  error_msg = f"加载 settings 模块 '{settings_module_path}' 失败: {e}"
158
154
  logger.error(error_msg)
@@ -160,13 +156,14 @@ def get_settings(custom_settings=None):
160
156
 
161
157
  # 5. 应用运行时自定义设置
162
158
  if custom_settings:
163
- logger.info(f"正在应用运行时自定义设置: {custom_settings}")
159
+ logger.debug(f"正在应用运行时自定义设置: {custom_settings}")
164
160
  settings.update_attributes(custom_settings)
165
161
  logger.info("运行时自定义设置已应用。")
166
162
 
167
- logger.info("配置管理器初始化完成。")
163
+ logger.debug("配置管理器初始化完成。")
168
164
  return settings
169
165
 
166
+
170
167
  def load_class(_path):
171
168
  if not isinstance(_path, str):
172
169
  if callable(_path):
@@ -183,13 +180,14 @@ def load_class(_path):
183
180
  raise NameError(f"Module {module_name!r} has no class named {class_name!r}")
184
181
  return cls
185
182
 
183
+
186
184
  def merge_settings(spider, settings):
187
185
  spider_name = getattr(spider, 'name', 'UnknownSpider')
188
186
  if hasattr(spider, 'custom_settings'):
189
187
  custom_settings = getattr(spider, 'custom_settings')
190
188
  settings.update_attributes(custom_settings)
191
189
  else:
192
- logger.debug(f"爬虫 '{spider_name}' 无 custom_settings,跳过合并") # 添加日志
190
+ logger.debug(f"爬虫 '{spider_name}' 无 custom_settings,跳过合并") # 添加日志
193
191
 
194
192
 
195
193
  async def common_call(func: Callable, *args, **kwargs):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlo
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Crawlo 是一款基于异步IO的高性能Python爬虫框架,支持分布式抓取。
5
5
  Home-page: https://github.com/crawl-coder/Crawlo.git
6
6
  Author: crawl-coder
@@ -1,16 +1,19 @@
1
1
  crawlo/__init__.py,sha256=xpiIAZbSG3CzneJuDLPCbwfRcvw2wyHYl2kJjaNfNGY,584
2
- crawlo/__version__.py,sha256=1HqFYnow__4MUVRI_OMjvzTBzKkReNozOdA96kH53cA,23
2
+ crawlo/__version__.py,sha256=uyL3a6o1xccXPZ2OS65zqIN_lbEMT7PcCxErq7cuWwA,23
3
3
  crawlo/cli.py,sha256=hjAJKx9pba375sATvvcy-dtZyBIgXj8fRBq9RFIZHA4,1206
4
- crawlo/crawler.py,sha256=nfuA_f8QnuIp2F4ZbaJv8Fceo_QPwqV1jYdD_edkMjg,8527
4
+ crawlo/crawler.py,sha256=AyKxUyJvCwb1u4d3Zn3vFmjH28ExWKIygfTICps-3yY,20026
5
5
  crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
6
6
  crawlo/exceptions.py,sha256=xdyZkvVcLEJ-19sWMHvn9IJsu30-hAY2jJhA2kYIims,1207
7
7
  crawlo/stats_collector.py,sha256=v4jC9BAe-23w93hWzbeMCCgQ9VuFPyxw5JV9ItbGH8w,1636
8
8
  crawlo/subscriber.py,sha256=udlHeTR0ymGQhCDxVUGwUzeeeR4TYCEJrJwFnkgr0cU,3836
9
9
  crawlo/task_manager.py,sha256=D9m-nqnGj-FZPtGk4CdwZX3Gw7IWyYvTS7CHpRGWc_w,748
10
10
  crawlo/commands/__init__.py,sha256=dRu3ipuhDM7M1eTb6zJtQZ_u7N_tZumGfH5_I92xno8,252
11
+ crawlo/commands/check.py,sha256=Q8wFjIo43XW0wP93TTlM7HSShgytJsbSWHIlmkcNxz0,3585
11
12
  crawlo/commands/genspider.py,sha256=kSHYsAGHRoxU6Qf_MGpR_VS-Ua5NUGY2KGm_Wapn0sw,3529
12
- crawlo/commands/run.py,sha256=Upv8K4sM0c0I1fIwTFK18VDcSHF7xabqfXtQ82fk56g,4628
13
+ crawlo/commands/list.py,sha256=itR05muZlZs8FbRh88kOhcRbZc77OXiR6A86UnVhSMY,2974
14
+ crawlo/commands/run.py,sha256=s6JJC8HNa-tBgPDB2BPUmj26D7PMckhlx4AOEz57ESY,6197
13
15
  crawlo/commands/startproject.py,sha256=1KOq_CALy01oklr0dAUYhGFzu4f7w45q2H0O3qafLX4,3494
16
+ crawlo/commands/stats.py,sha256=rH0TlD0o-xUr9RxtvNYgnSjHHoRyma3rvx9Q9nIGDNg,1659
14
17
  crawlo/core/__init__.py,sha256=JYSAn15r8yWgRK_Nc69t_8tZCyb70MiPZKssA8wrYz0,43
15
18
  crawlo/core/engine.py,sha256=JFHooPp-5cfHSyxEh87nOOR5NMaPLVDfNSqAsbtx4PM,6030
16
19
  crawlo/core/processor.py,sha256=oHLs-cno0bJGTNc9NGD2S7_2-grI3ruvggO0SY2mf3Q,1180
@@ -26,8 +29,8 @@ crawlo/extension/logging_extension.py,sha256=rty2_up53KV05nCazuBuz2ZapHKq0ti7mGV
26
29
  crawlo/filters/__init__.py,sha256=9fJQRVkxWWPChajYbAGe1O6UYB639xWt0hiLUGBs4hQ,1014
27
30
  crawlo/filters/aioredis_filter.py,sha256=phBFW9Z28oylbik9Kb2WHM65Wo5yRAH2w9Yz0_2HaOQ,5621
28
31
  crawlo/filters/memory_filter.py,sha256=L8XEJkObOxs4BzYpQvk9PVM969k2LE61VFsnEOTEf_E,6841
29
- crawlo/items/__init__.py,sha256=HLDShSwAQUrgwt9_Ec2SIwzpIDZnNOCg9nSYqqEQdp8,407
30
- crawlo/items/base.py,sha256=DZG0qENdukJExRtKjqdNkSlzUoWR3ucjyF73LYLANFo,754
32
+ crawlo/items/__init__.py,sha256=rFpx1qFBo0Ik7bSdnXC8EVTJUOQdoJYGVdhYjaH00nk,409
33
+ crawlo/items/base.py,sha256=hwGJEdFWOdaZfalFX8umRkh_HUWLEbCjvq4j70fplMQ,598
31
34
  crawlo/items/fields.py,sha256=fpS0vlRPpZYjTaMDgI9Q8z_YQqruwf6fi4Dgm6R2oEk,1854
32
35
  crawlo/items/items.py,sha256=OmVEvMmgofMU95GkaiWkfNQ2fjsH2fY9sw3SKcmUhLs,3478
33
36
  crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
@@ -51,7 +54,7 @@ crawlo/pipelines/pipeline_manager.py,sha256=k-Rg0os0Havrov99D-Jn3ROpnz154K30tf7a
51
54
  crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
52
55
  crawlo/settings/default_settings.py,sha256=urj4XJ--ZpVRbbo3fWUT71bYQLmElx43AC9KeHtqHBs,7310
53
56
  crawlo/settings/setting_manager.py,sha256=4xXOzKwZCgAp8ybwvVcs2R--CsOD7c6dBIkj6DJHB3c,2998
54
- crawlo/spider/__init__.py,sha256=lWi0bCR7HLT5bnj7_e9UIgFJjuqoeWtbwADfNkaajug,1139
57
+ crawlo/spider/__init__.py,sha256=IyQd4ufbAIhA_cvWrsNReRv3tj76CHc5Aef9c8KR-9s,3983
55
58
  crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
56
59
  crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
57
60
  crawlo/templates/project/items.py.tmpl,sha256=bXx-oCldMr2EgBKUAH9LH5gMnbyLiWX-EySAaMzcu2g,318
@@ -67,7 +70,7 @@ crawlo/utils/db_helper.py,sha256=ZqOt1d3mErVv4TOvoWlov0niUxORB9aHByTmMoNFIDw,109
67
70
  crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
68
71
  crawlo/utils/log.py,sha256=YD2FfXuuE2MC9ZdQQZ0H7KysE7l_LHZqQepaTPlcApo,4133
69
72
  crawlo/utils/pqueue.py,sha256=HDgX4HAkc7RqYUtX6q51tzI1ZRTACf8P_4jLqC4-uC0,5559
70
- crawlo/utils/project.py,sha256=qAiCmpIxiB7RxCLG-U5lGV6k4UCa21uRdykTfnAF834,7669
73
+ crawlo/utils/project.py,sha256=hXSKV55OBUFjJi7TXekB4X3MmAgsqAeVTj5wPUWOizc,7394
71
74
  crawlo/utils/request.py,sha256=ejdKpTwc-HE04HQybafhOVywzz57IV3pY0YMkSLyGUo,9065
72
75
  crawlo/utils/spider_loader.py,sha256=V0CBTicJBYBZafhwLfDEfuEc_hJ2mSoiptT6qKufI9U,2249
73
76
  crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
@@ -87,8 +90,8 @@ tests/test_proxy_middleware_integration.py,sha256=mTPK_XvbmLCV_QoVZzA3ybWOOX6149
87
90
  tests/test_proxy_providers.py,sha256=u_R2fhab90vqvQEaOAztpAOe9tJXvUMIdoDxmStmXJ4,1749
88
91
  tests/test_proxy_stats.py,sha256=ES00CEoDITYPFBGPk8pecFzD3ItYIv6NSpcqNd8-kvo,526
89
92
  tests/test_proxy_strategies.py,sha256=9Z1pXmTNyw-eIhGXlf2abZbJx6igLohYq-_3hldQ5uE,1868
90
- crawlo-1.0.6.dist-info/METADATA,sha256=_TDAivxDg2R8omq5gG1kUiODY2tZ3UEp5aH0SwshOjI,1825
91
- crawlo-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
92
- crawlo-1.0.6.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
93
- crawlo-1.0.6.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
94
- crawlo-1.0.6.dist-info/RECORD,,
93
+ crawlo-1.0.8.dist-info/METADATA,sha256=ia-nA0g0Rl76iHFIlvaRbvUnjd88KEKoxIrJKcjtCyw,1825
94
+ crawlo-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
95
+ crawlo-1.0.8.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
96
+ crawlo-1.0.8.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
97
+ crawlo-1.0.8.dist-info/RECORD,,
File without changes