crawlo 1.2.4-py3-none-any.whl → 1.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


crawlo/__version__.py CHANGED
@@ -1 +1 @@
-__version__ = "1.2.4"
+__version__ = "1.2.5"
crawlo/cli.py CHANGED
@@ -10,12 +10,19 @@ from crawlo.commands import get_commands
 def main():
     # 获取框架版本号
     version_file = os.path.join(os.path.dirname(__file__), '__version__.py')
+    VERSION = '1.0.0'  # 默认版本号
     if os.path.exists(version_file):
-        with open(version_file, 'r') as f:
-            exec(f.read())
-        VERSION = locals().get('__version__', '1.0.0')
-    else:
-        VERSION = '1.0.0'
+        try:
+            with open(version_file, 'r') as f:
+                content = f.read()
+            # 使用正则表达式提取版本号
+            import re
+            version_match = re.search(r"__version__\s*=\s*['\"]([^'\"]*)['\"]", content)
+            if version_match:
+                VERSION = version_match.group(1)
+        except Exception:
+            # 如果读取失败,使用默认版本号
+            pass
 
     # 获取所有可用命令
     commands = get_commands()
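Note on the change above: parsing the version with a regular expression avoids executing __version__.py, so a malformed file can no longer break or hijack CLI startup. A minimal standalone sketch of the same pattern (read_version and its fallback value are illustrative, not part of crawlo's API):

    import re

    def read_version(version_file: str, default: str = "1.0.0") -> str:
        """Extract __version__ = "x.y.z" from a file without importing or exec()-ing it."""
        try:
            with open(version_file, "r", encoding="utf-8") as f:
                content = f.read()
            match = re.search(r"__version__\s*=\s*['\"]([^'\"]*)['\"]", content)
            if match:
                return match.group(1)
        except OSError:
            pass  # unreadable file: fall back to the default below
        return default

    print(read_version("crawlo/__version__.py"))  # "1.2.5" for this release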
crawlo/commands/startproject.py CHANGED
@@ -108,7 +108,11 @@ def _copytree_with_templates(src, dst, context, template_type='default', modules
 
     for item in src_path.rglob('*'):
         rel_path = item.relative_to(src_path)
-        dst_item = dst_path / rel_path
+        # 对于run.py.tmpl文件,需要特殊处理,将其放到项目根目录
+        if item.name == 'run.py.tmpl':
+            dst_item = dst_path.parent / rel_path  # 放到项目根目录
+        else:
+            dst_item = dst_path / rel_path
 
         # 检查是否应该包含此文件
         path_str = str(rel_path).replace('\\', '/')
@@ -147,6 +151,9 @@ def _copytree_with_templates(src, dst, context, template_type='default', modules
         if item.name == 'settings.py.tmpl':
             # 特殊处理设置模板文件,统一生成为 settings.py
             final_dst = dst_item.parent / 'settings.py'
+        # 特殊处理run.py.tmpl文件
+        elif item.name == 'run.py.tmpl':
+            final_dst = dst_item.with_suffix('')  # 去掉.tmpl后缀
         else:
             final_dst = dst_item.with_suffix('')
 
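Both branches above rely on pathlib semantics: with_suffix('') drops only the final extension, so run.py.tmpl becomes run.py, while dst_item.parent keeps the destination directory when the file name is replaced. A quick illustration:

    from pathlib import Path

    dst_item = Path("myproject/myproject/run.py.tmpl")
    print(dst_item.with_suffix(""))         # myproject/myproject/run.py (only .tmpl is removed)
    print(dst_item.parent / "settings.py")  # myproject/myproject/settings.py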
@@ -171,8 +178,8 @@ def _should_include_file(rel_path, modules: List[str]) -> bool:
         'settings.py.tmpl',
         'spiders/__init__.py.tmpl',
         'items.py.tmpl',
-        'middlewares.py.tmpl',
-        'run.py.tmpl'
+        'middlewares.py.tmpl'
+        # 移除了'run.py.tmpl',因为它现在在模板根目录
     ]
 
     path_str = str(rel_path).replace('\\', '/')
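The hunk above only shows the list of always-included core templates; the rest of _should_include_file is not part of this diff. A simplified filter in the same spirit (the module-matching rule below is an assumption for illustration, not crawlo's actual logic):

    from typing import List

    CORE_TEMPLATES = {
        'settings.py.tmpl',
        'spiders/__init__.py.tmpl',
        'items.py.tmpl',
        'middlewares.py.tmpl',
    }

    def should_include(rel_path: str, modules: List[str]) -> bool:
        path_str = rel_path.replace('\\', '/')
        if path_str in CORE_TEMPLATES:
            return True  # core files are always generated
        stem = path_str.split('/')[-1].split('.')[0]
        return stem in modules  # optional files only when their module was requested

    print(should_include('middlewares.py.tmpl', []))           # True
    print(should_include('pipelines.py.tmpl', []))             # False
    print(should_include('pipelines.py.tmpl', ['pipelines']))  # True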
@@ -364,16 +371,25 @@ def main(args):
     else:
         console.print("[yellow]⚠ 警告:[/yellow] 找不到模板 'crawlo.cfg.tmpl'。")
 
-    # 3. 复制并渲染项目包内容
+    # 3. 渲染 run.py.tmpl (放在项目根目录)
+    run_template = TEMPLATES_DIR / 'run.py.tmpl'
+    if run_template.exists():
+        run_content = _render_template(run_template, context)
+        (project_dir / 'run.py').write_text(run_content, encoding='utf-8')
+        console.print(f":white_check_mark: 已创建 [green]{project_dir / 'run.py'}[/green]")
+    else:
+        console.print("[yellow]⚠ 警告:[/yellow] 找不到模板 'run.py.tmpl'。")
+
+    # 4. 复制并渲染项目包内容
     package_dir = project_dir / project_name
     _copytree_with_templates(template_dir, package_dir, context, template_type, modules)
     console.print(f":white_check_mark: 已创建项目包: [green]{package_dir}[/green]")
 
-    # 4. 创建 logs 目录
+    # 5. 创建 logs 目录
     (project_dir / 'logs').mkdir(exist_ok=True)
     console.print(":white_check_mark: 已创建 logs 目录")
 
-    # 5. 创建 output 目录(用于数据输出)
+    # 6. 创建 output 目录(用于数据输出)
     (project_dir / 'output').mkdir(exist_ok=True)
     console.print(":white_check_mark: 已创建 output 目录")
 
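_render_template itself is not shown in this diff; a naive stand-in that does plain {{name}} substitution (purely illustrative, not crawlo's real renderer) is enough to see how run.py now lands in the project root instead of inside the package directory:

    from pathlib import Path

    def render_template(template_path: Path, context: dict) -> str:
        # Naive {{key}} replacement; the real _render_template may do more.
        text = template_path.read_text(encoding='utf-8')
        for key, value in context.items():
            text = text.replace('{{' + key + '}}', str(value))
        return text

    templates_dir = Path('crawlo/templates')  # assumed TEMPLATES_DIR location
    project_dir = Path('demo_project')
    project_dir.mkdir(exist_ok=True)

    run_template = templates_dir / 'run.py.tmpl'  # template moved here in 1.2.5
    if run_template.exists():
        content = render_template(run_template, {'project_name': 'demo_project'})
        (project_dir / 'run.py').write_text(content, encoding='utf-8')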
crawlo/templates/project/settings.py.tmpl CHANGED
@@ -1,35 +1,9 @@
 # -*- coding: UTF-8 -*-
-"""
-{{project_name}} 项目配置文件
-=============================
-基于 Crawlo 框架的爬虫项目配置。
-
-🎯 快速开始:
-
-# 方式1:使用默认配置(推荐)
-from crawlo.crawler import CrawlerProcess
-process = CrawlerProcess()  # 无需任何配置
-
-# 方式2:使用配置工厂
-from crawlo.config import CrawloConfig
-config = CrawloConfig.standalone()  # 单机模式
-config = CrawloConfig.distributed(redis_host='192.168.1.100')  # 分布式模式
-process = CrawlerProcess(settings=config.to_dict())
-
-# 方式3:使用环境变量
-from crawlo.config import CrawloConfig
-config = CrawloConfig.from_env()  # 从环境变量读取
-"""
 import os
 from crawlo.config import CrawloConfig
 
 # ============================== 项目基本信息 ==============================
 PROJECT_NAME = '{{project_name}}'
-try:
-    from crawlo import __version__
-    VERSION = __version__
-except ImportError:
-    VERSION = '1.0.0'
 
 # ============================== 运行模式选择 ==============================
 
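The generated settings no longer try to import crawlo.__version__ to define a VERSION constant (the same removal appears in the specialized templates below). A project that still wants the installed framework version can look it up at runtime with the standard library instead; this is a suggestion, not something the template does:

    from importlib.metadata import PackageNotFoundError, version

    try:
        CRAWLO_VERSION = version("crawlo")  # "1.2.5" once this release is installed
    except PackageNotFoundError:
        CRAWLO_VERSION = "unknown"          # crawlo is not installed in this environment

    print(CRAWLO_VERSION)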
crawlo/templates/project/settings_distributed.py.tmpl CHANGED
@@ -5,17 +5,11 @@
 基于 Crawlo 框架的分布式爬虫项目配置。
 适合大规模数据采集和多节点部署。
 """
-
 import os
 from crawlo.config import CrawloConfig
 
 # ============================== 项目基本信息 ==============================
 PROJECT_NAME = '{{project_name}}'
-try:
-    from crawlo import __version__
-    VERSION = __version__
-except ImportError:
-    VERSION = '1.0.0'
 
 # ============================== 分布式配置说明 ==============================
 #
crawlo/templates/project/settings_gentle.py.tmpl CHANGED
@@ -5,17 +5,11 @@
 基于 Crawlo 框架的温和爬虫项目配置。
 适合对目标网站友好的低负载爬取。
 """
-
 import os
 from crawlo.config import CrawloConfig
 
 # ============================== 项目基本信息 ==============================
 PROJECT_NAME = '{{project_name}}'
-try:
-    from crawlo import __version__
-    VERSION = __version__
-except ImportError:
-    VERSION = '1.0.0'
 
 # ============================== 温和模式配置说明 ==============================
 #
crawlo/templates/project/settings_high_performance.py.tmpl CHANGED
@@ -5,17 +5,11 @@
 基于 Crawlo 框架的高性能爬虫项目配置。
 针对大规模、高并发场景优化。
 """
-
 import os
 from crawlo.config import CrawloConfig
 
 # ============================== 项目基本信息 ==============================
 PROJECT_NAME = '{{project_name}}'
-try:
-    from crawlo import __version__
-    VERSION = __version__
-except ImportError:
-    VERSION = '1.0.0'
 
 # ============================== 高性能配置 ==============================
 # 使用配置工厂创建高性能配置
crawlo/templates/project/settings_simple.py.tmpl CHANGED
@@ -5,17 +5,11 @@
 基于 Crawlo 框架的简化爬虫项目配置。
 适合快速开始和小型项目。
 """
-
 import os
 from crawlo.config import CrawloConfig
 
 # ============================== 项目基本信息 ==============================
 PROJECT_NAME = '{{project_name}}'
-try:
-    from crawlo import __version__
-    VERSION = __version__
-except ImportError:
-    VERSION = '1.0.0'
 
 # ============================== 简化版配置说明 ==============================
 #
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlo
-Version: 1.2.4
+Version: 1.2.5
 Summary: Crawlo 是一款基于异步IO的高性能Python爬虫框架,支持分布式抓取。
 Home-page: https://github.com/crawl-coder/Crawlo.git
 Author: crawl-coder
@@ -1,6 +1,6 @@
 crawlo/__init__.py,sha256=1tc6uUDF1yRNU7K_k-Dl6h9FGy7Jp8fdhRsXu9PctFI,1312
-crawlo/__version__.py,sha256=XBKH8E1LmDxv06U39yqMBbXZapOERFgICEDYZs_kRso,22
-crawlo/cli.py,sha256=3noRkIcHUsraO-o0ZI_EylR3-RsoJ0_j_f27hVBzbdc,2536
+crawlo/__version__.py,sha256=jBmZf3HLbiQlWiolOsAA6J5-BbxXD2bqFqEqDH3lfqo,22
+crawlo/cli.py,sha256=8OD2us3YYG55AMTm_3YmHiZ8_Plokkx6p2DVR7w0JGs,2872
 crawlo/config.py,sha256=zbFn2HfcbDcD1QOSgNOB9kjw12UDb7zo4Y7RLcJJirQ,9506
 crawlo/config_validator.py,sha256=eQWrSY4xNnbDgwAmPOENyMb0wb3rdzOL55yBCKmpIak,9883
 crawlo/crawler.py,sha256=KPVgLpDJIZ0k3-U9CtrYgvnLatFG9eYPdjUtqoPBHXU,40305
@@ -21,7 +21,7 @@ crawlo/commands/genspider.py,sha256=NAeXmfb9ElLOuLc4F2xIzuG779KvAAesy3EZa_QxqR8,
 crawlo/commands/help.py,sha256=pHp0m6QBF4NZnZncPBxOLRlJQXJZQsztlyn5euFD9po,5186
 crawlo/commands/list.py,sha256=VR4AWrOiLQgw2t6u8dm9uxsCoCPzX36_6XeTs3tzcQk,5772
 crawlo/commands/run.py,sha256=-5psh1ui0zJmHjmzkSo8Tnl99B1e4Rz90lk0Ak-2SuA,12236
-crawlo/commands/startproject.py,sha256=AmYdsLVl1ETel3DcPyHIQKijXo9WZyF9fHbED2lLPwY,15329
+crawlo/commands/startproject.py,sha256=8tMokyCBN4nEpWjAca3NQorU5t8WR5ffBQZ7eyvUWpA,16260
 crawlo/commands/stats.py,sha256=fCNk7ix6v9PrscC-wKQl6ddA4xaQLogrN_1SNEj66tk,6128
 crawlo/commands/utils.py,sha256=3nP8bMgPH36ELqg1ilzPhHS873ncNvZNxOEDYY4uDm4,5087
 crawlo/core/__init__.py,sha256=PnFyJdVNHBoPmV1sW0AHQXijeoSTQ8cMYrbNM1JK8kA,41
@@ -85,16 +85,16 @@ crawlo/settings/default_settings.py,sha256=FnytMOyx4bpzfLkdbppBTkkKn12bxYuWBShVM
 crawlo/settings/setting_manager.py,sha256=0RYAk07qoJ5WTw_mvV4ECWGS2QNpCnGmBZVTHjqOVIg,3707
 crawlo/spider/__init__.py,sha256=Z_rK23l5yt-DuwJPg8bcqodM_FIs4-iHLaKOimGumcE,20452
 crawlo/templates/crawlo.cfg.tmpl,sha256=9BAmwEibS5Tvy6HIcGXPb0BGeuesmibebmTW0iAEkmo,230
+crawlo/templates/run.py.tmpl,sha256=v_g-LQMYJ6pC8TZgyWj0yB2yTTKrwy9lEJufAYCXyxY,1228
 crawlo/templates/project/__init__.py.tmpl,sha256=f3ETIXw_O6K-lkL6lXM5znMPJW1FZYGFrwDs2BnHcnQ,58
 crawlo/templates/project/items.py.tmpl,sha256=mt1Mm--H2Ouos3r7JPkYh0r33rgYJf1YOMz0OZy8TYs,297
 crawlo/templates/project/middlewares.py.tmpl,sha256=T67p8j0laL4NJJ_3xzPM9yivgZRjTEMiEtEWLPwbkmw,4160
 crawlo/templates/project/pipelines.py.tmpl,sha256=GBHYU0Jx8sKDCdGJp44FMSH7u2slxoFg6a-R9Uwg_-I,2608
-crawlo/templates/project/run.py.tmpl,sha256=v_g-LQMYJ6pC8TZgyWj0yB2yTTKrwy9lEJufAYCXyxY,1228
-crawlo/templates/project/settings.py.tmpl,sha256=RmlHMvRln7YXxjC02QzPma71kYOyU83TPEsHlXGf3xc,13429
-crawlo/templates/project/settings_distributed.py.tmpl,sha256=jwUsq30EjldOKmS5mspHJcR1zm_R5Sqlk8boriTjI7Y,5800
-crawlo/templates/project/settings_gentle.py.tmpl,sha256=wVZtUEgtKmvtaAnG7VFJ4usF0xXvp_VkD4fv2VNur4Y,4868
-crawlo/templates/project/settings_high_performance.py.tmpl,sha256=FmUV3w_W5pIy3G68IoY4r50t5ldMwuJ9e8sZ_17kQTc,5373
-crawlo/templates/project/settings_simple.py.tmpl,sha256=K0ZQ1QQBCVm7nJkPQqzNdu8gZl9gZAPMSoyEyhCX7og,3869
+crawlo/templates/project/settings.py.tmpl,sha256=DFXWWMo88KQyvg-VVEURyEfEd5EEpAdTrehbUaK0FEM,12661
+crawlo/templates/project/settings_distributed.py.tmpl,sha256=wqrQjQuydhnP8O1xs-8g5txu42aV5YVC4QHVhvpc1-c,5691
+crawlo/templates/project/settings_gentle.py.tmpl,sha256=7UTZe8JqA1SUKozlxiGjxgWsakzIhKAtPvU7EfB28wQ,4759
+crawlo/templates/project/settings_high_performance.py.tmpl,sha256=LzswtNUV6Iw0ewWfnM2FXt2aHNJtUOceXSrg8HBy2mY,5264
+crawlo/templates/project/settings_simple.py.tmpl,sha256=ifn0VhDv8ZGcsahdzhHol18ATwUoq-B2lfbFeJk-ISk,3760
 crawlo/templates/project/spiders/__init__.py.tmpl,sha256=j_YKsw6HQMJyqlk3WUouP3bsr-XVxshRoSNakHBc00g,106
 crawlo/templates/spider/spider.py.tmpl,sha256=jMhzyxpIpV_KigB-pmN-5mGMiYtu4mfQIOvpZcCGGJI,5055
 crawlo/tools/__init__.py,sha256=5H6rAhjfNSqRMjjlLDVq-vEJWRFyCO-J6HN2kexnXJU,3671
@@ -199,8 +199,8 @@ tests/test_template_content.py,sha256=5QAnhKZFDKg-_uFryllLMpCk3a1nCS44hMmYfXm8gW
 tests/test_template_redis_key.py,sha256=U6L5HtnDyGp3s6-O4F_yG2Q2nNIGTqB_Q-ESv2EMeOU,4812
 tests/test_tools.py,sha256=IWiu9JGV-5Ow0ivFtiDw62REht-8Hn7NfyR9rnYSlbU,5113
 tests/tools_example.py,sha256=MfVBYemKvHs6MUbydgrJfhiGnygp5dRoIE-eIXCOR7M,7669
-crawlo-1.2.4.dist-info/METADATA,sha256=7n1JTTKPVs4kOjG2WAleNTxBub7Y6gXb3816BOryQr8,19185
-crawlo-1.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-crawlo-1.2.4.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
-crawlo-1.2.4.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
-crawlo-1.2.4.dist-info/RECORD,,
+crawlo-1.2.5.dist-info/METADATA,sha256=NPGU5IbQFGu8iNyapjE3faSy_A84yHeI9gzjuGv1PbE,19185
+crawlo-1.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+crawlo-1.2.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
+crawlo-1.2.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
+crawlo-1.2.5.dist-info/RECORD,,