crawlo 1.2.4__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__version__.py +1 -1
- crawlo/cli.py +12 -5
- crawlo/commands/startproject.py +22 -6
- crawlo/templates/project/settings.py.tmpl +0 -26
- crawlo/templates/project/settings_distributed.py.tmpl +0 -6
- crawlo/templates/project/settings_gentle.py.tmpl +0 -6
- crawlo/templates/project/settings_high_performance.py.tmpl +0 -6
- crawlo/templates/project/settings_simple.py.tmpl +0 -6
- {crawlo-1.2.4.dist-info → crawlo-1.2.5.dist-info}/METADATA +1 -1
- {crawlo-1.2.4.dist-info → crawlo-1.2.5.dist-info}/RECORD +14 -14
- /crawlo/templates/{project/run.py.tmpl → run.py.tmpl} +0 -0
- {crawlo-1.2.4.dist-info → crawlo-1.2.5.dist-info}/WHEEL +0 -0
- {crawlo-1.2.4.dist-info → crawlo-1.2.5.dist-info}/entry_points.txt +0 -0
- {crawlo-1.2.4.dist-info → crawlo-1.2.5.dist-info}/top_level.txt +0 -0
crawlo/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.4"
|
|
1
|
+
__version__ = "1.2.5"
|
crawlo/cli.py
CHANGED
|
@@ -10,12 +10,19 @@ from crawlo.commands import get_commands
|
|
|
10
10
|
def main():
|
|
11
11
|
# 获取框架版本号
|
|
12
12
|
version_file = os.path.join(os.path.dirname(__file__), '__version__.py')
|
|
13
|
+
VERSION = '1.0.0' # 默认版本号
|
|
13
14
|
if os.path.exists(version_file):
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
15
|
+
try:
|
|
16
|
+
with open(version_file, 'r') as f:
|
|
17
|
+
content = f.read()
|
|
18
|
+
# 使用正则表达式提取版本号
|
|
19
|
+
import re
|
|
20
|
+
version_match = re.search(r"__version__\s*=\s*['\"]([^'\"]*)['\"]", content)
|
|
21
|
+
if version_match:
|
|
22
|
+
VERSION = version_match.group(1)
|
|
23
|
+
except Exception:
|
|
24
|
+
# 如果读取失败,使用默认版本号
|
|
25
|
+
pass
|
|
19
26
|
|
|
20
27
|
# 获取所有可用命令
|
|
21
28
|
commands = get_commands()
|
crawlo/commands/startproject.py
CHANGED
|
@@ -108,7 +108,11 @@ def _copytree_with_templates(src, dst, context, template_type='default', modules
|
|
|
108
108
|
|
|
109
109
|
for item in src_path.rglob('*'):
|
|
110
110
|
rel_path = item.relative_to(src_path)
|
|
111
|
-
|
|
111
|
+
# 对于run.py.tmpl文件,需要特殊处理,将其放到项目根目录
|
|
112
|
+
if item.name == 'run.py.tmpl':
|
|
113
|
+
dst_item = dst_path.parent / rel_path # 放到项目根目录
|
|
114
|
+
else:
|
|
115
|
+
dst_item = dst_path / rel_path
|
|
112
116
|
|
|
113
117
|
# 检查是否应该包含此文件
|
|
114
118
|
path_str = str(rel_path).replace('\\', '/')
|
|
@@ -147,6 +151,9 @@ def _copytree_with_templates(src, dst, context, template_type='default', modules
|
|
|
147
151
|
if item.name == 'settings.py.tmpl':
|
|
148
152
|
# 特殊处理设置模板文件,统一生成为 settings.py
|
|
149
153
|
final_dst = dst_item.parent / 'settings.py'
|
|
154
|
+
# 特殊处理run.py.tmpl文件
|
|
155
|
+
elif item.name == 'run.py.tmpl':
|
|
156
|
+
final_dst = dst_item.with_suffix('') # 去掉.tmpl后缀
|
|
150
157
|
else:
|
|
151
158
|
final_dst = dst_item.with_suffix('')
|
|
152
159
|
|
|
@@ -171,8 +178,8 @@ def _should_include_file(rel_path, modules: List[str]) -> bool:
|
|
|
171
178
|
'settings.py.tmpl',
|
|
172
179
|
'spiders/__init__.py.tmpl',
|
|
173
180
|
'items.py.tmpl',
|
|
174
|
-
'middlewares.py.tmpl'
|
|
175
|
-
'run.py.tmpl'
|
|
181
|
+
'middlewares.py.tmpl'
|
|
182
|
+
# 移除了'run.py.tmpl',因为它现在在模板根目录
|
|
176
183
|
]
|
|
177
184
|
|
|
178
185
|
path_str = str(rel_path).replace('\\', '/')
|
|
@@ -364,16 +371,25 @@ def main(args):
|
|
|
364
371
|
else:
|
|
365
372
|
console.print("[yellow]⚠ 警告:[/yellow] 找不到模板 'crawlo.cfg.tmpl'。")
|
|
366
373
|
|
|
367
|
-
# 3. 复制并渲染项目包内容
|
|
374
|
+
# 3. 渲染 run.py.tmpl (放在项目根目录)
|
|
375
|
+
run_template = TEMPLATES_DIR / 'run.py.tmpl'
|
|
376
|
+
if run_template.exists():
|
|
377
|
+
run_content = _render_template(run_template, context)
|
|
378
|
+
(project_dir / 'run.py').write_text(run_content, encoding='utf-8')
|
|
379
|
+
console.print(f":white_check_mark: 已创建 [green]{project_dir / 'run.py'}[/green]")
|
|
380
|
+
else:
|
|
381
|
+
console.print("[yellow]⚠ 警告:[/yellow] 找不到模板 'run.py.tmpl'。")
|
|
382
|
+
|
|
383
|
+
# 4. 复制并渲染项目包内容
|
|
368
384
|
package_dir = project_dir / project_name
|
|
369
385
|
_copytree_with_templates(template_dir, package_dir, context, template_type, modules)
|
|
370
386
|
console.print(f":white_check_mark: 已创建项目包: [green]{package_dir}[/green]")
|
|
371
387
|
|
|
372
|
-
# 4. 创建 logs 目录
|
|
388
|
+
# 5. 创建 logs 目录
|
|
373
389
|
(project_dir / 'logs').mkdir(exist_ok=True)
|
|
374
390
|
console.print(":white_check_mark: 已创建 logs 目录")
|
|
375
391
|
|
|
376
|
-
# 5. 创建 output 目录(用于数据输出)
|
|
392
|
+
# 6. 创建 output 目录(用于数据输出)
|
|
377
393
|
(project_dir / 'output').mkdir(exist_ok=True)
|
|
378
394
|
console.print(":white_check_mark: 已创建 output 目录")
|
|
379
395
|
|
|
@@ -1,35 +1,9 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
{{project_name}} 项目配置文件
|
|
4
|
-
=============================
|
|
5
|
-
基于 Crawlo 框架的爬虫项目配置。
|
|
6
|
-
|
|
7
|
-
🎯 快速开始:
|
|
8
|
-
|
|
9
|
-
# 方式1:使用默认配置(推荐)
|
|
10
|
-
from crawlo.crawler import CrawlerProcess
|
|
11
|
-
process = CrawlerProcess() # 无需任何配置
|
|
12
|
-
|
|
13
|
-
# 方式2:使用配置工厂
|
|
14
|
-
from crawlo.config import CrawloConfig
|
|
15
|
-
config = CrawloConfig.standalone() # 单机模式
|
|
16
|
-
config = CrawloConfig.distributed(redis_host='192.168.1.100') # 分布式模式
|
|
17
|
-
process = CrawlerProcess(settings=config.to_dict())
|
|
18
|
-
|
|
19
|
-
# 方式3:使用环境变量
|
|
20
|
-
from crawlo.config import CrawloConfig
|
|
21
|
-
config = CrawloConfig.from_env() # 从环境变量读取
|
|
22
|
-
"""
|
|
23
2
|
import os
|
|
24
3
|
from crawlo.config import CrawloConfig
|
|
25
4
|
|
|
26
5
|
# ============================== 项目基本信息 ==============================
|
|
27
6
|
PROJECT_NAME = '{{project_name}}'
|
|
28
|
-
try:
|
|
29
|
-
from crawlo import __version__
|
|
30
|
-
VERSION = __version__
|
|
31
|
-
except ImportError:
|
|
32
|
-
VERSION = '1.0.0'
|
|
33
7
|
|
|
34
8
|
# ============================== 运行模式选择 ==============================
|
|
35
9
|
|
|
@@ -5,17 +5,11 @@
|
|
|
5
5
|
基于 Crawlo 框架的分布式爬虫项目配置。
|
|
6
6
|
适合大规模数据采集和多节点部署。
|
|
7
7
|
"""
|
|
8
|
-
|
|
9
8
|
import os
|
|
10
9
|
from crawlo.config import CrawloConfig
|
|
11
10
|
|
|
12
11
|
# ============================== 项目基本信息 ==============================
|
|
13
12
|
PROJECT_NAME = '{{project_name}}'
|
|
14
|
-
try:
|
|
15
|
-
from crawlo import __version__
|
|
16
|
-
VERSION = __version__
|
|
17
|
-
except ImportError:
|
|
18
|
-
VERSION = '1.0.0'
|
|
19
13
|
|
|
20
14
|
# ============================== 分布式配置说明 ==============================
|
|
21
15
|
#
|
|
@@ -5,17 +5,11 @@
|
|
|
5
5
|
基于 Crawlo 框架的温和爬虫项目配置。
|
|
6
6
|
适合对目标网站友好的低负载爬取。
|
|
7
7
|
"""
|
|
8
|
-
|
|
9
8
|
import os
|
|
10
9
|
from crawlo.config import CrawloConfig
|
|
11
10
|
|
|
12
11
|
# ============================== 项目基本信息 ==============================
|
|
13
12
|
PROJECT_NAME = '{{project_name}}'
|
|
14
|
-
try:
|
|
15
|
-
from crawlo import __version__
|
|
16
|
-
VERSION = __version__
|
|
17
|
-
except ImportError:
|
|
18
|
-
VERSION = '1.0.0'
|
|
19
13
|
|
|
20
14
|
# ============================== 温和模式配置说明 ==============================
|
|
21
15
|
#
|
|
@@ -5,17 +5,11 @@
|
|
|
5
5
|
基于 Crawlo 框架的高性能爬虫项目配置。
|
|
6
6
|
针对大规模、高并发场景优化。
|
|
7
7
|
"""
|
|
8
|
-
|
|
9
8
|
import os
|
|
10
9
|
from crawlo.config import CrawloConfig
|
|
11
10
|
|
|
12
11
|
# ============================== 项目基本信息 ==============================
|
|
13
12
|
PROJECT_NAME = '{{project_name}}'
|
|
14
|
-
try:
|
|
15
|
-
from crawlo import __version__
|
|
16
|
-
VERSION = __version__
|
|
17
|
-
except ImportError:
|
|
18
|
-
VERSION = '1.0.0'
|
|
19
13
|
|
|
20
14
|
# ============================== 高性能配置 ==============================
|
|
21
15
|
# 使用配置工厂创建高性能配置
|
|
@@ -5,17 +5,11 @@
|
|
|
5
5
|
基于 Crawlo 框架的简化爬虫项目配置。
|
|
6
6
|
适合快速开始和小型项目。
|
|
7
7
|
"""
|
|
8
|
-
|
|
9
8
|
import os
|
|
10
9
|
from crawlo.config import CrawloConfig
|
|
11
10
|
|
|
12
11
|
# ============================== 项目基本信息 ==============================
|
|
13
12
|
PROJECT_NAME = '{{project_name}}'
|
|
14
|
-
try:
|
|
15
|
-
from crawlo import __version__
|
|
16
|
-
VERSION = __version__
|
|
17
|
-
except ImportError:
|
|
18
|
-
VERSION = '1.0.0'
|
|
19
13
|
|
|
20
14
|
# ============================== 简化版配置说明 ==============================
|
|
21
15
|
#
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
crawlo/__init__.py,sha256=1tc6uUDF1yRNU7K_k-Dl6h9FGy7Jp8fdhRsXu9PctFI,1312
|
|
2
|
-
crawlo/__version__.py,sha256=
|
|
3
|
-
crawlo/cli.py,sha256=
|
|
2
|
+
crawlo/__version__.py,sha256=jBmZf3HLbiQlWiolOsAA6J5-BbxXD2bqFqEqDH3lfqo,22
|
|
3
|
+
crawlo/cli.py,sha256=8OD2us3YYG55AMTm_3YmHiZ8_Plokkx6p2DVR7w0JGs,2872
|
|
4
4
|
crawlo/config.py,sha256=zbFn2HfcbDcD1QOSgNOB9kjw12UDb7zo4Y7RLcJJirQ,9506
|
|
5
5
|
crawlo/config_validator.py,sha256=eQWrSY4xNnbDgwAmPOENyMb0wb3rdzOL55yBCKmpIak,9883
|
|
6
6
|
crawlo/crawler.py,sha256=KPVgLpDJIZ0k3-U9CtrYgvnLatFG9eYPdjUtqoPBHXU,40305
|
|
@@ -21,7 +21,7 @@ crawlo/commands/genspider.py,sha256=NAeXmfb9ElLOuLc4F2xIzuG779KvAAesy3EZa_QxqR8,
|
|
|
21
21
|
crawlo/commands/help.py,sha256=pHp0m6QBF4NZnZncPBxOLRlJQXJZQsztlyn5euFD9po,5186
|
|
22
22
|
crawlo/commands/list.py,sha256=VR4AWrOiLQgw2t6u8dm9uxsCoCPzX36_6XeTs3tzcQk,5772
|
|
23
23
|
crawlo/commands/run.py,sha256=-5psh1ui0zJmHjmzkSo8Tnl99B1e4Rz90lk0Ak-2SuA,12236
|
|
24
|
-
crawlo/commands/startproject.py,sha256=
|
|
24
|
+
crawlo/commands/startproject.py,sha256=8tMokyCBN4nEpWjAca3NQorU5t8WR5ffBQZ7eyvUWpA,16260
|
|
25
25
|
crawlo/commands/stats.py,sha256=fCNk7ix6v9PrscC-wKQl6ddA4xaQLogrN_1SNEj66tk,6128
|
|
26
26
|
crawlo/commands/utils.py,sha256=3nP8bMgPH36ELqg1ilzPhHS873ncNvZNxOEDYY4uDm4,5087
|
|
27
27
|
crawlo/core/__init__.py,sha256=PnFyJdVNHBoPmV1sW0AHQXijeoSTQ8cMYrbNM1JK8kA,41
|
|
@@ -85,16 +85,16 @@ crawlo/settings/default_settings.py,sha256=FnytMOyx4bpzfLkdbppBTkkKn12bxYuWBShVM
|
|
|
85
85
|
crawlo/settings/setting_manager.py,sha256=0RYAk07qoJ5WTw_mvV4ECWGS2QNpCnGmBZVTHjqOVIg,3707
|
|
86
86
|
crawlo/spider/__init__.py,sha256=Z_rK23l5yt-DuwJPg8bcqodM_FIs4-iHLaKOimGumcE,20452
|
|
87
87
|
crawlo/templates/crawlo.cfg.tmpl,sha256=9BAmwEibS5Tvy6HIcGXPb0BGeuesmibebmTW0iAEkmo,230
|
|
88
|
+
crawlo/templates/run.py.tmpl,sha256=v_g-LQMYJ6pC8TZgyWj0yB2yTTKrwy9lEJufAYCXyxY,1228
|
|
88
89
|
crawlo/templates/project/__init__.py.tmpl,sha256=f3ETIXw_O6K-lkL6lXM5znMPJW1FZYGFrwDs2BnHcnQ,58
|
|
89
90
|
crawlo/templates/project/items.py.tmpl,sha256=mt1Mm--H2Ouos3r7JPkYh0r33rgYJf1YOMz0OZy8TYs,297
|
|
90
91
|
crawlo/templates/project/middlewares.py.tmpl,sha256=T67p8j0laL4NJJ_3xzPM9yivgZRjTEMiEtEWLPwbkmw,4160
|
|
91
92
|
crawlo/templates/project/pipelines.py.tmpl,sha256=GBHYU0Jx8sKDCdGJp44FMSH7u2slxoFg6a-R9Uwg_-I,2608
|
|
92
|
-
crawlo/templates/project/
|
|
93
|
-
crawlo/templates/project/
|
|
94
|
-
crawlo/templates/project/
|
|
95
|
-
crawlo/templates/project/
|
|
96
|
-
crawlo/templates/project/
|
|
97
|
-
crawlo/templates/project/settings_simple.py.tmpl,sha256=K0ZQ1QQBCVm7nJkPQqzNdu8gZl9gZAPMSoyEyhCX7og,3869
|
|
93
|
+
crawlo/templates/project/settings.py.tmpl,sha256=DFXWWMo88KQyvg-VVEURyEfEd5EEpAdTrehbUaK0FEM,12661
|
|
94
|
+
crawlo/templates/project/settings_distributed.py.tmpl,sha256=wqrQjQuydhnP8O1xs-8g5txu42aV5YVC4QHVhvpc1-c,5691
|
|
95
|
+
crawlo/templates/project/settings_gentle.py.tmpl,sha256=7UTZe8JqA1SUKozlxiGjxgWsakzIhKAtPvU7EfB28wQ,4759
|
|
96
|
+
crawlo/templates/project/settings_high_performance.py.tmpl,sha256=LzswtNUV6Iw0ewWfnM2FXt2aHNJtUOceXSrg8HBy2mY,5264
|
|
97
|
+
crawlo/templates/project/settings_simple.py.tmpl,sha256=ifn0VhDv8ZGcsahdzhHol18ATwUoq-B2lfbFeJk-ISk,3760
|
|
98
98
|
crawlo/templates/project/spiders/__init__.py.tmpl,sha256=j_YKsw6HQMJyqlk3WUouP3bsr-XVxshRoSNakHBc00g,106
|
|
99
99
|
crawlo/templates/spider/spider.py.tmpl,sha256=jMhzyxpIpV_KigB-pmN-5mGMiYtu4mfQIOvpZcCGGJI,5055
|
|
100
100
|
crawlo/tools/__init__.py,sha256=5H6rAhjfNSqRMjjlLDVq-vEJWRFyCO-J6HN2kexnXJU,3671
|
|
@@ -199,8 +199,8 @@ tests/test_template_content.py,sha256=5QAnhKZFDKg-_uFryllLMpCk3a1nCS44hMmYfXm8gW
|
|
|
199
199
|
tests/test_template_redis_key.py,sha256=U6L5HtnDyGp3s6-O4F_yG2Q2nNIGTqB_Q-ESv2EMeOU,4812
|
|
200
200
|
tests/test_tools.py,sha256=IWiu9JGV-5Ow0ivFtiDw62REht-8Hn7NfyR9rnYSlbU,5113
|
|
201
201
|
tests/tools_example.py,sha256=MfVBYemKvHs6MUbydgrJfhiGnygp5dRoIE-eIXCOR7M,7669
|
|
202
|
-
crawlo-1.2.
|
|
203
|
-
crawlo-1.2.
|
|
204
|
-
crawlo-1.2.
|
|
205
|
-
crawlo-1.2.
|
|
206
|
-
crawlo-1.2.
|
|
202
|
+
crawlo-1.2.5.dist-info/METADATA,sha256=NPGU5IbQFGu8iNyapjE3faSy_A84yHeI9gzjuGv1PbE,19185
|
|
203
|
+
crawlo-1.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
204
|
+
crawlo-1.2.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
|
|
205
|
+
crawlo-1.2.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
|
|
206
|
+
crawlo-1.2.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|