cobweb-launcher 3.1.23__py3-none-any.whl → 3.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/base/task_queue.py +7 -6
- cobweb/constant.py +1 -1
- cobweb/launchers/uploader.py +13 -30
- cobweb/schedulers/scheduler_with_redis.py +2 -1
- {cobweb_launcher-3.1.23.dist-info → cobweb_launcher-3.1.25.dist-info}/METADATA +9 -5
- {cobweb_launcher-3.1.23.dist-info → cobweb_launcher-3.1.25.dist-info}/RECORD +9 -9
- {cobweb_launcher-3.1.23.dist-info → cobweb_launcher-3.1.25.dist-info}/LICENSE +0 -0
- {cobweb_launcher-3.1.23.dist-info → cobweb_launcher-3.1.25.dist-info}/WHEEL +0 -0
- {cobweb_launcher-3.1.23.dist-info → cobweb_launcher-3.1.25.dist-info}/top_level.txt +0 -0
cobweb/base/task_queue.py
CHANGED
@@ -10,8 +10,8 @@ class Status(Enum):
|
|
10
10
|
PENDING = 0 # 待处理
|
11
11
|
PROCESSING = 1 # 处理中
|
12
12
|
FINISHED = 2 # 已完成
|
13
|
-
INSERT = 3 #
|
14
|
-
UPLOAD = 4 #
|
13
|
+
INSERT = 3 # 新增
|
14
|
+
UPLOAD = 4 # 上传
|
15
15
|
|
16
16
|
|
17
17
|
@dataclass
|
@@ -138,9 +138,10 @@ class TaskQueue:
|
|
138
138
|
if data:
|
139
139
|
task_item.data = data
|
140
140
|
|
141
|
-
|
142
|
-
|
143
|
-
|
141
|
+
if task_item.status != Status.FINISHED:
|
142
|
+
for tid in task_item.children_ids:
|
143
|
+
if self._tasks[tid].status == Status.INSERT:
|
144
|
+
del self._tasks[tid]
|
144
145
|
|
145
146
|
task_item.children_ids = []
|
146
147
|
self._tasks[task_id] = task_item
|
@@ -176,4 +177,4 @@ class TaskQueue:
|
|
176
177
|
# expired_ids.append(seed_id)
|
177
178
|
# for seed_id in expired_ids:
|
178
179
|
# self._seeds[seed_id] = self._seeds[seed_id]._replace(status=SeedStatus.EXPIRED)
|
179
|
-
# print(f"清理了 {len(expired_ids)} 个过期种子")
|
180
|
+
# print(f"清理了 {len(expired_ids)} 个过期种子")
|
cobweb/constant.py
CHANGED
cobweb/launchers/uploader.py
CHANGED
@@ -39,45 +39,28 @@ class Uploader(threading.Thread):
|
|
39
39
|
|
40
40
|
@check_pause
|
41
41
|
def upload_data(self):
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
try:
|
43
|
+
data_info, task_ids = dict(), set()
|
44
|
+
if task_list := self.task_queue.get_task_by_status(
|
45
|
+
status=Status.UPLOAD, limit=self.upload_size
|
46
|
+
):
|
47
47
|
for task_item in task_list:
|
48
48
|
upload_data = self.pipeline.build(task_item.data)
|
49
49
|
data_info.setdefault(task_item.data.table, []).append(upload_data)
|
50
|
+
task_ids.add(task_item.task_id)
|
50
51
|
|
51
52
|
for table, datas in data_info.items():
|
52
53
|
try:
|
53
54
|
self.pipeline.upload(table, datas)
|
54
55
|
except Exception as e:
|
55
56
|
logger.info(e)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
# try:
|
64
|
-
# for _ in range(self.upload_size):
|
65
|
-
# item = self.upload.pop()
|
66
|
-
# if not item:
|
67
|
-
# break
|
68
|
-
# # seeds.append(item.seed)
|
69
|
-
# data = self.pipeline.build(item)
|
70
|
-
# data_info.setdefault(item.table, []).append(data)
|
71
|
-
# for table, datas in data_info.items():
|
72
|
-
# try:
|
73
|
-
# self.pipeline.upload(table, datas)
|
74
|
-
# # TODO: 上传打点
|
75
|
-
# except Exception as e:
|
76
|
-
# logger.info(e)
|
77
|
-
# except Exception as e:
|
78
|
-
# logger.info(e)
|
79
|
-
# if self.upload.length < self.upload_size:
|
80
|
-
# time.sleep(self.wait_seconds)
|
57
|
+
|
58
|
+
self.task_queue.remove(task_ids)
|
59
|
+
except Exception as e:
|
60
|
+
logger.info(e)
|
61
|
+
|
62
|
+
if self.task_queue.status_length(status=Status.UPLOAD) < self.upload_size:
|
63
|
+
time.sleep(self.wait_seconds)
|
81
64
|
|
82
65
|
def run(self):
|
83
66
|
self.callback_register(self.upload_data, tag="Uploader")
|
@@ -52,7 +52,8 @@ class RedisScheduler(Scheduler):
|
|
52
52
|
time.sleep(self.scheduler_wait_seconds)
|
53
53
|
return
|
54
54
|
|
55
|
-
if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size
|
55
|
+
if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size\
|
56
|
+
or self.task_queue.length() > 5 * self.todo_queue_size:
|
56
57
|
time.sleep(self.todo_queue_full_wait_seconds)
|
57
58
|
return
|
58
59
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cobweb-launcher
|
3
|
-
Version: 3.1.23
|
3
|
+
Version: 3.1.25
|
4
4
|
Summary: spider_hole
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
6
6
|
Author: Juannie-PP
|
@@ -177,12 +177,16 @@ app.start()
|
|
177
177
|
> upload_item["text"] = item.response.text
|
178
178
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
179
179
|
## todo
|
180
|
-
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
181
|
-
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
182
|
-
- 去重过滤(布隆过滤器等)
|
183
|
-
-
|
180
|
+
- [ ] 队列优化完善,使用queue的机制wait()同步各模块执行?
|
181
|
+
- [x] 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
182
|
+
- [ ] 去重过滤(布隆过滤器等)
|
183
|
+
- [ ] 请求检验
|
184
|
+
- [ ] 异常回调
|
185
|
+
- [ ] 失败回调
|
184
186
|
|
185
187
|
> 未更新流程图!!!
|
186
188
|

|
187
189
|
|
188
190
|
|
191
|
+
|
192
|
+
|
@@ -1,5 +1,5 @@
|
|
1
1
|
cobweb/__init__.py,sha256=YdBi3uytEFRXan155xU1kKMpiUKUupO2RGeJyXmH0zk,129
|
2
|
-
cobweb/constant.py,sha256=
|
2
|
+
cobweb/constant.py,sha256=s6W4Fz3DhH-4RutoWnR2bylL8eU44rc-CeOsovj87I0,2839
|
3
3
|
cobweb/setting.py,sha256=rHPQfc4a1xMTbkt3_KXBfUomhYcbTXogsz7ew-QsqHw,1670
|
4
4
|
cobweb/base/__init__.py,sha256=NanSxJr0WsqjqCNOQAlxlkt-vQEsERHYBzacFC057oI,222
|
5
5
|
cobweb/base/common_queue.py,sha256=hYdaM70KrWjvACuLKaGhkI2VqFCnd87NVvWzmnfIg8Q,1423
|
@@ -8,7 +8,7 @@ cobweb/base/logger.py,sha256=Vsg1bD4LXW91VgY-ANsmaUu-mD88hU_WS83f7jX3qF8,2011
|
|
8
8
|
cobweb/base/request.py,sha256=MBYYjWpbRQRulPG0zPbK0DO3LKmScqQ4tBzFXekYkao,2652
|
9
9
|
cobweb/base/response.py,sha256=g8e5H0hEiRfqseh3nD7t6a1rhIJYRMV7nI47kqNOd-U,446
|
10
10
|
cobweb/base/seed.py,sha256=ddaWCq_KaWwpmPl1CToJlfCxEEnoJ16kjo6azJs9uls,5000
|
11
|
-
cobweb/base/task_queue.py,sha256=
|
11
|
+
cobweb/base/task_queue.py,sha256=2MqGpHGNmK5B-kqv7z420RWyihzB9zgDHJUiLsmtzOI,6402
|
12
12
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
13
13
|
cobweb/crawlers/crawler.py,sha256=ZZVZJ17RWuvzUFGLjqdvyVZpmuq-ynslJwXQzdm_UdQ,709
|
14
14
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
@@ -19,22 +19,22 @@ cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BX
|
|
19
19
|
cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
|
20
20
|
cobweb/launchers/distributor.py,sha256=I5QBs2hFiyGGkqLLkMw9uzf4_oRW2JvahNW9yc866cc,6748
|
21
21
|
cobweb/launchers/launcher.py,sha256=Shb6o6MAM38d32ybW2gY6qpGmhuiV7jo9TDh0f7rud8,5694
|
22
|
-
cobweb/launchers/uploader.py,sha256=
|
22
|
+
cobweb/launchers/uploader.py,sha256=QwJOmG7jq2T5sRzrT386zJ0YYNz-hAv0i6GOpoEaRdU,2075
|
23
23
|
cobweb/pipelines/__init__.py,sha256=rtkaaCZ4u1XcxpkDLHztETQjEcLZ_6DXTHjdfcJlyxQ,97
|
24
24
|
cobweb/pipelines/pipeline.py,sha256=OgSEZ2DdqofpZcer1Wj1tuBqn8OHVjrYQ5poqt75czQ,357
|
25
25
|
cobweb/pipelines/pipeline_csv.py,sha256=TFqxqgVUqkBF6Jott4zd6fvCSxzG67lpafRQtXPw1eg,807
|
26
26
|
cobweb/pipelines/pipeline_loghub.py,sha256=zwIa_pcWBB2UNGd32Cu-i1jKGNruTbo2STdxl1WGwZ0,1829
|
27
27
|
cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
|
28
28
|
cobweb/schedulers/scheduler.py,sha256=Of-BjbBh679R6glc12Kc8iugeERCSusP7jolpCc1UMI,1740
|
29
|
-
cobweb/schedulers/scheduler_with_redis.py,sha256=
|
29
|
+
cobweb/schedulers/scheduler_with_redis.py,sha256=dafg8jllatBiTz8J-qjTo80Xw80jkdHFW-wKoyaH7G0,7221
|
30
30
|
cobweb/utils/__init__.py,sha256=TRFJyyBjaQH_sejU6G_msOeHpjc3ZXU0dUOO5GQfknM,171
|
31
31
|
cobweb/utils/bloom.py,sha256=A8xqtHXp7jgRoBuUlpovmq8lhU5y7IEF0FOCjfQDb6s,1855
|
32
32
|
cobweb/utils/decorators.py,sha256=ZwVQlz-lYHgXgKf9KRCp15EWPzTDdhoikYUNUCIqNeM,1140
|
33
33
|
cobweb/utils/dotting.py,sha256=L-jGSApdnFIP4jUWH6p5qIme0aJ1vyDrxAx8wOJWvcs,1960
|
34
34
|
cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
|
35
35
|
cobweb/utils/tools.py,sha256=14TCedqt07m4z6bCnFAsITOFixeGr8V3aOKk--L7Cr0,879
|
36
|
-
cobweb_launcher-3.1.23.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
37
|
-
cobweb_launcher-3.1.
|
38
|
-
cobweb_launcher-3.1.23.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
39
|
-
cobweb_launcher-3.1.23.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
40
|
-
cobweb_launcher-3.1.23.dist-info/RECORD,,
|
36
|
+
cobweb_launcher-3.1.25.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
37
|
+
cobweb_launcher-3.1.25.dist-info/METADATA,sha256=QkLxxH-8qIdxnqsEB6W-dZjX4PtnoYqnCemFTXzgyNg,6051
|
38
|
+
cobweb_launcher-3.1.25.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
39
|
+
cobweb_launcher-3.1.25.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
40
|
+
cobweb_launcher-3.1.25.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|