cobweb-launcher 3.1.23__py3-none-any.whl → 3.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cobweb/base/task_queue.py CHANGED
@@ -10,8 +10,8 @@ class Status(Enum):
10
10
  PENDING = 0 # 待处理
11
11
  PROCESSING = 1 # 处理中
12
12
  FINISHED = 2 # 已完成
13
- INSERT = 3 # 失败
14
- UPLOAD = 4 # 过期
13
+ INSERT = 3 # 新增
14
+ UPLOAD = 4 # 上传
15
15
 
16
16
 
17
17
  @dataclass
@@ -138,9 +138,10 @@ class TaskQueue:
138
138
  if data:
139
139
  task_item.data = data
140
140
 
141
- for tid in task_item.children_ids:
142
- if self._tasks[tid].status == Status.INSERT:
143
- del self._tasks[tid]
141
+ if task_item.status != Status.FINISHED:
142
+ for tid in task_item.children_ids:
143
+ if self._tasks[tid].status == Status.INSERT:
144
+ del self._tasks[tid]
144
145
 
145
146
  task_item.children_ids = []
146
147
  self._tasks[task_id] = task_item
@@ -176,4 +177,4 @@ class TaskQueue:
176
177
  # expired_ids.append(seed_id)
177
178
  # for seed_id in expired_ids:
178
179
  # self._seeds[seed_id] = self._seeds[seed_id]._replace(status=SeedStatus.EXPIRED)
179
- # print(f"清理了 {len(expired_ids)} 个过期种子")
180
+ # print(f"清理了 {len(expired_ids)} 个过期种子")
cobweb/constant.py CHANGED
@@ -51,7 +51,7 @@ class LogTemplate:
51
51
  launcher_pro_polling = """
52
52
  ----------------------- start - 轮训日志: {task} -----------------
53
53
  内存队列
54
- 种子数: {doing_len}
54
+ 消费中: {doing_len}
55
55
  待消费: {todo_len}
56
56
  已消费: {done_len}
57
57
  redis队列
@@ -39,45 +39,28 @@ class Uploader(threading.Thread):
39
39
 
40
40
  @check_pause
41
41
  def upload_data(self):
42
- if task_list := self.task_queue.get_task_by_status(
43
- status=Status.UPLOAD, limit=self.upload_size
44
- ):
45
- try:
46
- data_info, task_ids = dict(), set()
42
+ try:
43
+ data_info, task_ids = dict(), set()
44
+ if task_list := self.task_queue.get_task_by_status(
45
+ status=Status.UPLOAD, limit=self.upload_size
46
+ ):
47
47
  for task_item in task_list:
48
48
  upload_data = self.pipeline.build(task_item.data)
49
49
  data_info.setdefault(task_item.data.table, []).append(upload_data)
50
+ task_ids.add(task_item.task_id)
50
51
 
51
52
  for table, datas in data_info.items():
52
53
  try:
53
54
  self.pipeline.upload(table, datas)
54
55
  except Exception as e:
55
56
  logger.info(e)
56
- except Exception as e:
57
- logger.info(e)
58
-
59
- if self.task_queue.status_length(status=Status.UPLOAD) < self.upload_size:
60
- time.sleep(self.wait_seconds)
61
-
62
- # data_info, seeds = {}, []
63
- # try:
64
- # for _ in range(self.upload_size):
65
- # item = self.upload.pop()
66
- # if not item:
67
- # break
68
- # # seeds.append(item.seed)
69
- # data = self.pipeline.build(item)
70
- # data_info.setdefault(item.table, []).append(data)
71
- # for table, datas in data_info.items():
72
- # try:
73
- # self.pipeline.upload(table, datas)
74
- # # TODO: 上传打点
75
- # except Exception as e:
76
- # logger.info(e)
77
- # except Exception as e:
78
- # logger.info(e)
79
- # if self.upload.length < self.upload_size:
80
- # time.sleep(self.wait_seconds)
57
+
58
+ self.task_queue.remove(task_ids)
59
+ except Exception as e:
60
+ logger.info(e)
61
+
62
+ if self.task_queue.status_length(status=Status.UPLOAD) < self.upload_size:
63
+ time.sleep(self.wait_seconds)
81
64
 
82
65
  def run(self):
83
66
  self.callback_register(self.upload_data, tag="Uploader")
@@ -52,7 +52,8 @@ class RedisScheduler(Scheduler):
52
52
  time.sleep(self.scheduler_wait_seconds)
53
53
  return
54
54
 
55
- if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size:
55
+ if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size\
56
+ or self.task_queue.length() > 5 * self.todo_queue_size:
56
57
  time.sleep(self.todo_queue_full_wait_seconds)
57
58
  return
58
59
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.23
3
+ Version: 3.1.25
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -177,12 +177,16 @@ app.start()
177
177
  > &nbsp;&nbsp;&nbsp;&nbsp;upload_item["text"] = item.response.text
178
178
  > &nbsp;&nbsp;&nbsp;&nbsp;yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
179
179
  ## todo
180
- - 队列优化完善,使用queue的机制wait()同步各模块执行?
181
- - 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
182
- - 去重过滤(布隆过滤器等)
183
- - 单机防丢失
180
+ - [ ] 队列优化完善,使用queue的机制wait()同步各模块执行?
181
+ - [x] 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
182
+ - [ ] 去重过滤(布隆过滤器等)
183
+ - [ ] 请求检验
184
+ - [ ] 异常回调
185
+ - [ ] 失败回调
184
186
 
185
187
  > 未更新流程图!!!
186
188
  ![img.png](https://image-luyuan.oss-cn-hangzhou.aliyuncs.com/image/D2388CDC-B9E5-4CE4-9F2C-7D173763B6A8.png)
187
189
 
188
190
 
191
+
192
+
@@ -1,5 +1,5 @@
1
1
  cobweb/__init__.py,sha256=YdBi3uytEFRXan155xU1kKMpiUKUupO2RGeJyXmH0zk,129
2
- cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
2
+ cobweb/constant.py,sha256=s6W4Fz3DhH-4RutoWnR2bylL8eU44rc-CeOsovj87I0,2839
3
3
  cobweb/setting.py,sha256=rHPQfc4a1xMTbkt3_KXBfUomhYcbTXogsz7ew-QsqHw,1670
4
4
  cobweb/base/__init__.py,sha256=NanSxJr0WsqjqCNOQAlxlkt-vQEsERHYBzacFC057oI,222
5
5
  cobweb/base/common_queue.py,sha256=hYdaM70KrWjvACuLKaGhkI2VqFCnd87NVvWzmnfIg8Q,1423
@@ -8,7 +8,7 @@ cobweb/base/logger.py,sha256=Vsg1bD4LXW91VgY-ANsmaUu-mD88hU_WS83f7jX3qF8,2011
8
8
  cobweb/base/request.py,sha256=MBYYjWpbRQRulPG0zPbK0DO3LKmScqQ4tBzFXekYkao,2652
9
9
  cobweb/base/response.py,sha256=g8e5H0hEiRfqseh3nD7t6a1rhIJYRMV7nI47kqNOd-U,446
10
10
  cobweb/base/seed.py,sha256=ddaWCq_KaWwpmPl1CToJlfCxEEnoJ16kjo6azJs9uls,5000
11
- cobweb/base/task_queue.py,sha256=3ScPKnjlPEuuCzWyG9D2iHiND3L9lLM7fo1LNOkw8CY,6337
11
+ cobweb/base/task_queue.py,sha256=2MqGpHGNmK5B-kqv7z420RWyihzB9zgDHJUiLsmtzOI,6402
12
12
  cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
13
13
  cobweb/crawlers/crawler.py,sha256=ZZVZJ17RWuvzUFGLjqdvyVZpmuq-ynslJwXQzdm_UdQ,709
14
14
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
@@ -19,22 +19,22 @@ cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BX
19
19
  cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
20
20
  cobweb/launchers/distributor.py,sha256=I5QBs2hFiyGGkqLLkMw9uzf4_oRW2JvahNW9yc866cc,6748
21
21
  cobweb/launchers/launcher.py,sha256=Shb6o6MAM38d32ybW2gY6qpGmhuiV7jo9TDh0f7rud8,5694
22
- cobweb/launchers/uploader.py,sha256=dDBv6Vfy1ciaTAJA3TebJV-2oM3OMrqTfzpNX8VGv-0,2766
22
+ cobweb/launchers/uploader.py,sha256=QwJOmG7jq2T5sRzrT386zJ0YYNz-hAv0i6GOpoEaRdU,2075
23
23
  cobweb/pipelines/__init__.py,sha256=rtkaaCZ4u1XcxpkDLHztETQjEcLZ_6DXTHjdfcJlyxQ,97
24
24
  cobweb/pipelines/pipeline.py,sha256=OgSEZ2DdqofpZcer1Wj1tuBqn8OHVjrYQ5poqt75czQ,357
25
25
  cobweb/pipelines/pipeline_csv.py,sha256=TFqxqgVUqkBF6Jott4zd6fvCSxzG67lpafRQtXPw1eg,807
26
26
  cobweb/pipelines/pipeline_loghub.py,sha256=zwIa_pcWBB2UNGd32Cu-i1jKGNruTbo2STdxl1WGwZ0,1829
27
27
  cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
28
28
  cobweb/schedulers/scheduler.py,sha256=Of-BjbBh679R6glc12Kc8iugeERCSusP7jolpCc1UMI,1740
29
- cobweb/schedulers/scheduler_with_redis.py,sha256=SUiEjYhzbbzc5kt_zpK8bXaEjIpwqC-JBk8ApHcVa18,7149
29
+ cobweb/schedulers/scheduler_with_redis.py,sha256=dafg8jllatBiTz8J-qjTo80Xw80jkdHFW-wKoyaH7G0,7221
30
30
  cobweb/utils/__init__.py,sha256=TRFJyyBjaQH_sejU6G_msOeHpjc3ZXU0dUOO5GQfknM,171
31
31
  cobweb/utils/bloom.py,sha256=A8xqtHXp7jgRoBuUlpovmq8lhU5y7IEF0FOCjfQDb6s,1855
32
32
  cobweb/utils/decorators.py,sha256=ZwVQlz-lYHgXgKf9KRCp15EWPzTDdhoikYUNUCIqNeM,1140
33
33
  cobweb/utils/dotting.py,sha256=L-jGSApdnFIP4jUWH6p5qIme0aJ1vyDrxAx8wOJWvcs,1960
34
34
  cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
35
35
  cobweb/utils/tools.py,sha256=14TCedqt07m4z6bCnFAsITOFixeGr8V3aOKk--L7Cr0,879
36
- cobweb_launcher-3.1.23.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
37
- cobweb_launcher-3.1.23.dist-info/METADATA,sha256=QwwqDS7cSVmiivRXj_Kgu2BZW527APBQ-Qe6frnjIls,5998
38
- cobweb_launcher-3.1.23.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
39
- cobweb_launcher-3.1.23.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
40
- cobweb_launcher-3.1.23.dist-info/RECORD,,
36
+ cobweb_launcher-3.1.25.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
37
+ cobweb_launcher-3.1.25.dist-info/METADATA,sha256=QkLxxH-8qIdxnqsEB6W-dZjX4PtnoYqnCemFTXzgyNg,6051
38
+ cobweb_launcher-3.1.25.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
39
+ cobweb_launcher-3.1.25.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
40
+ cobweb_launcher-3.1.25.dist-info/RECORD,,