cobweb-launcher 3.1.23__tar.gz → 3.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cobweb-launcher-3.1.23/cobweb_launcher.egg-info → cobweb-launcher-3.1.25}/PKG-INFO +9 -5
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/README.md +8 -4
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/task_queue.py +7 -6
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/constant.py +1 -1
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/launchers/uploader.py +13 -30
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/schedulers/scheduler_with_redis.py +2 -1
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25/cobweb_launcher.egg-info}/PKG-INFO +9 -5
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/setup.py +1 -1
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/LICENSE +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/item.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/logger.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/request.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/response.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/launchers/distributor.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/launchers/launcher.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/pipelines/pipeline_csv.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/schedulers/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/schedulers/scheduler.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/setting.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/decorators.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cobweb-launcher
|
3
|
-
Version: 3.1.23
|
3
|
+
Version: 3.1.25
|
4
4
|
Summary: spider_hole
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
6
6
|
Author: Juannie-PP
|
@@ -174,12 +174,16 @@ app.start()
|
|
174
174
|
> upload_item["text"] = item.response.text
|
175
175
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
176
176
|
## todo
|
177
|
-
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
178
|
-
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
179
|
-
- 去重过滤(布隆过滤器等)
|
180
|
-
-
|
177
|
+
- [ ] 队列优化完善,使用queue的机制wait()同步各模块执行?
|
178
|
+
- [x] 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
179
|
+
- [ ] 去重过滤(布隆过滤器等)
|
180
|
+
- [ ] 请求检验
|
181
|
+
- [ ] 异常回调
|
182
|
+
- [ ] 失败回调
|
181
183
|
|
182
184
|
> 未更新流程图!!!
|
183
185
|

|
184
186
|
|
185
187
|
|
188
|
+
|
189
|
+
|
@@ -159,10 +159,14 @@ app.start()
|
|
159
159
|
> upload_item["text"] = item.response.text
|
160
160
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
161
161
|
## todo
|
162
|
-
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
163
|
-
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
164
|
-
- 去重过滤(布隆过滤器等)
|
165
|
-
-
|
162
|
+
- [ ] 队列优化完善,使用queue的机制wait()同步各模块执行?
|
163
|
+
- [x] 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
164
|
+
- [ ] 去重过滤(布隆过滤器等)
|
165
|
+
- [ ] 请求检验
|
166
|
+
- [ ] 异常回调
|
167
|
+
- [ ] 失败回调
|
166
168
|
|
167
169
|
> 未更新流程图!!!
|
168
170
|

|
171
|
+
|
172
|
+
|
@@ -10,8 +10,8 @@ class Status(Enum):
|
|
10
10
|
PENDING = 0 # 待处理
|
11
11
|
PROCESSING = 1 # 处理中
|
12
12
|
FINISHED = 2 # 已完成
|
13
|
-
INSERT = 3 #
|
14
|
-
UPLOAD = 4 #
|
13
|
+
INSERT = 3 # 新增
|
14
|
+
UPLOAD = 4 # 上传
|
15
15
|
|
16
16
|
|
17
17
|
@dataclass
|
@@ -138,9 +138,10 @@ class TaskQueue:
|
|
138
138
|
if data:
|
139
139
|
task_item.data = data
|
140
140
|
|
141
|
-
|
142
|
-
|
143
|
-
|
141
|
+
if task_item.status != Status.FINISHED:
|
142
|
+
for tid in task_item.children_ids:
|
143
|
+
if self._tasks[tid].status == Status.INSERT:
|
144
|
+
del self._tasks[tid]
|
144
145
|
|
145
146
|
task_item.children_ids = []
|
146
147
|
self._tasks[task_id] = task_item
|
@@ -176,4 +177,4 @@ class TaskQueue:
|
|
176
177
|
# expired_ids.append(seed_id)
|
177
178
|
# for seed_id in expired_ids:
|
178
179
|
# self._seeds[seed_id] = self._seeds[seed_id]._replace(status=SeedStatus.EXPIRED)
|
179
|
-
# print(f"清理了 {len(expired_ids)} 个过期种子")
|
180
|
+
# print(f"清理了 {len(expired_ids)} 个过期种子")
|
@@ -39,45 +39,28 @@ class Uploader(threading.Thread):
|
|
39
39
|
|
40
40
|
@check_pause
|
41
41
|
def upload_data(self):
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
try:
|
43
|
+
data_info, task_ids = dict(), set()
|
44
|
+
if task_list := self.task_queue.get_task_by_status(
|
45
|
+
status=Status.UPLOAD, limit=self.upload_size
|
46
|
+
):
|
47
47
|
for task_item in task_list:
|
48
48
|
upload_data = self.pipeline.build(task_item.data)
|
49
49
|
data_info.setdefault(task_item.data.table, []).append(upload_data)
|
50
|
+
task_ids.add(task_item.task_id)
|
50
51
|
|
51
52
|
for table, datas in data_info.items():
|
52
53
|
try:
|
53
54
|
self.pipeline.upload(table, datas)
|
54
55
|
except Exception as e:
|
55
56
|
logger.info(e)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
# try:
|
64
|
-
# for _ in range(self.upload_size):
|
65
|
-
# item = self.upload.pop()
|
66
|
-
# if not item:
|
67
|
-
# break
|
68
|
-
# # seeds.append(item.seed)
|
69
|
-
# data = self.pipeline.build(item)
|
70
|
-
# data_info.setdefault(item.table, []).append(data)
|
71
|
-
# for table, datas in data_info.items():
|
72
|
-
# try:
|
73
|
-
# self.pipeline.upload(table, datas)
|
74
|
-
# # TODO: 上传打点
|
75
|
-
# except Exception as e:
|
76
|
-
# logger.info(e)
|
77
|
-
# except Exception as e:
|
78
|
-
# logger.info(e)
|
79
|
-
# if self.upload.length < self.upload_size:
|
80
|
-
# time.sleep(self.wait_seconds)
|
57
|
+
|
58
|
+
self.task_queue.remove(task_ids)
|
59
|
+
except Exception as e:
|
60
|
+
logger.info(e)
|
61
|
+
|
62
|
+
if self.task_queue.status_length(status=Status.UPLOAD) < self.upload_size:
|
63
|
+
time.sleep(self.wait_seconds)
|
81
64
|
|
82
65
|
def run(self):
|
83
66
|
self.callback_register(self.upload_data, tag="Uploader")
|
@@ -52,7 +52,8 @@ class RedisScheduler(Scheduler):
|
|
52
52
|
time.sleep(self.scheduler_wait_seconds)
|
53
53
|
return
|
54
54
|
|
55
|
-
if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size:
|
55
|
+
if self.task_queue.status_length(Status.PENDING) >= self.todo_queue_size\
|
56
|
+
or self.task_queue.length() > 5 * self.todo_queue_size:
|
56
57
|
time.sleep(self.todo_queue_full_wait_seconds)
|
57
58
|
return
|
58
59
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cobweb-launcher
|
3
|
-
Version: 3.1.23
|
3
|
+
Version: 3.1.25
|
4
4
|
Summary: spider_hole
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
6
6
|
Author: Juannie-PP
|
@@ -174,12 +174,16 @@ app.start()
|
|
174
174
|
> upload_item["text"] = item.response.text
|
175
175
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
176
176
|
## todo
|
177
|
-
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
178
|
-
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
179
|
-
- 去重过滤(布隆过滤器等)
|
180
|
-
-
|
177
|
+
- [ ] 队列优化完善,使用queue的机制wait()同步各模块执行?
|
178
|
+
- [x] 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
179
|
+
- [ ] 去重过滤(布隆过滤器等)
|
180
|
+
- [ ] 请求检验
|
181
|
+
- [ ] 异常回调
|
182
|
+
- [ ] 失败回调
|
181
183
|
|
182
184
|
> 未更新流程图!!!
|
183
185
|

|
184
186
|
|
185
187
|
|
188
|
+
|
189
|
+
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-3.1.23 → cobweb-launcher-3.1.25}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|