cobweb-launcher 1.3.1__tar.gz → 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {cobweb-launcher-1.3.1/cobweb_launcher.egg-info → cobweb-launcher-1.3.3}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/__init__.py +9 -3
  3. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/basic.py +78 -26
  4. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/crawlers/crawler.py +4 -4
  5. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/launchers/launcher.py +8 -6
  6. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/launchers/launcher_pro.py +9 -11
  7. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/pipelines/pipeline.py +2 -1
  8. cobweb-launcher-1.3.3/cobweb/schedulers/__init__.py +1 -0
  9. cobweb-launcher-1.3.3/cobweb/schedulers/scheduler_redis.py +69 -0
  10. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  11. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_launcher.egg-info/SOURCES.txt +2 -0
  12. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/setup.py +1 -1
  13. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/LICENSE +0 -0
  14. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/README.md +0 -0
  15. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/__init__.py +0 -0
  16. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/common_queue.py +0 -0
  17. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/dotting.py +0 -0
  18. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/item.py +0 -0
  19. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/log.py +0 -0
  20. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/request.py +0 -0
  21. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/response.py +0 -0
  22. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/base/seed.py +0 -0
  23. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/constant.py +0 -0
  24. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/crawlers/__init__.py +0 -0
  25. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/db/__init__.py +0 -0
  26. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/db/api_db.py +0 -0
  27. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/db/redis_db.py +0 -0
  28. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/exceptions/__init__.py +0 -0
  29. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/exceptions/oss_db_exception.py +0 -0
  30. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/launchers/__init__.py +0 -0
  31. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/launchers/launcher_air.py +0 -0
  32. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/launchers/launcher_api.py +0 -0
  33. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/pipelines/__init__.py +0 -0
  34. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/pipelines/pipeline_console.py +0 -0
  35. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/pipelines/pipeline_loghub.py +0 -0
  36. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/setting.py +0 -0
  37. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/utils/__init__.py +0 -0
  38. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/utils/bloom.py +0 -0
  39. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/utils/oss.py +0 -0
  40. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb/utils/tools.py +0 -0
  41. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/__init__.py +0 -0
  42. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/__init__.py +0 -0
  43. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/common_queue.py +0 -0
  44. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/decorators.py +0 -0
  45. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/item.py +0 -0
  46. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/log.py +0 -0
  47. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/request.py +0 -0
  48. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/response.py +0 -0
  49. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/base/seed.py +0 -0
  50. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/constant.py +0 -0
  51. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/crawlers/__init__.py +0 -0
  52. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/crawlers/crawler.py +0 -0
  53. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/db/__init__.py +0 -0
  54. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/db/api_db.py +0 -0
  55. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/db/redis_db.py +0 -0
  56. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/exceptions/__init__.py +0 -0
  57. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/exceptions/oss_db_exception.py +0 -0
  58. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/launchers/__init__.py +0 -0
  59. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/launchers/launcher.py +0 -0
  60. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/launchers/launcher_air.py +0 -0
  61. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/launchers/launcher_api.py +0 -0
  62. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/launchers/launcher_pro.py +0 -0
  63. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/pipelines/__init__.py +0 -0
  64. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/pipelines/pipeline.py +0 -0
  65. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/pipelines/pipeline_console.py +0 -0
  66. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/pipelines/pipeline_loghub.py +0 -0
  67. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/setting.py +0 -0
  68. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/utils/__init__.py +0 -0
  69. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/utils/bloom.py +0 -0
  70. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/utils/dotting.py +0 -0
  71. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/utils/oss.py +0 -0
  72. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_/utils/tools.py +0 -0
  73. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  74. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_launcher.egg-info/requires.txt +0 -0
  75. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/cobweb_launcher.egg-info/top_level.txt +0 -0
  76. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/setup.cfg +0 -0
  77. {cobweb-launcher-1.3.1 → cobweb-launcher-1.3.3}/test/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.3.1
3
+ Version: 1.3.3
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -66,11 +66,17 @@ class TaskQueue:
66
66
  except Exception as e:
67
67
  it.params.retry += 1
68
68
  if isinstance(it, Request):
69
- TaskQueue.REQUEST.push(it)
69
+ TaskQueue.DOWNLOAD.push(it)
70
70
  elif isinstance(it, Response):
71
71
  TaskQueue.RESPONSE.push(it)
72
72
  elif isinstance(it, Seed):
73
- TaskQueue.SEED.push(it)
73
+ TaskQueue.TODO.push(it)
74
+ elif isinstance(it, BaseItem):
75
+ TaskQueue.UPLOAD.push(it)
76
+ logger.info(
77
+ f"{crawler_func.__name__} failed: "
78
+ f"{''.join(traceback.format_exception(type(e), e, e.__traceback__))}"
79
+ )
74
80
  time.sleep(1)
75
81
 
76
82
 
@@ -95,7 +101,7 @@ class Decorators:
95
101
  def wrapper(self, *args, **kwargs):
96
102
  while not self.pause.is_set():
97
103
  try:
98
- func(self, *args, **kwargs)
104
+ func(self)
99
105
  except Exception as e:
100
106
  logger.info(f"{func.__name__}: " + str(e))
101
107
  finally:
@@ -15,11 +15,21 @@ class Params:
15
15
 
16
16
 
17
17
  class Seed:
18
+ __SEED_PARAMS__ = [
19
+ "retry",
20
+ "priority",
21
+ "version",
22
+ "status"
23
+ ]
18
24
 
19
25
  def __init__(
20
26
  self,
21
27
  seed,
22
- params = Params(),
28
+ sid=None,
29
+ retry=None,
30
+ priority=None,
31
+ version=None,
32
+ status=None,
23
33
  **kwargs
24
34
  ):
25
35
  if any(isinstance(seed, t) for t in (str, bytes)):
@@ -37,11 +47,27 @@ class Seed:
37
47
  f"seed: {seed}"
38
48
  ))
39
49
 
50
+ seed_params = {
51
+ "retry": retry,
52
+ "priority": priority,
53
+ "version": version,
54
+ "status": status,
55
+ }
56
+
40
57
  if kwargs:
58
+ # for k, v in kwargs.items():
59
+ # if k in seed_params.keys():
60
+ # seed_params[k] = v
61
+ # else:
62
+ # self.__setattr__(k, v)
41
63
  self._init_seed(kwargs)
42
- if not getattr(self, "sid", None):
43
- self._init_id()
44
- self.params = params or Params()
64
+ seed_params.update({
65
+ k: v for k, v in kwargs.items()
66
+ if k in self.__SEED_PARAMS__
67
+ })
68
+ if sid or not getattr(self, "sid", None):
69
+ self._init_id(sid)
70
+ self.params = Params(**seed_params)
45
71
 
46
72
  def __getattr__(self, name):
47
73
  return None
@@ -59,13 +85,14 @@ class Seed:
59
85
  chars = [f"{k}={v}" for k, v in self.__dict__.items()]
60
86
  return f'{self.__class__.__name__}({", ".join(chars)})'
61
87
 
62
- def _init_seed(self, seed_info:dict):
88
+ def _init_seed(self, seed_info: dict):
63
89
  for k, v in seed_info.items():
64
90
  if k not in self.__SEED_PARAMS__:
65
91
  self.__setattr__(k, v)
66
92
 
67
- def _init_id(self):
68
- sid = hashlib.md5(self.to_string.encode()).hexdigest()
93
+ def _init_id(self, sid):
94
+ if not sid:
95
+ sid = hashlib.md5(self.to_string.encode()).hexdigest()
69
96
  self.__setattr__("sid", sid)
70
97
 
71
98
  @property
@@ -85,16 +112,15 @@ class Seed:
85
112
 
86
113
  @property
87
114
  def seed(self):
88
- return self
115
+ return self.to_string
89
116
 
90
117
 
91
118
  class Request:
92
-
93
119
  __SEED_PARAMS__ = [
94
120
  "retry",
95
121
  "priority",
96
- "seed_version",
97
- "seed_status"
122
+ "version",
123
+ "status"
98
124
  ]
99
125
 
100
126
  __REQUEST_ATTRS__ = {
@@ -122,8 +148,8 @@ class Request:
122
148
  check_status_code=True,
123
149
  retry=None,
124
150
  priority=None,
125
- seed_version=None,
126
- seed_status=None,
151
+ version=None,
152
+ status=None,
127
153
  **kwargs
128
154
  ):
129
155
  self.url = url
@@ -133,10 +159,15 @@ class Request:
133
159
  seed_params = {
134
160
  "retry": retry,
135
161
  "priority": priority,
136
- "seed_version": seed_version,
137
- "seed_status": seed_status,
162
+ "version": version,
163
+ "status": status,
138
164
  }
139
165
 
166
+ if isinstance(seed, Seed):
167
+ kwargs.update(**seed.to_dict)
168
+ elif isinstance(seed, str):
169
+ kwargs.update(**json.loads(seed))
170
+
140
171
  for k, v in kwargs.items():
141
172
  if k in self.__class__.__REQUEST_ATTRS__:
142
173
  self.request_setting[k] = v
@@ -152,12 +183,7 @@ class Request:
152
183
  self._build_header()
153
184
 
154
185
  self.params = Params(**seed_params)
155
-
156
- if isinstance(seed, Seed):
157
- kwargs.update(**seed.to_dict)
158
- elif isinstance(seed, str):
159
- kwargs.update(**json.loads(seed))
160
- self.seed = self.to_string
186
+ # self.seed = self.to_string
161
187
 
162
188
  @property
163
189
  def _random_ua(self) -> str:
@@ -183,10 +209,19 @@ class Request:
183
209
  response.raise_for_status()
184
210
  return response
185
211
 
212
+ def __getattr__(self, name):
213
+ return None
214
+
215
+ def __setitem__(self, key, value):
216
+ setattr(self, key, value)
217
+
218
+ def __getitem__(self, item):
219
+ return getattr(self, item)
220
+
186
221
  @property
187
222
  def to_dict(self):
188
223
  _dict = self.__dict__.copy()
189
- _dict.pop('seed')
224
+ # _dict.pop('seed')
190
225
  _dict.pop('params')
191
226
  _dict.pop('check_status_code')
192
227
  # _dict.pop('request_setting')
@@ -200,6 +235,10 @@ class Request:
200
235
  separators=(",", ":")
201
236
  )
202
237
 
238
+ @property
239
+ def seed(self):
240
+ return self.to_string
241
+
203
242
 
204
243
  class Response:
205
244
 
@@ -209,8 +248,8 @@ class Response:
209
248
  response,
210
249
  retry=None,
211
250
  priority=None,
212
- seed_version=None,
213
- seed_status=None,
251
+ version=None,
252
+ status=None,
214
253
  **kwargs
215
254
  ):
216
255
  self.seed = seed
@@ -218,20 +257,24 @@ class Response:
218
257
  seed_params = {
219
258
  "retry": retry,
220
259
  "priority": priority,
221
- "seed_version": seed_version,
222
- "seed_status": seed_status,
260
+ "version": version,
261
+ "status": status,
223
262
  }
224
263
  for k, v in kwargs.items():
225
264
  if k in seed_params.keys():
226
265
  seed_params[k] = v
227
266
  else:
228
267
  self.__setattr__(k, v)
268
+ self.params = Params(**seed_params)
229
269
 
230
270
  @property
231
271
  def to_dict(self):
232
272
  _dict = self.__dict__.copy()
233
273
  _dict.pop('seed')
234
274
  _dict.pop('response')
275
+ _dict.pop('method')
276
+ _dict.pop('params')
277
+ _dict.pop('request_setting')
235
278
  return _dict
236
279
 
237
280
  @property
@@ -241,3 +284,12 @@ class Response:
241
284
  ensure_ascii=False,
242
285
  separators=(",", ":")
243
286
  )
287
+
288
+ def __getattr__(self, name):
289
+ return None
290
+
291
+ def __setitem__(self, key, value):
292
+ setattr(self, key, value)
293
+
294
+ def __getitem__(self, item):
295
+ return getattr(self, item)
@@ -3,7 +3,7 @@ import time
3
3
  import threading
4
4
  from typing import Union, Callable, Mapping
5
5
 
6
- import setting
6
+ from cobweb import setting
7
7
  from cobweb.base import (
8
8
  Seed,
9
9
  BaseItem,
@@ -14,7 +14,7 @@ from cobweb.base import (
14
14
  TaskQueue,
15
15
  logger
16
16
  )
17
- from constant import DealModel
17
+ from cobweb.constant import DealModel
18
18
 
19
19
 
20
20
  class Crawler(threading.Thread):
@@ -66,8 +66,8 @@ class Crawler(threading.Thread):
66
66
  def build_download_item(self):
67
67
  thread_sleep = 0.1
68
68
  if TaskQueue.RESPONSE.length >= self.download_queue_size:
69
- logger.info(f"download queue is full, sleep {thread_sleep}s")
70
69
  thread_sleep = 5
70
+ # logger.info(f"download queue is full, sleep {thread_sleep}s")
71
71
  elif request_info := TaskQueue.DOWNLOAD.pop():
72
72
  member, priority = request_info
73
73
  request_setting = json.loads(member)
@@ -79,7 +79,7 @@ class Crawler(threading.Thread):
79
79
  def build_parse_item(self):
80
80
  thread_sleep = 0.1
81
81
  if TaskQueue.UPLOAD.length >= self.upload_queue_size:
82
- logger.info(f"upload queue is full, sleep {thread_sleep}s")
82
+ # logger.info(f"upload queue is full, sleep {thread_sleep}s")
83
83
  thread_sleep = 5
84
84
  if response_item := TaskQueue.RESPONSE.pop():
85
85
  TaskQueue.process_task(response_item, self.parse)
@@ -137,10 +137,10 @@ class Launcher(threading.Thread):
137
137
 
138
138
  def _add_thread(self, func, num=1, obj=None, name=None, args=()):
139
139
  obj = obj or self
140
- name = obj.__class__.__name__ + name or func.__name__
140
+ name = obj.__class__.__name__ + ":" + (name or func.__name__)
141
141
  for i in range(num):
142
142
  func_name = name + "_" + str(i) if num > 1 else name
143
- self._threads.append(threading.Thread(name=func_name, target=func, args=(obj,) + args))
143
+ self._threads.append(threading.Thread(name=func_name, target=func, args=()))
144
144
 
145
145
  @Decorators.stop
146
146
  def _polling(self):
@@ -150,6 +150,10 @@ class Launcher(threading.Thread):
150
150
  if not self.task_model and run_time > self.before_scheduler_wait_seconds:
151
151
  logger.info("Done! ready to close thread...")
152
152
  self.stop.set()
153
+ elif TaskQueue.TODO.length or TaskQueue.DOWNLOAD.length:
154
+ logger.info(f"Recovery {self.task} task run!")
155
+ self.check_emtpy_times = 0
156
+ self.pause.clear()
153
157
  else:
154
158
  logger.info("pause! waiting for resume...")
155
159
  elif self.check_emtpy_times > 2:
@@ -164,10 +168,6 @@ class Launcher(threading.Thread):
164
168
  f"reset times {3 - self.check_emtpy_times}"
165
169
  )
166
170
  self.check_emtpy_times += 1
167
- elif TaskQueue.TODO.length:
168
- logger.info(f"Recovery {self.task} task run!")
169
- self.check_emtpy_times = 0
170
- self.pause.clear()
171
171
  else:
172
172
  logger.info(LogTemplate.launcher_polling.format(
173
173
  task=self.task,
@@ -179,6 +179,8 @@ class Launcher(threading.Thread):
179
179
  response_queue_len=TaskQueue.RESPONSE.length,
180
180
  done_queue_len=TaskQueue.DONE.length,
181
181
  upload_queue_len=TaskQueue.UPLOAD.length,
182
+ seed_queue_len=TaskQueue.SEED.length,
183
+ download_queue_len=TaskQueue.DOWNLOAD.length
182
184
  ))
183
185
  time.sleep(10)
184
186
 
@@ -1,7 +1,7 @@
1
1
  import time
2
2
 
3
- from base import TaskQueue, Decorators
4
- from schedulers.scheduler_redis import RedisScheduler
3
+ from cobweb.base import TaskQueue, Decorators
4
+ from cobweb.schedulers import RedisScheduler
5
5
  from .launcher import Launcher
6
6
 
7
7
 
@@ -21,21 +21,19 @@ class LauncherPro(Launcher):
21
21
  (TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size, self._task_info["download"]),
22
22
  ]:
23
23
  if q.length < size:
24
- for member, priority in self._scheduler.schedule(
25
- key, self.scheduling_size
26
- ):
24
+ for member, priority in self._scheduler.schedule(key, self.scheduling_size):
27
25
  q.push((member, priority), direct_insertion=True)
28
26
  self.add_working_item(key.split(":")[-1], member, priority)
29
27
  thread_sleep = 0.1
30
28
  time.sleep(thread_sleep)
31
29
 
32
- @Decorators.pause
30
+ @Decorators.stop
33
31
  def _heartbeat(self):
34
32
  if self._scheduler.working.is_set():
35
33
  self._scheduler.set_heartbeat()
36
34
  time.sleep(3)
37
35
 
38
- @Decorators.pause
36
+ @Decorators.stop
39
37
  def _reset(self):
40
38
  self._scheduler.reset(
41
39
  keys=[self._redis_todo, self._redis_download],
@@ -51,7 +49,7 @@ class LauncherPro(Launcher):
51
49
  (TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
52
50
  ]:
53
51
  item_info = {}
54
- while item := q.pop() and len(item_info.keys()) < self.inserting_size:
52
+ while (item := q.pop()) and len(item_info.keys()) < self.inserting_size:
55
53
  item_info[item.seed] = item.params.priority
56
54
  if q.length >= size:
57
55
  thread_sleep = self.inserting_wait_time
@@ -72,9 +70,9 @@ class LauncherPro(Launcher):
72
70
  (TaskQueue.DONE, self._redis_download, self.done_queue_size),
73
71
  ]:
74
72
  items = []
75
- while item := q.pop() and len(items) < self.removing_size:
73
+ while (item := q.pop()) and len(items) < self.removing_size:
76
74
  items.append(item)
77
- self._scheduler.delete(key, *items)
75
+ self._scheduler.delete(key, items)
78
76
  self.remove_working_items(key.split(":")[-1], items)
79
77
  if q.length >= size:
80
78
  thread_sleep = 0.1
@@ -87,4 +85,4 @@ class LauncherPro(Launcher):
87
85
  self._add_thread(func=self._schedule)
88
86
  self._add_thread(func=self._insert)
89
87
  self._add_thread(func=self._remove)
90
- self._add_thread(func=self._polling)
88
+ # self._add_thread(func=self._polling)
@@ -30,7 +30,7 @@ class Pipeline(ABC):
30
30
  data_info, seeds = {}, []
31
31
  thread_sleep = self.upload_wait_time if TaskQueue.UPLOAD.length < self.upload_queue_size else 0.1
32
32
  try:
33
- while item := TaskQueue.UPLOAD.pop() and len(seeds) <= self.upload_queue_size:
33
+ while (item := TaskQueue.UPLOAD.pop()) and len(seeds) <= self.upload_queue_size:
34
34
  data = self.build(item)
35
35
  data_info.setdefault(item.table, []).append(data)
36
36
  seeds.append(item.seed)
@@ -39,6 +39,7 @@ class Pipeline(ABC):
39
39
  except Exception as e:
40
40
  logger.info(e)
41
41
  seeds = None
42
+ # todo: retry
42
43
  finally:
43
44
  TaskQueue.DONE.push(seeds)
44
45
 
@@ -0,0 +1 @@
1
+ from .scheduler_redis import RedisScheduler
@@ -0,0 +1,69 @@
1
+ import threading
2
+ import time
3
+
4
+ # from cobweb.base import Seed
5
+ from cobweb.db import RedisDB
6
+
7
+
8
+ class RedisScheduler:
9
+
10
+ def __init__(self, task, project, scheduler_wait_seconds=30):
11
+ self._todo_key = "{%s:%s}:todo" % (project, task)
12
+ self._download_key = "{%s:%s}:download" % (project, task)
13
+ self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
14
+ self._speed_control_key = "speed_control:%s_%s" % (project, task)
15
+ self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
16
+ self._db = RedisDB()
17
+
18
+ self.scheduler_wait_seconds = scheduler_wait_seconds
19
+ self.working = threading.Event()
20
+
21
+ @property
22
+ def heartbeat(self):
23
+ return self._db.exists(self._heartbeat_key)
24
+
25
+ def set_heartbeat(self):
26
+ return self._db.setex(self._heartbeat_key, 5)
27
+
28
+ def schedule(self, key, count):
29
+ if not self._db.zcount(key, 0, "(1000"):
30
+ time.sleep(self.scheduler_wait_seconds)
31
+ else:
32
+ source = int(time.time())
33
+ members = self._db.members(key, source, count=count, _min=0, _max="(1000")
34
+ for member, priority in members:
35
+ # seed = Seed(member, priority=priority)
36
+ yield member.decode(), priority
37
+
38
+ def insert(self, key, items):
39
+ if items:
40
+ self._db.zadd(key, items, nx=True)
41
+
42
+ def reset(self, keys, reset_time=30):
43
+ if self._db.lock(self._reset_lock_key, t=120):
44
+
45
+ if isinstance(keys, str):
46
+ keys = [keys]
47
+
48
+ _min = reset_time - int(time.time()) if self.heartbeat else "-inf"
49
+
50
+ for key in keys:
51
+ self._db.members(key, 0, _min=_min, _max="(0")
52
+
53
+ if not self.heartbeat:
54
+ self.working.set()
55
+ time.sleep(10)
56
+
57
+ self._db.delete(self._reset_lock_key)
58
+
59
+ def refresh(self, key, items: dict[str, int]):
60
+ refresh_time = int(time.time())
61
+ its = {k: -refresh_time - v / 1000 for k, v in items}
62
+ self._db.zadd(key, item=its, xx=True)
63
+
64
+ def delete(self, key, values):
65
+ self._db.zrem(key, *values)
66
+
67
+
68
+
69
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.3.1
3
+ Version: 1.3.3
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -29,6 +29,8 @@ cobweb/pipelines/__init__.py
29
29
  cobweb/pipelines/pipeline.py
30
30
  cobweb/pipelines/pipeline_console.py
31
31
  cobweb/pipelines/pipeline_loghub.py
32
+ cobweb/schedulers/__init__.py
33
+ cobweb/schedulers/scheduler_redis.py
32
34
  cobweb/utils/__init__.py
33
35
  cobweb/utils/bloom.py
34
36
  cobweb/utils/oss.py
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.3.1",
8
+ version="1.3.3",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",
File without changes