cobweb-launcher 1.2.25__py3-none-any.whl → 1.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

Files changed (44) hide show
  1. cobweb/crawlers/crawler.py +14 -1
  2. cobweb/launchers/launcher.py +3 -0
  3. cobweb/utils/__init__.py +1 -0
  4. cobweb/utils/dotting.py +28 -0
  5. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-1.2.27.dist-info}/METADATA +1 -1
  6. cobweb_launcher-1.2.27.dist-info/RECORD +74 -0
  7. cobweb_launcher-1.2.27.dist-info/top_level.txt +2 -0
  8. cobweb_new/__init__.py +2 -0
  9. cobweb_new/base/__init__.py +72 -0
  10. cobweb_new/base/common_queue.py +53 -0
  11. cobweb_new/base/decorators.py +72 -0
  12. cobweb_new/base/item.py +46 -0
  13. cobweb_new/base/log.py +94 -0
  14. cobweb_new/base/request.py +82 -0
  15. cobweb_new/base/response.py +23 -0
  16. cobweb_new/base/seed.py +118 -0
  17. cobweb_new/constant.py +105 -0
  18. cobweb_new/crawlers/__init__.py +1 -0
  19. cobweb_new/crawlers/crawler-new.py +85 -0
  20. cobweb_new/crawlers/crawler.py +170 -0
  21. cobweb_new/db/__init__.py +2 -0
  22. cobweb_new/db/api_db.py +82 -0
  23. cobweb_new/db/redis_db.py +158 -0
  24. cobweb_new/exceptions/__init__.py +1 -0
  25. cobweb_new/exceptions/oss_db_exception.py +28 -0
  26. cobweb_new/launchers/__init__.py +3 -0
  27. cobweb_new/launchers/launcher.py +237 -0
  28. cobweb_new/launchers/launcher_air.py +88 -0
  29. cobweb_new/launchers/launcher_api.py +161 -0
  30. cobweb_new/launchers/launcher_pro.py +96 -0
  31. cobweb_new/launchers/tesss.py +47 -0
  32. cobweb_new/pipelines/__init__.py +3 -0
  33. cobweb_new/pipelines/pipeline.py +68 -0
  34. cobweb_new/pipelines/pipeline_console.py +22 -0
  35. cobweb_new/pipelines/pipeline_loghub.py +34 -0
  36. cobweb_new/setting.py +95 -0
  37. cobweb_new/utils/__init__.py +5 -0
  38. cobweb_new/utils/bloom.py +58 -0
  39. cobweb_new/utils/oss.py +94 -0
  40. cobweb_new/utils/tools.py +42 -0
  41. cobweb_launcher-1.2.25.dist-info/RECORD +0 -40
  42. cobweb_launcher-1.2.25.dist-info/top_level.txt +0 -1
  43. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-1.2.27.dist-info}/LICENSE +0 -0
  44. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-1.2.27.dist-info}/WHEEL +0 -0
@@ -15,12 +15,15 @@ from cobweb.base import (
15
15
  ConsoleItem,
16
16
  logger
17
17
  )
18
+ from cobweb.utils import LoghubDot
18
19
 
19
20
 
20
21
  class Crawler(threading.Thread):
21
22
 
22
23
  def __init__(
23
24
  self,
25
+ task: str,
26
+ project: str,
24
27
  stop: threading.Event,
25
28
  pause: threading.Event,
26
29
  # launcher_queue: Union[Mapping[str, Queue]],
@@ -35,7 +38,8 @@ class Crawler(threading.Thread):
35
38
  time_sleep: int,
36
39
  ):
37
40
  super().__init__()
38
-
41
+ self.task = task
42
+ self.project = project
39
43
  self._stop = stop
40
44
  self._pause = pause
41
45
  self._get_seed = get_seed
@@ -52,6 +56,8 @@ class Crawler(threading.Thread):
52
56
  self.time_sleep = time_sleep
53
57
  self.max_retries = max_retries
54
58
 
59
+ self.loghub_dot = LoghubDot()
60
+
55
61
  @staticmethod
56
62
  def request(seed: Seed) -> Union[Request, BaseItem]:
57
63
  yield Request(seed.url, seed, timeout=5)
@@ -115,6 +121,7 @@ class Crawler(threading.Thread):
115
121
 
116
122
  if isinstance(request_item, Request):
117
123
  iterator_status = False
124
+ start_time = time.time()
118
125
  download_iterators = self.download(request_item)
119
126
  if not isgenerator(download_iterators):
120
127
  raise TypeError("download function isn't a generator")
@@ -132,6 +139,12 @@ class Crawler(threading.Thread):
132
139
  status=download_item.response,
133
140
  response=LogTemplate.log_info(download_item.to_dict)
134
141
  ))
142
+ if isinstance(download_item, Response):
143
+ end_time = time.time()
144
+ self.loghub_dot.build(topic=f"{self.project}:{self.task}", data={
145
+ "cost_time": end_time - start_time,
146
+ **download_item.to_dict
147
+ })
135
148
  parse_iterators = self.parse(download_item)
136
149
  if not isgenerator(parse_iterators):
137
150
  raise TypeError("parse function isn't a generator")
@@ -4,6 +4,8 @@ import threading
4
4
  import importlib
5
5
  from functools import wraps
6
6
 
7
+ from mypyc.doc.conf import project
8
+
7
9
  from cobweb import setting
8
10
  from cobweb.base import Seed, Queue, logger
9
11
  from cobweb.utils.tools import dynamic_load_class
@@ -186,6 +188,7 @@ class Launcher(threading.Thread):
186
188
  self.start_seeds()
187
189
 
188
190
  self._Crawler(
191
+ task=self.task, project=self.project,
189
192
  stop=self._stop, pause=self._pause,
190
193
  # launcher_queue=self.__LAUNCHER_QUEUE__,
191
194
  get_seed=self._get_seed,
cobweb/utils/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from .oss import OssUtil
2
2
  from .tools import *
3
3
  from .bloom import BloomFilter
4
+ from .dotting import LoghubDot
4
5
 
@@ -0,0 +1,28 @@
1
+ import json
2
+
3
+ from aliyun.log import LogClient, PutLogsRequest, LogItem
4
+
5
+ import setting
6
+
7
+
8
class LoghubDot:
    """Write key/value records to the ``cobweb_log`` Aliyun Log Service logstore.

    NOTE(review): this module pulls in its configuration with a bare
    ``import setting``; inside the ``cobweb`` package that presumably needs to
    be ``from cobweb import setting`` -- confirm before shipping.
    """

    def __init__(self, *args, **kwargs):
        """Create the log client from ``setting.LOGHUB_CONFIG``.

        Extra positional/keyword arguments are accepted for call compatibility
        but ignored: the only base class is ``object``, whose initializer
        raises ``TypeError`` when it is handed any argument.
        """
        super().__init__()
        self.client = LogClient(**setting.LOGHUB_CONFIG)

    def build(self, topic, data):
        """Serialize ``data`` into one log item and upload it.

        Args:
            topic: Log topic attached to the upload request.
            data: Mapping of field name to value. Non-string values are
                JSON-encoded; the mapping itself is left untouched.

        Raises:
            TypeError: If a value cannot be JSON-encoded.
        """
        # Work on a fresh dict so the caller's mapping is never rewritten.
        fields = {
            key: value if isinstance(value, str)
            else json.dumps(value, ensure_ascii=False)
            for key, value in data.items()
        }

        log_item = LogItem()
        log_item.set_contents(sorted(fields.items()))

        request = PutLogsRequest(
            project=setting.LOGHUB_PROJECT,
            logstore="cobweb_log",
            topic=topic,
            # The request expects LogItem objects, not raw (key, value) tuples.
            logitems=[log_item],
            compress=True,
        )
        self.client.put_logs(request=request)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.25
3
+ Version: 1.2.27
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -0,0 +1,74 @@
1
+ cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
+ cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
+ cobweb/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
4
+ cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
+ cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
6
+ cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
7
+ cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
8
+ cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
9
+ cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
10
+ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
11
+ cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
12
+ cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
13
+ cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
14
+ cobweb/crawlers/crawler.py,sha256=zQbItrLECjDYoI4KXHMA-Mu8YXY6lm6N8NIw9cjKS3w,6988
15
+ cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
16
+ cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
17
+ cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
18
+ cobweb/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
19
+ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
20
+ cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
21
+ cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
22
+ cobweb/launchers/launcher.py,sha256=bXPPS6wx3Si05f2arf_9S4XqE3HFrDA-lhAX7tLZEqo,7064
23
+ cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
24
+ cobweb/launchers/launcher_api.py,sha256=Ih8f5xDcFlGBn6VSnlrpxcchMB48ugsj2NTWYgGYWfY,8669
25
+ cobweb/launchers/launcher_pro.py,sha256=NBJstQuB0o_jMiySJ14lk0Y3WAxxiScaQvXa1qtTSo4,8683
26
+ cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
27
+ cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
28
+ cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
29
+ cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
30
+ cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
31
+ cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
32
+ cobweb/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
33
+ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
34
+ cobweb/utils/dotting.py,sha256=0yo0KjjGAU2fb3VLCGKhHwdOzW4QtZ4SDqUcX13tetI,791
35
+ cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
36
+ cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
37
+ cobweb_new/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
38
+ cobweb_new/constant.py,sha256=wy2bLpGZrl1MtgVv-Z1Tmtj5uWh-KGzDrrGKdVStxV4,3236
39
+ cobweb_new/setting.py,sha256=Ya3X4HbvDfSmMF2kSJwaaP1naxrWETTFW88T11agP7k,3035
40
+ cobweb_new/base/__init__.py,sha256=L74KN3qZn6s33EXyxQ_vB3FF8mA7pZJd_ekkWiUKd5Y,2229
41
+ cobweb_new/base/common_queue.py,sha256=Po6yY8HqpC6Wt6csd3Co3lBd7ygN2vmEECczgyc_sM8,1292
42
+ cobweb_new/base/decorators.py,sha256=8VDpANSIhxhrFnwgQzAxM_8ZyDXKdn3zTH0oZIXqRPE,1801
43
+ cobweb_new/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
44
+ cobweb_new/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
45
+ cobweb_new/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
46
+ cobweb_new/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
47
+ cobweb_new/base/seed.py,sha256=KBVxVU4jMB6oiw8HPtu-nDUVUZ6jiTjzR917jTYGCZs,2977
48
+ cobweb_new/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
49
+ cobweb_new/crawlers/crawler-new.py,sha256=TAYMH2E3BTkjU6bFLlIMVfsR3cV2ggjA0moUpaXOe1Y,2762
50
+ cobweb_new/crawlers/crawler.py,sha256=xiFNM0t69f5xlm59hPbO2MpqtdirVAUhD84-CLpyHPM,6349
51
+ cobweb_new/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
52
+ cobweb_new/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
53
+ cobweb_new/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
54
+ cobweb_new/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
55
+ cobweb_new/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
56
+ cobweb_new/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
57
+ cobweb_new/launchers/launcher.py,sha256=87P_2rRjzqyQXcG_EJ5Y6lMAk7saM8k1WBJcl9ANX6k,8309
58
+ cobweb_new/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
59
+ cobweb_new/launchers/launcher_api.py,sha256=qPazoC7U-UmgebbiTkhl6f4yQmN34XMl6HawekhAhEo,5789
60
+ cobweb_new/launchers/launcher_pro.py,sha256=QLjAiN8qMk4NklSY7ldBAR5OEEUB8sECuCCwRrFEC68,3414
61
+ cobweb_new/launchers/tesss.py,sha256=pDe0wwhXbdjjmtfc7JLPfVOvs9yuc7Y8wLT1b1ueeEs,912
62
+ cobweb_new/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
63
+ cobweb_new/pipelines/pipeline.py,sha256=3IRHHqrHblZ_18Cps2bGK6iugDjs-dde7p3AbarfiN8,1958
64
+ cobweb_new/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
65
+ cobweb_new/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
66
+ cobweb_new/utils/__init__.py,sha256=c9macpjc15hrCUCdzO5RR_sgK_B9kvJKreSGprZ1ld4,112
67
+ cobweb_new/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
68
+ cobweb_new/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
69
+ cobweb_new/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
70
+ cobweb_launcher-1.2.27.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
71
+ cobweb_launcher-1.2.27.dist-info/METADATA,sha256=acbwlBay3FlD7jmGuLhMYSjtoF2pW2BngiW-cbgptRk,6510
72
+ cobweb_launcher-1.2.27.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
73
+ cobweb_launcher-1.2.27.dist-info/top_level.txt,sha256=UKwd478nkspJ6_fos3jtuc7SKXfmU42bJa_Ek5n2zsE,18
74
+ cobweb_launcher-1.2.27.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ cobweb
2
+ cobweb_new
cobweb_new/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .launchers import LauncherAir, LauncherPro, LauncherApi
2
+ from .constant import CrawlerModel
@@ -0,0 +1,72 @@
1
+ import time
2
+ from inspect import isgenerator
3
+ from typing import Callable, Union
4
+
5
+ from .common_queue import Queue
6
+ from .response import Response
7
+ from .request import Request
8
+ from .item import BaseItem, ConsoleItem
9
+ from .seed import Seed
10
+
11
+ from .log import logger
12
+ # from .decorators import decorator_oss_db, stop, pause
13
+ import decorators
14
+
15
+
16
class TaskQueue:
    """Class-level queues shared by every stage of the crawl pipeline."""

    SEED = Queue()  # seeds waiting to be added as tasks
    TODO = Queue()  # task seed queue
    REQUEST = Queue()  # request queue

    DOWNLOAD = Queue()  # download task queue
    RESPONSE = Queue()  # response queue
    DONE = Queue()  # download-finished queue

    UPLOAD = Queue()  # task upload queue

    DELETE = Queue()  # task delete queue

    def __init__(self, db):
        self.db = db

    @staticmethod
    def is_empty():
        """Return True when every shared queue has length zero."""
        queues = (
            TaskQueue.SEED,
            TaskQueue.TODO,
            TaskQueue.REQUEST,
            TaskQueue.DOWNLOAD,
            TaskQueue.RESPONSE,
            TaskQueue.UPLOAD,
            TaskQueue.DONE,
            TaskQueue.DELETE,
        )
        return not any(queue.length for queue in queues)

    @staticmethod
    def process_task(it: Union[Seed, Request, Response, BaseItem], crawler_func: Callable):
        """Run ``crawler_func(it)`` and route each yielded object to its queue.

        Request -> DOWNLOAD, Response -> RESPONSE, BaseItem -> UPLOAD,
        Seed -> SEED; objects of any other type are dropped.

        On failure the error is logged, the retry counter reached through
        ``it.seed.params.retry`` is incremented (skipped for ``BaseItem``),
        and the call sleeps for five seconds.
        """
        try:
            iterators = crawler_func(it)
            if not isgenerator(iterators):
                raise TypeError(f"{crawler_func.__name__} function isn't a generator")
            for tk in iterators:
                if isinstance(tk, Request):
                    TaskQueue.DOWNLOAD.push(tk)
                elif isinstance(tk, Response):
                    TaskQueue.RESPONSE.push(tk)
                elif isinstance(tk, BaseItem):
                    TaskQueue.UPLOAD.push(tk)
                elif isinstance(tk, Seed):
                    TaskQueue.SEED.push(tk)
        except Exception:
            # Previously swallowed silently; keep the best-effort behavior but
            # leave a trace so failing crawler functions can be diagnosed.
            logger.exception(f"{crawler_func.__name__} failed")
            if not isinstance(it, BaseItem):
                it.seed.params.retry += 1

            # NOTE(review): assumed to be the failure back-off (inside the
            # except branch) -- confirm against the original indentation.
            time.sleep(5)
67
+
68
+
69
class Distribute:
    """
    Data distributor: dispatches data to the individual queues.
    """
@@ -0,0 +1,53 @@
1
+ import time
2
+ from collections import deque
3
+
4
+
5
class Queue:
    """Small convenience wrapper around ``collections.deque``."""

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of items currently stored."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add ``data`` to the queue; falsy ``data`` is ignored.

        Lists and tuples are unpacked item by item unless
        ``direct_insertion`` is true. ``left`` selects the left end.
        """
        try:
            if not data:
                return None
            unpack = isinstance(data, (list, tuple)) and not direct_insertion
            if unpack and left:
                self._queue.extendleft(data)
            elif unpack:
                self._queue.extend(data)
            elif left:
                self._queue.appendleft(data)
            else:
                self._queue.append(data)
        except AttributeError:
            pass

    def pop(self, left: bool = True):
        """Remove and return one item, or ``None`` when nothing can be popped."""
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except (IndexError, AttributeError):
            return None

    def clear(self):
        """Drop every queued item."""
        self._queue.clear()

    def get(self):
        """Generator yielding the leftmost item once.

        When the queue is empty it sleeps for one second and yields ``None``.
        """
        try:
            yield self._queue.popleft()
        except IndexError:
            time.sleep(1)
            yield None
        except AttributeError:
            yield None
44
+
45
+
46
class RedisQueue(Queue):
    """Queue variant that additionally keeps a handle to a database object."""

    def __init__(self, db):
        super().__init__()
        # Stored as-is; nothing in this class uses it yet.
        self.db = db

    def pop(self, left: bool = True):
        # Placeholder: overrides Queue.pop with an empty body, so it
        # currently always returns None.
        ...
@@ -0,0 +1,72 @@
1
+ import time
2
+ import threading
3
+ from functools import wraps
4
+
5
+
6
def add_thread(num=1):
    """Decorator factory that registers threads instead of running the method.

    Calling the decorated method appends ``num`` unstarted
    ``threading.Thread`` objects targeting the original function to
    ``self._threads``. Threads are named after the function, with an
    ``_<index>`` suffix when ``num`` is greater than one.
    """
    def decorator(func):
        base_name = func.__name__

        @wraps(func)
        def wrapper(self, *args):
            call_args = (self,) + args
            for index in range(num):
                if num > 1:
                    thread_name = base_name + "_" + str(index)
                else:
                    thread_name = base_name
                worker = threading.Thread(name=thread_name, target=func, args=call_args)
                self._threads.append(worker)
        return wrapper

    return decorator
16
+
17
+
18
def pause(func):
    """Decorator: call ``func`` repeatedly until ``self.pause`` is set.

    The wrapped method is invoked in a loop for as long as the ``pause``
    event on the instance is not set, sleeping 0.1 s after every call.
    Exceptions raised by ``func`` do not stop the loop; they are logged as
    warnings instead of being silently discarded. The wrapper returns None.
    """
    # Local import: this module does not import logging at top level.
    import logging

    log = logging.getLogger(__name__)

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while not self.pause.is_set():
            try:
                func(self, *args, **kwargs)
            except Exception as exc:
                log.warning("%s: %s", func.__name__, exc)
            finally:
                time.sleep(0.1)

    return wrapper
31
+
32
+
33
def stop(func):
    """Decorator: call ``func`` repeatedly until ``self.stop`` is set.

    The wrapped method is invoked in a loop for as long as the ``stop``
    event on the instance is not set, sleeping 0.1 s after every call.
    Exceptions raised by ``func`` do not end the loop; they are logged as
    warnings instead of being silently discarded. The wrapper returns None.
    """
    # Local import: this module does not import logging at top level.
    import logging

    log = logging.getLogger(__name__)

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while not self.stop.is_set():
            try:
                func(self, *args, **kwargs)
            except Exception as exc:
                log.warning("%s: %s", func.__name__, exc)
            finally:
                time.sleep(0.1)

    return wrapper
46
+
47
+
48
def decorator_oss_db(exception, retries=3):
    """Decorator factory that retries a call and wraps the final failure.

    Args:
        exception: Exception class raised once every attempt has failed; it
            is instantiated with the last underlying error and chained to it.
        retries: Maximum number of attempts. The previous implementation
            compared the attempt index against a hard-coded 2, so any value
            other than 3 either never raised or gave up early.

    Returns:
        A decorator. The wrapped callable returns the first successful
        result, or None when ``retries`` is not positive (no attempt made).
    """
    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            last_error = None
            for _ in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as exc:
                    # Keep a reference: ``exc`` is unbound when the clause ends.
                    last_error = exc

            if last_error is not None:
                raise exception(last_error) from last_error
            return None

        return wrapper

    return decorator
70
+
71
+
72
+
@@ -0,0 +1,46 @@
1
+ from .seed import Seed
2
+ from collections import namedtuple
3
+
4
+
5
class Item(type):
    """Metaclass that equips item classes with a ``Data`` namedtuple type.

    Every class created through it, except the one named ``BaseItem``, gets
    ``Data = namedtuple(__TABLE__, __FIELDS__)`` built from its own class
    attributes.
    """

    def __new__(cls, name, bases, dct):
        item_cls = super().__new__(cls, name, bases, dct)
        if name == "BaseItem":
            # The abstract base carries empty placeholders; no record type.
            return item_cls
        item_cls.Data = namedtuple(item_cls.__TABLE__, item_cls.__FIELDS__)
        return item_cls
14
+
15
+
16
class BaseItem(metaclass=Item):
    """Base class for result records produced by a crawler.

    Subclasses declare ``__TABLE__`` (record/table name) and ``__FIELDS__``
    (namedtuple field specification); the ``Item`` metaclass turns them into
    the ``Data`` namedtuple type used to hold the field values.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: Seed, **kwargs):
        """Split ``kwargs`` into declared fields and plain attributes.

        Args:
            seed: Seed this item originates from; stored as ``self.seed``.
            **kwargs: Keys that name a ``Data`` field go into ``self.data``;
                every other key becomes an instance attribute.

        Raises:
            TypeError: If a declared field is missing from ``kwargs``.
        """
        self.seed = seed

        # Compare against the parsed field names. Testing membership in the
        # raw __FIELDS__ string was a substring match ("at" in "data"), which
        # misrouted unrelated keys into the namedtuple constructor.
        field_names = set(self.Data._fields)

        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        self.data = self.Data(**data)

    @property
    def to_dict(self):
        """Field values as a dict keyed by field name."""
        return self.data._asdict()

    @property
    def table(self):
        """Name of the ``Data`` namedtuple type (the declared ``__TABLE__``)."""
        return self.Data.__name__
40
+
41
+
42
class ConsoleItem(BaseItem):
    """Item with table name "console" and a single field called "data"."""

    __TABLE__ = "console"
    __FIELDS__ = "data"
46
+
cobweb_new/base/log.py ADDED
@@ -0,0 +1,94 @@
1
+ import logging
2
+
3
+
4
class ColorCodes:
    """Named ANSI escape sequences for colouring and styling terminal text."""

    # Text Reset
    RESET = "\033[0m"

    # Regular Colors
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Bright Colors
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_PURPLE = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    # Background Colors
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_PURPLE = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright Background Colors
    BG_BRIGHT_RED = "\033[101m"
    BG_BRIGHT_GREEN = "\033[102m"
    BG_BRIGHT_YELLOW = "\033[103m"
    BG_BRIGHT_BLUE = "\033[104m"
    BG_BRIGHT_PURPLE = "\033[105m"
    BG_BRIGHT_CYAN = "\033[106m"
    BG_BRIGHT_WHITE = "\033[107m"

    # Text Styles
    BOLD = "\033[1m"
    DIM = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    REVERSE = "\033[7m"
    HIDDEN = "\033[8m"
52
+
53
+
54
class Log:
    """Facade exposing the level methods of one class-wide logger.

    Defining the class configures the root logging setup (INFO level and a
    custom format) and raises the ``oss2.api`` logger to WARNING.
    """

    logging.getLogger('oss2.api').setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.INFO,
        format=(
            '%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
            ' %(levelname)s -> %(message)s'
        ),
    )
    # Shared by every instance; replaced by set_log_name().
    log = logging.getLogger()

    def set_log_name(self, name):
        """Swap the shared logger for the one registered under ``name``."""
        type(self).log = logging.getLogger(name)

    @property
    def debug(self):
        """Bound ``debug`` method of the current shared logger."""
        return type(self).log.debug

    @property
    def info(self):
        """Bound ``info`` method of the current shared logger."""
        return type(self).log.info

    @property
    def warning(self):
        """Bound ``warning`` method of the current shared logger."""
        return type(self).log.warning

    @property
    def exception(self):
        """Bound ``exception`` method of the current shared logger."""
        return type(self).log.exception

    @property
    def error(self):
        """Bound ``error`` method of the current shared logger."""
        return type(self).log.error

    @property
    def critical(self):
        """Bound ``critical`` method of the current shared logger."""
        return type(self).log.critical


# Module-wide instance used by the rest of the package.
logger = Log()
92
+
93
+
94
+
@@ -0,0 +1,82 @@
1
+ import random
2
+ import requests
3
+
4
+
5
class Request:
    """Description of one HTTP request plus free-form metadata.

    Keyword arguments named in ``__REQUEST_ATTRS__`` are collected in
    ``request_setting`` and forwarded to ``requests.request``; every other
    keyword argument becomes an attribute on the instance.
    """

    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
            self,
            url,
            seed,
            random_ua=True,
            check_status_code=True,
            **kwargs
    ):
        """Build the request.

        Args:
            url: Target URL.
            seed: Seed object this request belongs to.
            random_ua: When true, add a random user-agent header unless the
                supplied headers already contain one.
            check_status_code: When true, ``download`` raises on HTTP errors.
            **kwargs: ``requests`` options and/or extra attributes. If no
                truthy ``method`` is given, it defaults to ``POST`` when
                ``data`` or ``json`` is supplied and ``GET`` otherwise.
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting = {}

        for key, value in kwargs.items():
            if key in self.__class__.__REQUEST_ATTRS__:
                self.request_setting[key] = value
            else:
                setattr(self, key, value)

        if not getattr(self, "method", None):
            has_body = self.request_setting.get("data") or self.request_setting.get("json")
            self.method = "POST" if has_body else "GET"

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """Return a freshly randomized desktop browser user-agent string."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
        return user_agent

    def _build_header(self) -> None:
        """Ensure the request headers carry a user-agent.

        Missing or empty headers are replaced by a default pair of headers.
        Headers without a user-agent (matched case-insensitively) are copied
        before the header is added, so a dict the caller shares between
        requests is never mutated and each request gets its own random value.
        """
        headers = self.request_setting.get("headers")
        if not headers:
            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
        elif "user-agent" not in {name.lower() for name in headers}:
            self.request_setting["headers"] = {**headers, "user-agent": self._random_ua}

    def download(self) -> "requests.Response":
        """Send the request and return the response.

        Raises:
            requests.HTTPError: For 4xx/5xx responses when
                ``check_status_code`` is true.
        """
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self):
        """Extra attributes only: everything except the core request fields."""
        _dict = self.__dict__.copy()
        for core_field in ('url', 'seed', 'check_status_code', 'request_setting'):
            _dict.pop(core_field)
        return _dict
81
+
82
+
@@ -0,0 +1,23 @@
1
+
2
+
3
class Response:
    """Pairs a seed with the response obtained for it, plus extra metadata."""

    def __init__(
            self,
            seed,
            response,
            **kwargs
    ):
        """Store ``seed`` and ``response``; extra kwargs become attributes."""
        self.seed = seed
        self.response = response

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

    @property
    def to_dict(self):
        """Copy of the instance attributes without ``seed`` and ``response``."""
        extras = dict(self.__dict__)
        del extras['seed']
        del extras['response']
        return extras
23
+