cobweb-launcher 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

cobweb/config.py ADDED
@@ -0,0 +1,164 @@
1
+ import json
2
+ from collections import namedtuple
3
+ from base.utils import struct_table_name
4
+
5
# Lightweight immutable records describing a storer, a scheduler and a redis
# connection. Field order matters because the records are plain tuples.
StorerInfo = namedtuple(
    "StorerInfo",
    ["DB", "table", "fields", "length", "config"],
)
SchedulerInfo = namedtuple(
    "SchedulerInfo",
    ["DB", "table", "sql", "length", "size", "config"],
)
# Every RedisInfo field has a default, so RedisInfo() can be built with no
# arguments.
RedisInfo = namedtuple(
    "RedisInfo",
    ["host", "port", "username", "password", "db"],
    defaults=("localhost", 6379, None, None, 0),
)
18
+
19
+ # redis_info = dict(
20
+ # host="localhost",
21
+ # port=6379,
22
+ # username=None,
23
+ # password=None,
24
+ # db=0
25
+ # )
26
+
27
+
28
class SchedulerDB:
    """Factory helpers that build ``SchedulerInfo`` records, one per backend."""

    @staticmethod
    def default():
        """Return the ``SchedulerInfo`` for the ``Default`` backend.

        The record uses an empty table and sql, ``length=100``,
        ``size=500000`` and no extra config.
        """
        # Imported lazily so the backend module is only loaded when used.
        from db.scheduler.default import Default
        return SchedulerInfo(DB=Default, table="", sql="", length=100, size=500000, config=None)

    @staticmethod
    def textfile(table, sql=None, length=100, size=500000):
        """Return the ``SchedulerInfo`` for the ``Textfile`` backend.

        Args:
            table: Stored unchanged in the record's ``table`` field.
            sql: Stored unchanged in the record's ``sql`` field.
            length: Stored unchanged in the record's ``length`` field.
            size: Stored unchanged in the record's ``size`` field.
        """
        from db.scheduler.textfile import Textfile
        return SchedulerInfo(DB=Textfile, table=table, sql=sql, length=length, size=size, config=None)

    @staticmethod
    def diy(DB, table, sql=None, length=100, size=500000, config=None):
        """Return a ``SchedulerInfo`` for a user-supplied backend.

        Args:
            DB: A subclass of ``SchedulerInterface``; an instance of one is
                also accepted for backward compatibility.
            table: Stored unchanged in the record's ``table`` field.
            sql: Stored unchanged in the record's ``sql`` field.
            length: Stored unchanged in the record's ``length`` field.
            size: Stored unchanged in the record's ``size`` field.
            config: Stored unchanged in the record's ``config`` field.

        Raises:
            Exception: If ``DB`` neither subclasses nor is an instance of
                ``SchedulerInterface``.
        """
        from base.interface import SchedulerInterface
        # The built-in factories above store the backend *class* in ``DB``,
        # so a custom backend passed the same way must be checked with
        # issubclass; isinstance alone rejects every class.
        is_backend_class = isinstance(DB, type) and issubclass(DB, SchedulerInterface)
        if not (is_backend_class or isinstance(DB, SchedulerInterface)):
            raise Exception("DB must be inherit from SchedulerInterface")
        return SchedulerInfo(DB=DB, table=table, sql=sql, length=length, size=size, config=config)
46
+
47
+ # @staticmethod
48
+ # def info(scheduler_info):
49
+ # if not scheduler_info:
50
+ # return SchedulerDB.default()
51
+ #
52
+ # if isinstance(scheduler_info, SchedulerInfo):
53
+ # return scheduler_info
54
+ #
55
+ # if isinstance(scheduler_info, str):
56
+ # scheduler = json.loads(scheduler_info)
57
+ # if isinstance(scheduler, dict):
58
+ # db_name = scheduler["DB"]
59
+ # if db_name in dir(SchedulerDB):
60
+ # del scheduler["DB"]
61
+ # else:
62
+ # db_name = "diy"
63
+ # func = getattr(SchedulerDB, db_name)
64
+ # return func(**scheduler)
65
+
66
+
67
class StorerDB:
    """Factory helpers that build ``StorerInfo`` records, one per backend.

    Every factory passes ``table`` through ``struct_table_name`` before
    storing it in the record.
    """

    @staticmethod
    def console(table, fields, length=200):
        """Return the ``StorerInfo`` for the ``Console`` backend (no config)."""
        # Imported lazily so the backend module is only loaded when used.
        from db.storer.console import Console
        table = struct_table_name(table)
        return StorerInfo(DB=Console, table=table, fields=fields, length=length, config=None)

    @staticmethod
    def textfile(table, fields, length=200):
        """Return the ``StorerInfo`` for the ``Textfile`` backend (no config)."""
        from db.storer.textfile import Textfile
        table = struct_table_name(table)
        return StorerInfo(DB=Textfile, table=table, fields=fields, length=length, config=None)

    @staticmethod
    def loghub(table, fields, length=200, config=None):
        """Return the ``StorerInfo`` for the ``Loghub`` backend.

        ``config`` is stored unchanged in the record's ``config`` field.
        """
        from db.storer.loghub import Loghub
        table = struct_table_name(table)
        return StorerInfo(DB=Loghub, table=table, fields=fields, length=length, config=config)

    @staticmethod
    def diy(DB, table, fields, length=200, config=None):
        """Return a ``StorerInfo`` for a user-supplied backend.

        Args:
            DB: A subclass of ``StorerInterface``; an instance of one is
                also accepted for backward compatibility.
            table: Normalised with ``struct_table_name`` before being stored.
            fields: Stored unchanged in the record's ``fields`` field.
            length: Stored unchanged in the record's ``length`` field.
            config: Stored unchanged in the record's ``config`` field.

        Raises:
            Exception: If ``DB`` neither subclasses nor is an instance of
                ``StorerInterface``.
        """
        from base.interface import StorerInterface
        # The built-in factories above store the backend *class* in ``DB``,
        # so a custom backend passed the same way must be checked with
        # issubclass; isinstance alone rejects every class.
        is_backend_class = isinstance(DB, type) and issubclass(DB, StorerInterface)
        if not (is_backend_class or isinstance(DB, StorerInterface)):
            raise Exception("DB must be inherit from StorerInterface")
        table = struct_table_name(table)
        return StorerInfo(DB=DB, table=table, fields=fields, length=length, config=config)
94
+
95
+ # @staticmethod
96
+ # def info(storer_info):
97
+ # if not storer_info:
98
+ # return None
99
+ #
100
+ # if isinstance(storer_info, str):
101
+ # storer_info = json.loads(storer_info)
102
+ #
103
+ # if any(isinstance(storer_info, t) for t in (dict, StorerInfo)):
104
+ # storer_info = [storer_info]
105
+ #
106
+ # if not isinstance(storer_info, list):
107
+ # raise Exception("StorerDB.info storer_info")
108
+ #
109
+ # storer_info_list = []
110
+ # for storer in storer_info:
111
+ # if isinstance(storer, StorerInfo):
112
+ # storer_info_list.append(storer)
113
+ # else:
114
+ # db_name = storer["DB"]
115
+ # if db_name in dir(StorerDB):
116
+ # del storer["DB"]
117
+ # else:
118
+ # db_name = "diy"
119
+ # func = getattr(StorerDB, db_name)
120
+ # storer_info_list.append(func(**storer))
121
+ # return storer_info_list
122
+
123
+
124
+
125
def deal(config, tag):
    """Turn one configuration entry into its ``*Info`` record.

    Args:
        config: Either a dict of keyword arguments, or an already-built
            ``StorerInfo`` / ``SchedulerInfo`` / ``RedisInfo``, which is
            returned unchanged.
        tag: Selects how a dict is interpreted: ``0`` builds a
            ``RedisInfo``, ``1`` dispatches to a ``SchedulerDB`` factory,
            ``2`` dispatches to a ``StorerDB`` factory.

    Returns:
        The record built from ``config``, or ``config`` itself when it is
        already a record.

    Raises:
        ValueError: If ``config`` is a dict and ``tag`` is not 0, 1 or 2.
        TypeError: If ``config`` is neither a dict nor one of the records.
        KeyError: If a scheduler/storer dict has no ``"DB"`` entry.
    """
    if isinstance(config, (StorerInfo, SchedulerInfo, RedisInfo)):
        return config
    if not isinstance(config, dict):
        raise TypeError("config must be in [StorerInfo, SchedulerInfo, RedisInfo]")

    if tag == 0:
        return RedisInfo(**config)
    if tag == 1:
        factory = SchedulerDB
    elif tag == 2:
        factory = StorerDB
    else:
        raise ValueError("tag must be in [0, 1, 2]")

    # Work on a shallow copy: removing "DB" from the caller's dict would make
    # a second call with the same config fail with KeyError.
    params = dict(config)
    db_name = params["DB"]
    if db_name in dir(factory):
        # "DB" names one of the factory's own methods; it is the selector,
        # not an argument, so it must not be forwarded.
        del params["DB"]
    else:
        # Anything else is treated as a custom backend and is forwarded to
        # ``diy`` as its ``DB`` argument.
        db_name = "diy"
    return getattr(factory, db_name)(**params)
149
+
150
+
151
def info(configs, tag=0):
    """Normalise raw configuration into record(s) via ``deal``.

    Args:
        configs: ``None``, a JSON string, a single entry, or a list of
            entries. A string is decoded with ``json.loads`` first.
        tag: ``0`` for redis, ``1`` for scheduler, ``2`` for storer; passed
            through to ``deal``.

    Returns:
        * ``configs is None``: ``SchedulerDB.default()`` when ``tag == 1``,
          otherwise ``None``.
        * ``tag == 0``: the single result of ``deal``.
        * otherwise: a list with one ``deal`` result per entry (a non-list
          value is treated as a one-element list).
    """
    if configs is None:
        if tag == 1:
            return SchedulerDB.default()
        return None

    parsed = json.loads(configs) if isinstance(configs, str) else configs

    if tag == 0:
        return deal(parsed, tag)

    entries = parsed if isinstance(parsed, list) else [parsed]
    records = []
    for entry in entries:
        records.append(deal(entry, tag))
    return records
cobweb/db/redis_db.py CHANGED
@@ -119,7 +119,7 @@ class RedisDB:
119
119
  def get_seed(self, length: int = 200):
120
120
  cs = time.time()
121
121
 
122
- if self._get_lock(key=self.update_lock, t=Setting.SCHEDULER_LOCK_TIME):
122
+ if self._get_lock(key=self.update_lock):
123
123
 
124
124
  update_item, result = {}, []
125
125
 
@@ -171,7 +171,8 @@ def launcher(task):
171
171
  target=spider.spider_task,
172
172
  args=(
173
173
  stop, func, item,
174
- redis_db.del_seed
174
+ redis_db.del_seed,
175
+ redis_db.add_seed
175
176
  )
176
177
  ).start()
177
178
 
@@ -53,7 +53,7 @@ class Spider:
53
53
  self.storage = storage
54
54
  self.queue = queue
55
55
 
56
- def spider_task(self, stop, func, item, del_seed):
56
+ def spider_task(self, stop, func, item, del_seed, add_seed):
57
57
  while not stop.is_set():
58
58
 
59
59
  seed = self.queue.pop()
@@ -72,6 +72,7 @@ class Spider:
72
72
 
73
73
  store_queue = None
74
74
  store_data = list()
75
+ add_seed_list = list()
75
76
  iterators = func(item, seed)
76
77
 
77
78
  if not isgenerator(iterators):
@@ -94,7 +95,7 @@ class Spider:
94
95
  store_queue = it.queue()
95
96
  store_data.append(it.struct_data)
96
97
  elif isinstance(it, Seed):
97
- self.queue.push(it)
98
+ add_seed_list.append(it)
98
99
 
99
100
  elif isinstance(it, str) and it == DealModel.polling:
100
101
  self.queue.push(seed)
@@ -111,10 +112,12 @@ class Spider:
111
112
  if not status:
112
113
  seed._retry += 1
113
114
  self.queue.push(seed)
114
-
115
115
  elif store_queue and store_data:
116
116
  store_data.append(seed)
117
117
  store_queue.push(store_data)
118
+ elif add_seed_list:
119
+ del_seed(seed, spider_status=True)
120
+ add_seed(add_seed_list)
118
121
 
119
122
  except Exception as e:
120
123
  seed._retry += 1
cobweb/new.py ADDED
@@ -0,0 +1,20 @@
1
+
2
+
3
+
4
+
5
class Launcher:
    """Skeleton launcher exposing a decorator-based registration API."""

    def __init__(self):
        """Create a launcher; no state is set up yet."""

    def register(self, task_name, func):
        """Placeholder hook for ``launch``; currently does nothing."""
        return None

    def launch(self, task_name):
        """Return a decorator that registers a function under ``task_name``.

        The decorated function is handed back unchanged.
        """

        def _bind(func):
            # Register the spider program.
            self.register(task_name, func)
            return func

        return _bind
cobweb/task.py CHANGED
@@ -6,7 +6,6 @@ from .utils import parse_info, struct_start_seeds
6
6
  def init_task_env():
7
7
  Setting.RESET_SCORE = int(os.getenv("RESET_SCORE", 600))
8
8
  Setting.CHECK_LOCK_TIME = int(os.getenv("CHECK_LOCK_TIME", 30))
9
- Setting.SCHEDULER_LOCK_TIME = int(os.getenv("SCHEDULER_LOCK_TIME", 15))
10
9
  Setting.DEAL_MODEL = os.getenv("DEAL_MODEL", DealModel.failure)
11
10
  Setting.LAUNCHER_MODEL = os.getenv("LAUNCHER_MODEL", LauncherModel.task)
12
11
  Setting.SCHEDULER_WAIT_TIME = float(os.getenv("SCHEDULER_WAIT_TIME", 5))
@@ -1,19 +1,20 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.1.20
3
+ Version: 0.1.21
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
7
7
  Author-email: 2604868278@qq.com
8
8
  License: MIT
9
9
  Keywords: cobweb-launcher, cobweb
10
+ Platform: UNKNOWN
10
11
  Classifier: Programming Language :: Python :: 3
11
12
  Requires-Python: >=3.7
12
13
  Description-Content-Type: text/markdown
13
14
  License-File: LICENSE
14
- Requires-Dist: requests >=2.19.1
15
- Requires-Dist: oss2 >=2.18.1
16
- Requires-Dist: redis >=4.4.4
15
+ Requires-Dist: requests (>=2.19.1)
16
+ Requires-Dist: oss2 (>=2.18.1)
17
+ Requires-Dist: redis (>=4.4.4)
17
18
  Requires-Dist: aliyun-log-python-sdk
18
19
 
19
20
  # cobweb
@@ -43,3 +44,5 @@ need deal
43
44
 
44
45
 
45
46
  ![img.png](https://image-luyuan.oss-cn-hangzhou.aliyuncs.com/image/D2388CDC-B9E5-4CE4-9F2C-7D173763B6A8.png)
47
+
48
+
@@ -1,15 +1,17 @@
1
1
  cobweb/__init__.py,sha256=zVims_awYxKNqULuKSjFXdZAaDJaqzFBJJo_SqeakMs,286
2
2
  cobweb/bbb.py,sha256=zKeCeBVFQfhEv6M8CCktUTM7tXDZmAu6ZN0-ET44pUY,5707
3
+ cobweb/config.py,sha256=iWrep4vW9UyUi3hvgUWb4RL2IpEwpo_ttY0EUDKWN4g,5362
3
4
  cobweb/constant.py,sha256=oT2Y-qm2du2QGLA2D53aGi52Eda9FdV3hBomYRpbXl8,613
4
5
  cobweb/decorators.py,sha256=eYQI9rddPVJihAlomLTmbtQhIOzPw8dCrOFpxAq2pLY,318
5
6
  cobweb/interface.py,sha256=um_k2AAQl1HTOvfUlq914DjkpfZVwt2m1B65EpPKrmE,802
6
7
  cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
8
+ cobweb/new.py,sha256=-ZHoLZE4ZGRMBU5YWmWHMfLn2ipuTKEaCTO1pU2fX5o,303
7
9
  cobweb/setting.py,sha256=UAu_dLuIFYO98MxtlZ5sZqJcwKAUKq4Bu4KoKlV50Mc,288
8
- cobweb/task.py,sha256=wOGnVJ82BOiOwCmqhsoCcAgD6GAYaXodJvR4yRyyBAc,2082
10
+ cobweb/task.py,sha256=SyWC43C7hqQAqH-1HECXEEgp_6L6lwDhYe1kZNnHUvA,2006
9
11
  cobweb/utils.py,sha256=ivmRqJJNtwdOKYT4G7qQCWnL8ar9c-shxeDZzGB2E9c,2651
10
12
  cobweb/db/__init__.py,sha256=jC-uOThYLtiDUG6cTJRkDITgOzR4nIOeaZQeZhuk-v0,139
11
13
  cobweb/db/oss_db.py,sha256=59HwMMfoGUr6l_BI4p84YHYoQwEn22cCM_1GZK34Uo4,4244
12
- cobweb/db/redis_db.py,sha256=PJqiEurv3oFB5JEflOqIJYgH8A2VoAPyaA6ZxlUabfE,8210
14
+ cobweb/db/redis_db.py,sha256=go9IPQQZAl_jXiHDTruWF7N8svs9QEpQPOr7JhuLCQs,8179
13
15
  cobweb/db/scheduler/__init__.py,sha256=w5uIGEB1wLJ-H9RqGpzRwOEWW-BBVSk6Cc7FxZIlWCs,51
14
16
  cobweb/db/scheduler/default.py,sha256=XDtxNyu5KTpVAbfCOW8mR1zNFNHiMuaQ4sAhZuIYBoM,79
15
17
  cobweb/db/scheduler/textfile.py,sha256=P5pk75DUnbXbLNPOaMIbHh2lbwBGBlv0mitX58yK-MU,786
@@ -29,14 +31,14 @@ cobweb/equip/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
29
31
  cobweb/equip/distributed/launcher.py,sha256=1LzxibGXWR20XpXawakiRpEMaa9yfaj2rFSKnmEwjFc,7475
30
32
  cobweb/equip/distributed/models.py,sha256=qTGzxLdb2arsZSZK2HE4-MrqhraUhc2Ol5wBvlv_aWA,5008
31
33
  cobweb/equip/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- cobweb/equip/single/launcher.py,sha256=KRsw7yxklvFM85cel-EyLsNPLyrC9Hd26BMSx6-4Hac,6785
33
- cobweb/equip/single/models.py,sha256=VwaGgZXq72a48zsZja100Lj2GBHF1GkYTddTi1R-RxM,5422
34
+ cobweb/equip/single/launcher.py,sha256=lZt4WNar0_QQjUSDhaJnxvHDiuQReXeFxquVgJjI5T4,6824
35
+ cobweb/equip/single/models.py,sha256=uH6VXHiUSEOV0PN0s63NKRveTCRXy9_QH6qJRQgYv9Q,5610
34
36
  cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
37
  cobweb/single/launcher.py,sha256=IoJbn87j7t7Pib_FxoWZmmX8asXOqNGb-9ospw6EYJI,7302
36
38
  cobweb/single/models.py,sha256=wIEV35666lxdzqjDqBHPjjh-r6zD0x24rtQYz7d4Oxw,4332
37
39
  cobweb/single/nest.py,sha256=49K6KQ934INfPrWQsrq9rIFpQauLbLGOFbDaHvoQzOk,5015
38
- cobweb_launcher-0.1.20.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
39
- cobweb_launcher-0.1.20.dist-info/METADATA,sha256=OQv2x6SduQV3xYh5VvWtUKOcBVNi1rgbb4ukD4lXhAc,1220
40
- cobweb_launcher-0.1.20.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
41
- cobweb_launcher-0.1.20.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
42
- cobweb_launcher-0.1.20.dist-info/RECORD,,
40
+ cobweb_launcher-0.1.21.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
+ cobweb_launcher-0.1.21.dist-info/METADATA,sha256=10RvainuQ8tnyG0H_wFa6NMBlRZmjfL-oVXLEFHDFB8,1246
42
+ cobweb_launcher-0.1.21.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ cobweb_launcher-0.1.21.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
+ cobweb_launcher-0.1.21.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: bdist_wheel (0.37.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5