cobweb-launcher 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/config.py +164 -0
- cobweb/constant.py +1 -0
- cobweb/equip/single/launcher.py +2 -1
- cobweb/equip/single/models.py +6 -3
- cobweb/new.py +20 -0
- {cobweb_launcher-0.1.19.dist-info → cobweb_launcher-0.1.21.dist-info}/METADATA +7 -4
- {cobweb_launcher-0.1.19.dist-info → cobweb_launcher-0.1.21.dist-info}/RECORD +10 -8
- {cobweb_launcher-0.1.19.dist-info → cobweb_launcher-0.1.21.dist-info}/WHEEL +1 -1
- {cobweb_launcher-0.1.19.dist-info → cobweb_launcher-0.1.21.dist-info}/LICENSE +0 -0
- {cobweb_launcher-0.1.19.dist-info → cobweb_launcher-0.1.21.dist-info}/top_level.txt +0 -0
cobweb/config.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections import namedtuple
|
|
3
|
+
from base.utils import struct_table_name
|
|
4
|
+
|
|
5
|
+
StorerInfo = namedtuple(
|
|
6
|
+
"StorerInfo",
|
|
7
|
+
"DB, table, fields, length, config"
|
|
8
|
+
)
|
|
9
|
+
SchedulerInfo = namedtuple(
|
|
10
|
+
"SchedulerInfo",
|
|
11
|
+
"DB, table, sql, length, size, config",
|
|
12
|
+
)
|
|
13
|
+
RedisInfo = namedtuple(
|
|
14
|
+
"RedisInfo",
|
|
15
|
+
"host, port, username, password, db",
|
|
16
|
+
defaults=("localhost", 6379, None, None, 0)
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# redis_info = dict(
|
|
20
|
+
# host="localhost",
|
|
21
|
+
# port=6379,
|
|
22
|
+
# username=None,
|
|
23
|
+
# password=None,
|
|
24
|
+
# db=0
|
|
25
|
+
# )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SchedulerDB:
|
|
29
|
+
|
|
30
|
+
@staticmethod
|
|
31
|
+
def default():
|
|
32
|
+
from db.scheduler.default import Default
|
|
33
|
+
return SchedulerInfo(DB=Default, table="", sql="", length=100, size=500000, config=None)
|
|
34
|
+
|
|
35
|
+
@staticmethod
|
|
36
|
+
def textfile(table, sql=None, length=100, size=500000):
|
|
37
|
+
from db.scheduler.textfile import Textfile
|
|
38
|
+
return SchedulerInfo(DB=Textfile, table=table, sql=sql, length=length, size=size, config=None)
|
|
39
|
+
|
|
40
|
+
@staticmethod
|
|
41
|
+
def diy(DB, table, sql=None, length=100, size=500000, config=None):
|
|
42
|
+
from base.interface import SchedulerInterface
|
|
43
|
+
if not isinstance(DB, SchedulerInterface):
|
|
44
|
+
raise Exception("DB must be inherit from SchedulerInterface")
|
|
45
|
+
return SchedulerInfo(DB=DB, table=table, sql=sql, length=length, size=size, config=config)
|
|
46
|
+
|
|
47
|
+
# @staticmethod
|
|
48
|
+
# def info(scheduler_info):
|
|
49
|
+
# if not scheduler_info:
|
|
50
|
+
# return SchedulerDB.default()
|
|
51
|
+
#
|
|
52
|
+
# if isinstance(scheduler_info, SchedulerInfo):
|
|
53
|
+
# return scheduler_info
|
|
54
|
+
#
|
|
55
|
+
# if isinstance(scheduler_info, str):
|
|
56
|
+
# scheduler = json.loads(scheduler_info)
|
|
57
|
+
# if isinstance(scheduler, dict):
|
|
58
|
+
# db_name = scheduler["DB"]
|
|
59
|
+
# if db_name in dir(SchedulerDB):
|
|
60
|
+
# del scheduler["DB"]
|
|
61
|
+
# else:
|
|
62
|
+
# db_name = "diy"
|
|
63
|
+
# func = getattr(SchedulerDB, db_name)
|
|
64
|
+
# return func(**scheduler)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class StorerDB:
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def console(table, fields, length=200):
|
|
71
|
+
from db.storer.console import Console
|
|
72
|
+
table = struct_table_name(table)
|
|
73
|
+
return StorerInfo(DB=Console, table=table, fields=fields, length=length, config=None)
|
|
74
|
+
|
|
75
|
+
@staticmethod
|
|
76
|
+
def textfile(table, fields, length=200):
|
|
77
|
+
from db.storer.textfile import Textfile
|
|
78
|
+
table = struct_table_name(table)
|
|
79
|
+
return StorerInfo(DB=Textfile, table=table, fields=fields, length=length, config=None)
|
|
80
|
+
|
|
81
|
+
@staticmethod
|
|
82
|
+
def loghub(table, fields, length=200, config=None):
|
|
83
|
+
from db.storer.loghub import Loghub
|
|
84
|
+
table = struct_table_name(table)
|
|
85
|
+
return StorerInfo(DB=Loghub, table=table, fields=fields, length=length, config=config)
|
|
86
|
+
|
|
87
|
+
@staticmethod
|
|
88
|
+
def diy(DB, table, fields, length=200, config=None):
|
|
89
|
+
from base.interface import StorerInterface
|
|
90
|
+
if not isinstance(DB, StorerInterface):
|
|
91
|
+
raise Exception("DB must be inherit from StorerInterface")
|
|
92
|
+
table = struct_table_name(table)
|
|
93
|
+
return StorerInfo(DB=DB, table=table, fields=fields, length=length, config=config)
|
|
94
|
+
|
|
95
|
+
# @staticmethod
|
|
96
|
+
# def info(storer_info):
|
|
97
|
+
# if not storer_info:
|
|
98
|
+
# return None
|
|
99
|
+
#
|
|
100
|
+
# if isinstance(storer_info, str):
|
|
101
|
+
# storer_info = json.loads(storer_info)
|
|
102
|
+
#
|
|
103
|
+
# if any(isinstance(storer_info, t) for t in (dict, StorerInfo)):
|
|
104
|
+
# storer_info = [storer_info]
|
|
105
|
+
#
|
|
106
|
+
# if not isinstance(storer_info, list):
|
|
107
|
+
# raise Exception("StorerDB.info storer_info")
|
|
108
|
+
#
|
|
109
|
+
# storer_info_list = []
|
|
110
|
+
# for storer in storer_info:
|
|
111
|
+
# if isinstance(storer, StorerInfo):
|
|
112
|
+
# storer_info_list.append(storer)
|
|
113
|
+
# else:
|
|
114
|
+
# db_name = storer["DB"]
|
|
115
|
+
# if db_name in dir(StorerDB):
|
|
116
|
+
# del storer["DB"]
|
|
117
|
+
# else:
|
|
118
|
+
# db_name = "diy"
|
|
119
|
+
# func = getattr(StorerDB, db_name)
|
|
120
|
+
# storer_info_list.append(func(**storer))
|
|
121
|
+
# return storer_info_list
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def deal(config, tag):
|
|
126
|
+
if isinstance(config, dict):
|
|
127
|
+
if tag == 0:
|
|
128
|
+
return RedisInfo(**config)
|
|
129
|
+
elif tag == 1:
|
|
130
|
+
db_name = config["DB"]
|
|
131
|
+
if db_name in dir(SchedulerDB):
|
|
132
|
+
del config["DB"]
|
|
133
|
+
else:
|
|
134
|
+
db_name = "diy"
|
|
135
|
+
func = getattr(SchedulerDB, db_name)
|
|
136
|
+
return func(**config)
|
|
137
|
+
elif tag == 2:
|
|
138
|
+
db_name = config["DB"]
|
|
139
|
+
if db_name in dir(StorerDB):
|
|
140
|
+
del config["DB"]
|
|
141
|
+
else:
|
|
142
|
+
db_name = "diy"
|
|
143
|
+
func = getattr(StorerDB, db_name)
|
|
144
|
+
return func(**config)
|
|
145
|
+
raise ValueError("tag must be in [0, 1, 2]")
|
|
146
|
+
elif any(isinstance(config, t) for t in (StorerInfo, SchedulerInfo, RedisInfo)):
|
|
147
|
+
return config
|
|
148
|
+
raise TypeError("config must be in [StorerInfo, SchedulerInfo, RedisInfo]")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def info(configs, tag = 0):
|
|
152
|
+
if configs is None:
|
|
153
|
+
return SchedulerDB.default() if tag == 1 else None
|
|
154
|
+
|
|
155
|
+
if isinstance(configs, str):
|
|
156
|
+
configs = json.loads(configs)
|
|
157
|
+
|
|
158
|
+
if tag == 0:
|
|
159
|
+
return deal(configs, tag)
|
|
160
|
+
|
|
161
|
+
if not isinstance(configs, list):
|
|
162
|
+
configs = [configs]
|
|
163
|
+
|
|
164
|
+
return [deal(config, tag) for config in configs]
|
cobweb/constant.py
CHANGED
cobweb/equip/single/launcher.py
CHANGED
cobweb/equip/single/models.py
CHANGED
|
@@ -53,7 +53,7 @@ class Spider:
|
|
|
53
53
|
self.storage = storage
|
|
54
54
|
self.queue = queue
|
|
55
55
|
|
|
56
|
-
def spider_task(self, stop, func, item, del_seed):
|
|
56
|
+
def spider_task(self, stop, func, item, del_seed, add_seed):
|
|
57
57
|
while not stop.is_set():
|
|
58
58
|
|
|
59
59
|
seed = self.queue.pop()
|
|
@@ -72,6 +72,7 @@ class Spider:
|
|
|
72
72
|
|
|
73
73
|
store_queue = None
|
|
74
74
|
store_data = list()
|
|
75
|
+
add_seed_list = list()
|
|
75
76
|
iterators = func(item, seed)
|
|
76
77
|
|
|
77
78
|
if not isgenerator(iterators):
|
|
@@ -94,7 +95,7 @@ class Spider:
|
|
|
94
95
|
store_queue = it.queue()
|
|
95
96
|
store_data.append(it.struct_data)
|
|
96
97
|
elif isinstance(it, Seed):
|
|
97
|
-
|
|
98
|
+
add_seed_list.append(it)
|
|
98
99
|
|
|
99
100
|
elif isinstance(it, str) and it == DealModel.polling:
|
|
100
101
|
self.queue.push(seed)
|
|
@@ -111,10 +112,12 @@ class Spider:
|
|
|
111
112
|
if not status:
|
|
112
113
|
seed._retry += 1
|
|
113
114
|
self.queue.push(seed)
|
|
114
|
-
|
|
115
115
|
elif store_queue and store_data:
|
|
116
116
|
store_data.append(seed)
|
|
117
117
|
store_queue.push(store_data)
|
|
118
|
+
elif add_seed_list:
|
|
119
|
+
del_seed(seed, spider_status=True)
|
|
120
|
+
add_seed(add_seed_list)
|
|
118
121
|
|
|
119
122
|
except Exception as e:
|
|
120
123
|
seed._retry += 1
|
cobweb/new.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Launcher:
|
|
6
|
+
|
|
7
|
+
def __init__(self):
|
|
8
|
+
pass
|
|
9
|
+
|
|
10
|
+
def register(self, task_name, func):
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
def launch(self, task_name):
|
|
14
|
+
|
|
15
|
+
def decorator(func):
|
|
16
|
+
# 注册爬虫程序
|
|
17
|
+
self.register(task_name, func)
|
|
18
|
+
return func
|
|
19
|
+
|
|
20
|
+
return decorator
|
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cobweb-launcher
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.21
|
|
4
4
|
Summary: spider_hole
|
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
|
6
6
|
Author: Juannie-PP
|
|
7
7
|
Author-email: 2604868278@qq.com
|
|
8
8
|
License: MIT
|
|
9
9
|
Keywords: cobweb-launcher, cobweb
|
|
10
|
+
Platform: UNKNOWN
|
|
10
11
|
Classifier: Programming Language :: Python :: 3
|
|
11
12
|
Requires-Python: >=3.7
|
|
12
13
|
Description-Content-Type: text/markdown
|
|
13
14
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: requests >=2.19.1
|
|
15
|
-
Requires-Dist: oss2 >=2.18.1
|
|
16
|
-
Requires-Dist: redis >=4.4.4
|
|
15
|
+
Requires-Dist: requests (>=2.19.1)
|
|
16
|
+
Requires-Dist: oss2 (>=2.18.1)
|
|
17
|
+
Requires-Dist: redis (>=4.4.4)
|
|
17
18
|
Requires-Dist: aliyun-log-python-sdk
|
|
18
19
|
|
|
19
20
|
# cobweb
|
|
@@ -43,3 +44,5 @@ need deal
|
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|

|
|
47
|
+
|
|
48
|
+
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
cobweb/__init__.py,sha256=zVims_awYxKNqULuKSjFXdZAaDJaqzFBJJo_SqeakMs,286
|
|
2
2
|
cobweb/bbb.py,sha256=zKeCeBVFQfhEv6M8CCktUTM7tXDZmAu6ZN0-ET44pUY,5707
|
|
3
|
-
cobweb/
|
|
3
|
+
cobweb/config.py,sha256=iWrep4vW9UyUi3hvgUWb4RL2IpEwpo_ttY0EUDKWN4g,5362
|
|
4
|
+
cobweb/constant.py,sha256=oT2Y-qm2du2QGLA2D53aGi52Eda9FdV3hBomYRpbXl8,613
|
|
4
5
|
cobweb/decorators.py,sha256=eYQI9rddPVJihAlomLTmbtQhIOzPw8dCrOFpxAq2pLY,318
|
|
5
6
|
cobweb/interface.py,sha256=um_k2AAQl1HTOvfUlq914DjkpfZVwt2m1B65EpPKrmE,802
|
|
6
7
|
cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
|
|
8
|
+
cobweb/new.py,sha256=-ZHoLZE4ZGRMBU5YWmWHMfLn2ipuTKEaCTO1pU2fX5o,303
|
|
7
9
|
cobweb/setting.py,sha256=UAu_dLuIFYO98MxtlZ5sZqJcwKAUKq4Bu4KoKlV50Mc,288
|
|
8
10
|
cobweb/task.py,sha256=SyWC43C7hqQAqH-1HECXEEgp_6L6lwDhYe1kZNnHUvA,2006
|
|
9
11
|
cobweb/utils.py,sha256=ivmRqJJNtwdOKYT4G7qQCWnL8ar9c-shxeDZzGB2E9c,2651
|
|
@@ -29,14 +31,14 @@ cobweb/equip/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
|
|
|
29
31
|
cobweb/equip/distributed/launcher.py,sha256=1LzxibGXWR20XpXawakiRpEMaa9yfaj2rFSKnmEwjFc,7475
|
|
30
32
|
cobweb/equip/distributed/models.py,sha256=qTGzxLdb2arsZSZK2HE4-MrqhraUhc2Ol5wBvlv_aWA,5008
|
|
31
33
|
cobweb/equip/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
-
cobweb/equip/single/launcher.py,sha256=
|
|
33
|
-
cobweb/equip/single/models.py,sha256=
|
|
34
|
+
cobweb/equip/single/launcher.py,sha256=lZt4WNar0_QQjUSDhaJnxvHDiuQReXeFxquVgJjI5T4,6824
|
|
35
|
+
cobweb/equip/single/models.py,sha256=uH6VXHiUSEOV0PN0s63NKRveTCRXy9_QH6qJRQgYv9Q,5610
|
|
34
36
|
cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
37
|
cobweb/single/launcher.py,sha256=IoJbn87j7t7Pib_FxoWZmmX8asXOqNGb-9ospw6EYJI,7302
|
|
36
38
|
cobweb/single/models.py,sha256=wIEV35666lxdzqjDqBHPjjh-r6zD0x24rtQYz7d4Oxw,4332
|
|
37
39
|
cobweb/single/nest.py,sha256=49K6KQ934INfPrWQsrq9rIFpQauLbLGOFbDaHvoQzOk,5015
|
|
38
|
-
cobweb_launcher-0.1.19.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
39
|
-
cobweb_launcher-0.1.
|
|
40
|
-
cobweb_launcher-0.1.
|
|
41
|
-
cobweb_launcher-0.1.19.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
42
|
-
cobweb_launcher-0.1.19.dist-info/RECORD,,
|
|
40
|
+
cobweb_launcher-0.1.21.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
41
|
+
cobweb_launcher-0.1.21.dist-info/METADATA,sha256=10RvainuQ8tnyG0H_wFa6NMBlRZmjfL-oVXLEFHDFB8,1246
|
|
42
|
+
cobweb_launcher-0.1.21.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
43
|
+
cobweb_launcher-0.1.21.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
44
|
+
cobweb_launcher-0.1.21.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|