cobweb-launcher 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/__init__.py +1 -12
- cobweb/bbb.py +1 -1
- cobweb/db/redis_db.py +2 -12
- cobweb/db/storer/loghub.py +2 -2
- cobweb/distributed/launcher.py +35 -11
- {cobweb_launcher-0.0.5.dist-info → cobweb_launcher-0.0.7.dist-info}/METADATA +1 -1
- {cobweb_launcher-0.0.5.dist-info → cobweb_launcher-0.0.7.dist-info}/RECORD +10 -10
- {cobweb_launcher-0.0.5.dist-info → cobweb_launcher-0.0.7.dist-info}/LICENSE +0 -0
- {cobweb_launcher-0.0.5.dist-info → cobweb_launcher-0.0.7.dist-info}/WHEEL +0 -0
- {cobweb_launcher-0.0.5.dist-info → cobweb_launcher-0.0.7.dist-info}/top_level.txt +0 -0
cobweb/__init__.py
CHANGED
@@ -1,19 +1,8 @@
|
|
1
|
-
# from .distributed.launcher import launcher
|
2
|
-
# from .distributed import models
|
3
1
|
from .bbb import Seed, Queue, DBItem
|
4
2
|
from .task import Task
|
5
3
|
from .log import log
|
6
4
|
from .interface import SchedulerInterface, StorerInterface
|
7
5
|
from .db.redis_db import RedisDB
|
8
|
-
|
9
|
-
# from .base.bbb import Seed
|
10
|
-
# from .base import interface
|
11
|
-
# from .base import config
|
12
|
-
# from .base import utils
|
13
|
-
# from .base import log as logger
|
14
|
-
#
|
15
|
-
# from .db.scheduler import *
|
16
|
-
# from .db.storer import *
|
17
|
-
# from .db.base import *
|
6
|
+
from .distributed.launcher import launcher
|
18
7
|
|
19
8
|
|
cobweb/bbb.py
CHANGED
cobweb/db/redis_db.py
CHANGED
@@ -9,19 +9,9 @@ class RedisDB:
|
|
9
9
|
self,
|
10
10
|
project: str,
|
11
11
|
task_name: str,
|
12
|
-
|
13
|
-
port=None,
|
14
|
-
username=None,
|
15
|
-
password=None,
|
16
|
-
db=0
|
12
|
+
config: dict
|
17
13
|
):
|
18
|
-
pool = redis.ConnectionPool(
|
19
|
-
host=host,
|
20
|
-
port=port,
|
21
|
-
username=username,
|
22
|
-
password=password,
|
23
|
-
db=db
|
24
|
-
)
|
14
|
+
pool = redis.ConnectionPool(**config)
|
25
15
|
self.heartbeat_key = f"{project}:{task_name}:heartbeat" # redis type string
|
26
16
|
self.spider_key = f"{project}:{task_name}:seed_info:spider" # redis type zset, .format(priority)
|
27
17
|
self.storer_key = f"{project}:{task_name}:seed_info:storer:%s" # redis type set,
|
cobweb/db/storer/loghub.py
CHANGED
@@ -5,8 +5,8 @@ from cobweb import log, StorerInterface
|
|
5
5
|
|
6
6
|
class Loghub(StorerInterface):
|
7
7
|
|
8
|
-
def __init__(self,
|
9
|
-
super().__init__(
|
8
|
+
def __init__(self, **kwargs):
|
9
|
+
super().__init__(**kwargs)
|
10
10
|
self.client = None
|
11
11
|
|
12
12
|
def init_loghub_clint(self):
|
cobweb/distributed/launcher.py
CHANGED
@@ -2,10 +2,11 @@ import time
|
|
2
2
|
import threading
|
3
3
|
from threading import Thread
|
4
4
|
from importlib import import_module
|
5
|
-
from models import Scheduler, Spider, Storer
|
6
|
-
from cobweb.utils import struct_queue_name, restore_table_name
|
7
|
-
from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
8
5
|
|
6
|
+
from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
7
|
+
from cobweb.utils import struct_queue_name, restore_table_name
|
8
|
+
from .models import Scheduler, Spider, Storer
|
9
|
+
from collections import namedtuple
|
9
10
|
|
10
11
|
# def start_seeds(seeds):
|
11
12
|
# if not seeds:
|
@@ -29,14 +30,14 @@ from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
|
29
30
|
# storer_data[db_name]["db_args_list"].append(info[1:])
|
30
31
|
# return storer_data
|
31
32
|
|
32
|
-
def
|
33
|
+
def get_scheduler_db(db):
|
33
34
|
if isinstance(db, str):
|
34
35
|
if "." in db:
|
35
36
|
model_path = db.split(".")
|
36
37
|
model = import_module(db)
|
37
38
|
obj = getattr(model, db)
|
38
39
|
else:
|
39
|
-
model = import_module(f"cobweb.db.
|
40
|
+
model = import_module(f"cobweb.db.scheduler.{db.lower()}")
|
40
41
|
obj = getattr(model, db.capitalize())
|
41
42
|
return obj
|
42
43
|
# if db.lower() in dir(StorerDB):
|
@@ -48,6 +49,25 @@ def get_storer_db(db):
|
|
48
49
|
raise TypeError()
|
49
50
|
|
50
51
|
|
52
|
+
def get_storer_db(db):
|
53
|
+
if isinstance(db, str):
|
54
|
+
if "." in db:
|
55
|
+
model_path = db.split(".")
|
56
|
+
model = import_module(db)
|
57
|
+
obj = getattr(model, db)
|
58
|
+
else:
|
59
|
+
model = import_module(f"cobweb.db.storer.{db.lower()}")
|
60
|
+
obj = getattr(model, db.capitalize())
|
61
|
+
return obj, db.lower()
|
62
|
+
# if db.lower() in dir(StorerDB):
|
63
|
+
# return getattr(StorerDB, db)
|
64
|
+
# else:
|
65
|
+
# pass
|
66
|
+
elif issubclass(db, StorerInterface):
|
67
|
+
return db, db.__name__.lower()
|
68
|
+
raise TypeError()
|
69
|
+
|
70
|
+
|
51
71
|
def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
52
72
|
time.sleep(5)
|
53
73
|
while True:
|
@@ -101,18 +121,22 @@ def launcher(task):
|
|
101
121
|
stop = threading.Event()
|
102
122
|
|
103
123
|
# 初始化redis信息
|
104
|
-
redis_db = RedisDB(task.project, task.task_name,
|
124
|
+
redis_db = RedisDB(task.project, task.task_name, task.redis_info)
|
105
125
|
|
106
126
|
log.info("初始化cobweb!")
|
107
127
|
|
108
128
|
seed_queue = Queue()
|
109
129
|
|
130
|
+
if task.scheduler_info is None:
|
131
|
+
task.scheduler_info = dict()
|
132
|
+
|
110
133
|
# 调度器动态继承
|
111
134
|
sql = task.scheduler_info.get("sql")
|
112
135
|
table = task.scheduler_info.get("table")
|
113
136
|
size = task.scheduler_info.get("size")
|
114
137
|
scheduler_config = task.scheduler_info.get("config")
|
115
|
-
|
138
|
+
scheduler_db = task.scheduler_info.get("db", "default")
|
139
|
+
DB = get_scheduler_db(scheduler_db)
|
116
140
|
# SchedulerDB, table, sql, length, size, config = task.scheduler_info
|
117
141
|
SchedulerTmp = type(DB.__name__, (Scheduler, DB), {})
|
118
142
|
|
@@ -134,12 +158,12 @@ def launcher(task):
|
|
134
158
|
item = type("Item", (object,), {"redis_client": redis_db})()
|
135
159
|
|
136
160
|
for storer_info in storer_info_list:
|
137
|
-
|
161
|
+
storer_db = storer_info["db"]
|
138
162
|
fields = storer_info["fields"]
|
139
163
|
storer_table = storer_info.get("table", "console")
|
140
164
|
storer_config = storer_info.get("config")
|
141
165
|
|
142
|
-
StorerDB, db_name = get_storer_db(
|
166
|
+
StorerDB, db_name = get_storer_db(storer_db)
|
143
167
|
StorerTmp = type(StorerDB.__name__, (Storer, StorerDB), {})
|
144
168
|
|
145
169
|
if not getattr(item, db_name, None):
|
@@ -154,9 +178,9 @@ def launcher(task):
|
|
154
178
|
# 初始话存储器
|
155
179
|
table_name = restore_table_name(table_name=storer_table)
|
156
180
|
storer = StorerTmp(
|
157
|
-
table=table_name,
|
181
|
+
table=table_name, fields=fields,
|
158
182
|
length=task.storer_queue_length,
|
159
|
-
config=storer_config
|
183
|
+
queue=queue, config=storer_config
|
160
184
|
)
|
161
185
|
storer_list.append(storer)
|
162
186
|
|
@@ -1,28 +1,28 @@
|
|
1
|
-
cobweb/__init__.py,sha256=
|
2
|
-
cobweb/bbb.py,sha256=
|
1
|
+
cobweb/__init__.py,sha256=IKQkcts73m_K-b6TGs_IxkdkncnZRU0O_N0MJct2COI,218
|
2
|
+
cobweb/bbb.py,sha256=DWqnCiBhzD4UzPxgDHo4mhvYIB5JnYFrAtpF_vcTtT0,5614
|
3
3
|
cobweb/interface.py,sha256=um_k2AAQl1HTOvfUlq914DjkpfZVwt2m1B65EpPKrmE,802
|
4
4
|
cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
|
5
5
|
cobweb/task.py,sha256=awZWFwON34WAJs08TKPaYTbyRjmoNOCBkLCNf2l9C-Q,1282
|
6
6
|
cobweb/utils.py,sha256=tHSj_T1Ct7Y-QdIo5w4hCwkO59RlQiq9yGxUOjojMOg,2158
|
7
7
|
cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
|
8
8
|
cobweb/db/oss_db.py,sha256=l-Xbqawg1HJgedz9MumXQrr1jMK6_EePXCis11CZEkE,3810
|
9
|
-
cobweb/db/redis_db.py,sha256=
|
9
|
+
cobweb/db/redis_db.py,sha256=keVlFpUT7spfNwZ4g_5teROo_uOsjfDWtR-WvAcqZIE,7415
|
10
10
|
cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
cobweb/db/scheduler/default.py,sha256=OxmFX7OvMEhKEq-NF7A8I9cA4V4qWw5vayS-yIbng0A,114
|
12
12
|
cobweb/db/scheduler/textfile.py,sha256=atRDeNT-e5toNvyGsCXAxL1FJi77uSYktdCzH_hXGo8,821
|
13
13
|
cobweb/db/storer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
cobweb/db/storer/console.py,sha256=f7yZFo4qTieaB9JxbGfrVAclAb2H_wji82dWoZp7HUw,182
|
15
|
-
cobweb/db/storer/loghub.py,sha256=
|
15
|
+
cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,1731
|
16
16
|
cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
|
17
17
|
cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
|
18
18
|
cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
cobweb/distributed/launcher.py,sha256=
|
19
|
+
cobweb/distributed/launcher.py,sha256=WEEfZDdOXpEN5ljrYEGQWzPtDM7XcVczEnDO81yCoWg,9135
|
20
20
|
cobweb/distributed/models.py,sha256=zRZfSOiP-OyvTVdI5_KScyf_jZmYmaYbxJvohf7ffDA,4390
|
21
21
|
cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
|
23
23
|
cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
|
24
|
-
cobweb_launcher-0.0.
|
25
|
-
cobweb_launcher-0.0.
|
26
|
-
cobweb_launcher-0.0.
|
27
|
-
cobweb_launcher-0.0.
|
28
|
-
cobweb_launcher-0.0.
|
24
|
+
cobweb_launcher-0.0.7.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
25
|
+
cobweb_launcher-0.0.7.dist-info/METADATA,sha256=ymYuybr2CYckZMJVx6diBVimAeGlu3X8OvkuxNeveRQ,1225
|
26
|
+
cobweb_launcher-0.0.7.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
27
|
+
cobweb_launcher-0.0.7.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
28
|
+
cobweb_launcher-0.0.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|