cobweb-launcher 0.0.5__tar.gz → 0.0.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/PKG-INFO +1 -1
- cobweb-launcher-0.0.6/cobweb/__init__.py +8 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/redis_db.py +2 -12
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/loghub.py +2 -2
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/launcher.py +35 -11
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/PKG-INFO +1 -1
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/setup.py +1 -1
- cobweb-launcher-0.0.5/cobweb/__init__.py +0 -19
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/LICENSE +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/README.md +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/bbb.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/oss_db.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/__init__.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/default.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/textfile.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/__init__.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/console.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/redis.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/textfile.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/__init__.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/models.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/interface.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/log.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/__init__.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/models.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/nest.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/task.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/utils.py +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/setup.cfg +0 -0
@@ -9,19 +9,9 @@ class RedisDB:
|
|
9
9
|
self,
|
10
10
|
project: str,
|
11
11
|
task_name: str,
|
12
|
-
|
13
|
-
port=None,
|
14
|
-
username=None,
|
15
|
-
password=None,
|
16
|
-
db=0
|
12
|
+
config: dict
|
17
13
|
):
|
18
|
-
pool = redis.ConnectionPool(
|
19
|
-
host=host,
|
20
|
-
port=port,
|
21
|
-
username=username,
|
22
|
-
password=password,
|
23
|
-
db=db
|
24
|
-
)
|
14
|
+
pool = redis.ConnectionPool(**config)
|
25
15
|
self.heartbeat_key = f"{project}:{task_name}:heartbeat" # redis type string
|
26
16
|
self.spider_key = f"{project}:{task_name}:seed_info:spider" # redis type zset, .format(priority)
|
27
17
|
self.storer_key = f"{project}:{task_name}:seed_info:storer:%s" # redis type set,
|
@@ -5,8 +5,8 @@ from cobweb import log, StorerInterface
|
|
5
5
|
|
6
6
|
class Loghub(StorerInterface):
|
7
7
|
|
8
|
-
def __init__(self,
|
9
|
-
super().__init__(
|
8
|
+
def __init__(self, **kwargs):
|
9
|
+
super().__init__(**kwargs)
|
10
10
|
self.client = None
|
11
11
|
|
12
12
|
def init_loghub_clint(self):
|
@@ -2,10 +2,11 @@ import time
|
|
2
2
|
import threading
|
3
3
|
from threading import Thread
|
4
4
|
from importlib import import_module
|
5
|
-
from models import Scheduler, Spider, Storer
|
6
|
-
from cobweb.utils import struct_queue_name, restore_table_name
|
7
|
-
from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
8
5
|
|
6
|
+
from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
7
|
+
from cobweb.utils import struct_queue_name, restore_table_name
|
8
|
+
from .models import Scheduler, Spider, Storer
|
9
|
+
from collections import namedtuple
|
9
10
|
|
10
11
|
# def start_seeds(seeds):
|
11
12
|
# if not seeds:
|
@@ -29,14 +30,14 @@ from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
|
|
29
30
|
# storer_data[db_name]["db_args_list"].append(info[1:])
|
30
31
|
# return storer_data
|
31
32
|
|
32
|
-
def
|
33
|
+
def get_scheduler_db(db):
|
33
34
|
if isinstance(db, str):
|
34
35
|
if "." in db:
|
35
36
|
model_path = db.split(".")
|
36
37
|
model = import_module(db)
|
37
38
|
obj = getattr(model, db)
|
38
39
|
else:
|
39
|
-
model = import_module(f"cobweb.db.
|
40
|
+
model = import_module(f"cobweb.db.scheduler.{db.lower()}")
|
40
41
|
obj = getattr(model, db.capitalize())
|
41
42
|
return obj
|
42
43
|
# if db.lower() in dir(StorerDB):
|
@@ -48,6 +49,25 @@ def get_storer_db(db):
|
|
48
49
|
raise TypeError()
|
49
50
|
|
50
51
|
|
52
|
+
def get_storer_db(db):
|
53
|
+
if isinstance(db, str):
|
54
|
+
if "." in db:
|
55
|
+
model_path = db.split(".")
|
56
|
+
model = import_module(db)
|
57
|
+
obj = getattr(model, db)
|
58
|
+
else:
|
59
|
+
model = import_module(f"cobweb.db.storer.{db.lower()}")
|
60
|
+
obj = getattr(model, db.capitalize())
|
61
|
+
return obj, db.lower()
|
62
|
+
# if db.lower() in dir(StorerDB):
|
63
|
+
# return getattr(StorerDB, db)
|
64
|
+
# else:
|
65
|
+
# pass
|
66
|
+
elif issubclass(db, StorerInterface):
|
67
|
+
return db, db.__name__.lower()
|
68
|
+
raise TypeError()
|
69
|
+
|
70
|
+
|
51
71
|
def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
52
72
|
time.sleep(5)
|
53
73
|
while True:
|
@@ -101,18 +121,22 @@ def launcher(task):
|
|
101
121
|
stop = threading.Event()
|
102
122
|
|
103
123
|
# 初始化redis信息
|
104
|
-
redis_db = RedisDB(task.project, task.task_name,
|
124
|
+
redis_db = RedisDB(task.project, task.task_name, task.redis_info)
|
105
125
|
|
106
126
|
log.info("初始化cobweb!")
|
107
127
|
|
108
128
|
seed_queue = Queue()
|
109
129
|
|
130
|
+
if task.scheduler_info is None:
|
131
|
+
task.scheduler_info = dict()
|
132
|
+
|
110
133
|
# 调度器动态继承
|
111
134
|
sql = task.scheduler_info.get("sql")
|
112
135
|
table = task.scheduler_info.get("table")
|
113
136
|
size = task.scheduler_info.get("size")
|
114
137
|
scheduler_config = task.scheduler_info.get("config")
|
115
|
-
|
138
|
+
scheduler_db = task.scheduler_info.get("db", "default")
|
139
|
+
DB = get_scheduler_db(scheduler_db)
|
116
140
|
# SchedulerDB, table, sql, length, size, config = task.scheduler_info
|
117
141
|
SchedulerTmp = type(DB.__name__, (Scheduler, DB), {})
|
118
142
|
|
@@ -134,12 +158,12 @@ def launcher(task):
|
|
134
158
|
item = type("Item", (object,), {"redis_client": redis_db})()
|
135
159
|
|
136
160
|
for storer_info in storer_info_list:
|
137
|
-
|
161
|
+
storer_db = storer_info["db"]
|
138
162
|
fields = storer_info["fields"]
|
139
163
|
storer_table = storer_info.get("table", "console")
|
140
164
|
storer_config = storer_info.get("config")
|
141
165
|
|
142
|
-
StorerDB, db_name = get_storer_db(
|
166
|
+
StorerDB, db_name = get_storer_db(storer_db)
|
143
167
|
StorerTmp = type(StorerDB.__name__, (Storer, StorerDB), {})
|
144
168
|
|
145
169
|
if not getattr(item, db_name, None):
|
@@ -154,9 +178,9 @@ def launcher(task):
|
|
154
178
|
# 初始话存储器
|
155
179
|
table_name = restore_table_name(table_name=storer_table)
|
156
180
|
storer = StorerTmp(
|
157
|
-
table=table_name,
|
181
|
+
table=table_name, fields=fields,
|
158
182
|
length=task.storer_queue_length,
|
159
|
-
config=storer_config
|
183
|
+
queue=queue, config=storer_config
|
160
184
|
)
|
161
185
|
storer_list.append(storer)
|
162
186
|
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# from .distributed.launcher import launcher
|
2
|
-
# from .distributed import models
|
3
|
-
from .bbb import Seed, Queue, DBItem
|
4
|
-
from .task import Task
|
5
|
-
from .log import log
|
6
|
-
from .interface import SchedulerInterface, StorerInterface
|
7
|
-
from .db.redis_db import RedisDB
|
8
|
-
# from . import db
|
9
|
-
# from .base.bbb import Seed
|
10
|
-
# from .base import interface
|
11
|
-
# from .base import config
|
12
|
-
# from .base import utils
|
13
|
-
# from .base import log as logger
|
14
|
-
#
|
15
|
-
# from .db.scheduler import *
|
16
|
-
# from .db.storer import *
|
17
|
-
# from .db.base import *
|
18
|
-
|
19
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|