cobweb-launcher 0.0.4__tar.gz → 0.0.6__tar.gz
- {cobweb-launcher-0.0.4/cobweb_launcher.egg-info → cobweb-launcher-0.0.6}/PKG-INFO +1 -1
- cobweb-launcher-0.0.6/cobweb/__init__.py +8 -0
- {cobweb-launcher-0.0.4/cobweb/base → cobweb-launcher-0.0.6/cobweb}/bbb.py +2 -2
- cobweb-launcher-0.0.6/cobweb/db/__init__.py +2 -0
- {cobweb-launcher-0.0.4/cobweb/db/base → cobweb-launcher-0.0.6/cobweb/db}/oss_db.py +1 -1
- {cobweb-launcher-0.0.4/cobweb/db/base → cobweb-launcher-0.0.6/cobweb/db}/redis_db.py +3 -13
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/default.py +1 -1
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/textfile.py +1 -3
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/storer/console.py +1 -2
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/storer/loghub.py +3 -4
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/storer/redis.py +1 -2
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/storer/textfile.py +1 -2
- cobweb-launcher-0.0.6/cobweb/distributed/launcher.py +273 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/distributed/models.py +1 -3
- {cobweb-launcher-0.0.4/cobweb/base → cobweb-launcher-0.0.6/cobweb}/interface.py +3 -13
- cobweb-launcher-0.0.6/cobweb/task.py +44 -0
- cobweb-launcher-0.0.6/cobweb/utils.py +85 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/SOURCES.txt +7 -16
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/setup.py +1 -1
- cobweb-launcher-0.0.4/cobweb/__init__.py +0 -10
- cobweb-launcher-0.0.4/cobweb/base/config.py +0 -164
- cobweb-launcher-0.0.4/cobweb/base/decorators.py +0 -95
- cobweb-launcher-0.0.4/cobweb/base/hash_table.py +0 -60
- cobweb-launcher-0.0.4/cobweb/base/queue_tmp.py +0 -60
- cobweb-launcher-0.0.4/cobweb/base/request.py +0 -62
- cobweb-launcher-0.0.4/cobweb/base/task.py +0 -38
- cobweb-launcher-0.0.4/cobweb/base/utils.py +0 -15
- cobweb-launcher-0.0.4/cobweb/db/base/client_db.py +0 -1
- cobweb-launcher-0.0.4/cobweb/db/base/redis_dbv3.py +0 -231
- cobweb-launcher-0.0.4/cobweb/db/storer/__init__.py +0 -0
- cobweb-launcher-0.0.4/cobweb/distributed/__init__.py +0 -0
- cobweb-launcher-0.0.4/cobweb/distributed/launcher.py +0 -194
- cobweb-launcher-0.0.4/cobweb/single/__init__.py +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/LICENSE +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/README.md +0 -0
- {cobweb-launcher-0.0.4/cobweb/base → cobweb-launcher-0.0.6/cobweb/db/scheduler}/__init__.py +0 -0
- {cobweb-launcher-0.0.4/cobweb/db → cobweb-launcher-0.0.6/cobweb/db/storer}/__init__.py +0 -0
- {cobweb-launcher-0.0.4/cobweb/db/base → cobweb-launcher-0.0.6/cobweb/distributed}/__init__.py +0 -0
- {cobweb-launcher-0.0.4/cobweb/base → cobweb-launcher-0.0.6/cobweb}/log.py +0 -0
- {cobweb-launcher-0.0.4/cobweb/db/scheduler → cobweb-launcher-0.0.6/cobweb/single}/__init__.py +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/single/models.py +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/single/nest.py +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/setup.cfg +0 -0
{cobweb-launcher-0.0.4/cobweb/db/base → cobweb-launcher-0.0.6/cobweb/db}/redis_db.py
@@ -1,6 +1,6 @@
 import time
 import redis
-from
+from cobweb import Seed


 class RedisDB:
@@ -9,19 +9,9 @@ class RedisDB:
         self,
         project: str,
         task_name: str,
-
-        port=None,
-        username=None,
-        password=None,
-        db=0
+        config: dict
     ):
-        pool = redis.ConnectionPool(
-            host=host,
-            port=port,
-            username=username,
-            password=password,
-            db=db
-        )
+        pool = redis.ConnectionPool(**config)
         self.heartbeat_key = f"{project}:{task_name}:heartbeat"  # redis type string
         self.spider_key = f"{project}:{task_name}:seed_info:spider"  # redis type zset, .format(priority)
         self.storer_key = f"{project}:{task_name}:seed_info:storer:%s"  # redis type set,
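With this change the connection details live in a single dict that is unpacked straight into redis.ConnectionPool. A minimal sketch of the new calling convention (the host/port values are placeholders, not taken from the package):

    from cobweb import RedisDB  # re-exported by the package, as used in launcher.py below

    # any keyword accepted by redis.ConnectionPool can go in the dict
    redis_config = {
        "host": "127.0.0.1",   # placeholder
        "port": 6379,          # placeholder
        "password": None,
        "db": 0,
    }

    # project and task_name stay positional; the config dict replaces the
    # old host/port/username/password/db parameters
    db = RedisDB("demo", "spider", redis_config)

This mirrors the call in the new launcher.py, where task.redis_info is passed through unchanged as the config argument.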
{cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/db/storer/loghub.py
@@ -1,13 +1,12 @@
 import json
-from base.log import log
-from base.interface import StorerInterface
 from aliyun.log import LogClient, LogItem, PutLogsRequest
+from cobweb import log, StorerInterface


 class Loghub(StorerInterface):

-    def __init__(self,
-        super().__init__(
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
         self.client = None

     def init_loghub_clint(self):
cobweb-launcher-0.0.6/cobweb/distributed/launcher.py (new file)
@@ -0,0 +1,273 @@
+import time
+import threading
+from threading import Thread
+from importlib import import_module
+
+from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
+from cobweb.utils import struct_queue_name, restore_table_name
+from .models import Scheduler, Spider, Storer
+from collections import namedtuple
+
+# def start_seeds(seeds):
+#     if not seeds:
+#         return None
+#     if any(isinstance(seeds, t) for t in (list, tuple)):
+#         return [Seed(seed) for seed in seeds]
+#     elif any(isinstance(seeds, t) for t in (str, dict)):
+#         return Seed(seeds)
+
+
+# def parse_storer_info(storer_info):
+#     storer_data = {}
+#     storer_info_list = []
+#     if storer_info.__class__.__name__ == 'StorerInfo':
+#         storer_info_list.append(storer_info)
+#     elif any(isinstance(storer_info, t) for t in (list, tuple)):
+#         storer_info_list = storer_info
+#     for info in storer_info_list:
+#         db_name = info.DB.__name__
+#         storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
+#         storer_data[db_name]["db_args_list"].append(info[1:])
+#     return storer_data
+
+def get_scheduler_db(db):
+    if isinstance(db, str):
+        if "." in db:
+            model_path = db.split(".")
+            model = import_module(db)
+            obj = getattr(model, db)
+        else:
+            model = import_module(f"cobweb.db.scheduler.{db.lower()}")
+            obj = getattr(model, db.capitalize())
+        return obj
+        # if db.lower() in dir(StorerDB):
+        #     return getattr(StorerDB, db)
+        # else:
+        #     pass
+    elif issubclass(db, StorerInterface):
+        return db
+    raise TypeError()
+
+
+def get_storer_db(db):
+    if isinstance(db, str):
+        if "." in db:
+            model_path = db.split(".")
+            model = import_module(db)
+            obj = getattr(model, db)
+        else:
+            model = import_module(f"cobweb.db.storer.{db.lower()}")
+            obj = getattr(model, db.capitalize())
+        return obj, db.lower()
+        # if db.lower() in dir(StorerDB):
+        #     return getattr(StorerDB, db)
+        # else:
+        #     pass
+    elif issubclass(db, StorerInterface):
+        return db, db.__name__.lower()
+    raise TypeError()
+
+
+def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
+    time.sleep(5)
+    while True:
+        if (
+            scheduler.stop and
+            not ready_seed_length() and
+            not scheduler.queue.length and
+            not spider.spider_in_progress.length
+        ):
+            log.info("spider is done?")
+            last.set()
+            time.sleep(5)
+            storer_queue_empty = True
+            for storer in storer_list:
+                if storer.queue.length:
+                    storer_queue_empty = False
+                    break
+            if storer_queue_empty and not spider_queue_length():
+                log.info("spider done!")
+                break
+        last.clear()
+        time.sleep(3)
+    stop.set()
+
+
+def launcher(task):
+    """
+    Task launch decorator.
+    :param task: task configuration info
+    """
+    def decorator(func):
+        """
+        Item:
+            Textfile()
+            Loghub()
+            Console()
+        e.g.
+        task.fields = "a,b"
+        func(item, seed)
+            a = "a"
+            b = "b"
+            data = {"a": "a", "b": "b"}
+            yield item.Loghub(**data)
+            yield item.Loghub(a=a, b=b)
+        """
+        storer_list = []

+        # program-finished event
+        last = threading.Event()
+        # stop-crawling event
+        stop = threading.Event()
+
+        # initialize the redis connection info
+        redis_db = RedisDB(task.project, task.task_name, task.redis_info)
+
+        log.info("初始化cobweb!")
+
+        seed_queue = Queue()
+
+        if task.scheduler_info is None:
+            task.scheduler_info = dict()
+
+        # dynamically subclass the scheduler
+        sql = task.scheduler_info.get("sql")
+        table = task.scheduler_info.get("table")
+        size = task.scheduler_info.get("size")
+        scheduler_config = task.scheduler_info.get("config")
+        scheduler_db = task.scheduler_info.get("db", "default")
+        DB = get_scheduler_db(scheduler_db)
+        # SchedulerDB, table, sql, length, size, config = task.scheduler_info
+        SchedulerTmp = type(DB.__name__, (Scheduler, DB), {})
+
+        # initialize the scheduler
+        scheduler = SchedulerTmp(
+            table=table, sql=sql, size=size, queue=seed_queue,
+            length=task.scheduler_queue_length, config=scheduler_config
+        )
+
+        # initialize the spider
+        spider = Spider(seed_queue, task.max_retries)
+
+        # parse the storer info
+        storer_info_list = task.storer_info
+        if not isinstance(storer_info_list, list):
+            storer_info_list = [storer_info_list]
+
+        # new item
+        item = type("Item", (object,), {"redis_client": redis_db})()
+
+        for storer_info in storer_info_list:
+            storer_db = storer_info["db"]
+            fields = storer_info["fields"]
+            storer_table = storer_info.get("table", "console")
+            storer_config = storer_info.get("config")
+
+            StorerDB, db_name = get_storer_db(storer_db)
+            StorerTmp = type(StorerDB.__name__, (Storer, StorerDB), {})
+
+            if not getattr(item, db_name, None):
+                instance = type(db_name, (DBItem,), {})
+                setattr(item, db_name, instance)
+
+            storer_item_instance = getattr(item, db_name)
+            storer_item_instance.init_item(storer_table, fields)
+
+            storer_queue = struct_queue_name(db_name, storer_table)
+            queue = getattr(storer_item_instance, storer_queue)
+            # initialize the storer
+            table_name = restore_table_name(table_name=storer_table)
+            storer = StorerTmp(
+                table=table_name, fields=fields,
+                length=task.storer_queue_length,
+                queue=queue, config=storer_config
+            )
+            storer_list.append(storer)
+
+        # for db_name in storer_data.keys():
+        #     # dynamically subclass the storer
+        #     StorerDB = storer_data[db_name]["StorerDB"]
+        #     StorerTmp = type(StorerDB.__name__, (Storer, StorerDB), {})
+        #     db_args_list = storer_data[db_name]["db_args_list"]
+        #     for storer_db_args in db_args_list:
+        #         table, fields, length, config = storer_db_args
+        #         if not getattr(item, db_name, None):
+        #             instance = type(db_name, (DBItem,), {})
+        #             setattr(item, db_name, instance)
+        #         # create the storer item and its queue
+        #         storer_item_instance = getattr(item, db_name)
+        #         storer_item_instance.init_item(table, fields)
+        #         #
+        #         storer_queue = struct_queue_name(db_name, table)
+        #         queue = getattr(storer_item_instance, storer_queue)
+        #         # initialize the storer
+        #         table_name = restore_table_name(table_name=table)
+        #         storer = StorerTmp(table_name, fields, length, queue, config)
+        #         storer_list.append(storer)
+
+        Thread(target=redis_db.check_spider_queue, args=(stop, len(storer_list))).start()
+        Thread(target=redis_db.set_heartbeat, args=(stop,)).start()
+
+        # push the initial seeds
+        # seeds = start_seeds(task.start_seed)
+        redis_db.add_seed(task.seeds)
+        # start the scheduler: schedule seeds into the redis queue
+        Thread(
+            # name="xxxx_schedule_seeds",
+            target=scheduler.schedule_seed,
+            args=(
+                redis_db.ready_seed_length,
+                redis_db.get_scheduler_lock,
+                redis_db.add_seed
+            )
+        ).start()
+
+        # start the scheduler: schedule the task queue
+        Thread(
+            # name="xxxx_schedule_task",
+            target=scheduler.schedule_task,
+            args=(
+                stop, redis_db.get_seed,
+                redis_db.ready_seed_length
+            )
+        ).start()
+
+        # start the spiders
+        for index in range(task.spider_num):
+            Thread(
+                # name=f"xxxx_spider_task:{index}",
+                target=spider.spider_task,
+                args=(
+                    stop, func, item,
+                    redis_db.del_seed
+                )
+            ).start()
+
+        # start the storers
+        for storer in storer_list:
+            Thread(
+                # name=f"xxxx_store_task:{storer.table}",
+                target=storer.store_task,
+                args=(
+                    stop, last,
+                    redis_db.reset_seed,
+                    redis_db.set_storer
+                )
+            ).start()
+
+        Thread(
+            # name="check_spider",
+            target=check,
+            args=(
+                stop, last, spider,
+                scheduler, storer_list,
+                redis_db.ready_seed_length,
+                redis_db.spider_queue_length,
+            )
+        ).start()
+
+    return decorator
+
+
+# model = get_storer_db("console")
+# print()
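Taken together with the docstring above, using the new launcher looks roughly like the sketch below. This is an illustration only: the seed, field names and record values are invented, and it assumes the 0.0.6 cobweb/__init__.py (whose contents are not shown here) still re-exports Task and launcher the way 0.0.4's did. Note that the launcher registers each storer on item under the lower-cased db name, so the sketch yields item.console(...) even though the docstring writes item.Loghub(...).

    from cobweb import Task, launcher  # assumed re-exports, as in 0.0.4

    # hypothetical configuration; "fields" must match the keys yielded below
    task = Task(
        seeds={"url": "https://example.com"},            # placeholder seed
        project="demo",
        task_name="spider",
        redis_info={"host": "127.0.0.1", "port": 6379},  # placeholder Redis settings
        storer_info={"db": "console", "table": "demo_table", "fields": "a,b"},
    )

    @launcher(task)
    def crawl(item, seed):
        # fetch and parse the seed here (omitted), then yield one record per storer
        data = {"a": "a", "b": "b"}
        yield item.console(**data)

Applying the decorator is what starts everything: it wires up the Redis keys, pushes the seeds, and spawns the scheduler, spider, storer and check threads; the decorated function itself is only called from the spider threads.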
{cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb/distributed/models.py
@@ -1,8 +1,6 @@
 import time
 from hashlib import md5
-from
-from base.bbb import Queue, Seed
-from base.interface import SchedulerInterface, StorerInterface
+from cobweb import log, Queue, Seed, StorerInterface, SchedulerInterface
 # from pympler import asizeof


{cobweb-launcher-0.0.4/cobweb/base → cobweb-launcher-0.0.6/cobweb}/interface.py
@@ -1,15 +1,5 @@
-import json
 from abc import ABC, abstractmethod
-
-
-def parse_config(config):
-    if not config:
-        return None
-    if isinstance(config, str):
-        return json.loads(config)
-    if isinstance(config, dict):
-        return config
-    raise TypeError("config type is not in [string, dict]!")
+from .utils import parse_info


 class SchedulerInterface(ABC):
@@ -20,7 +10,7 @@ class SchedulerInterface(ABC):
         self.length = length
         self.size = size
         self.queue = queue
-        self.config =
+        self.config = parse_info(config)
         self.stop = False

     @abstractmethod
@@ -35,7 +25,7 @@ class StorerInterface(ABC):
         self.fields = fields
         self.length = length
         self.queue = queue
-        self.config =
+        self.config = parse_info(config)
         # self.redis_db = redis_db

     @abstractmethod
cobweb-launcher-0.0.6/cobweb/task.py (new file)
@@ -0,0 +1,44 @@
+from .utils import parse_info, struct_start_seeds
+
+
+class Task:
+
+    def __init__(
+        self,
+        seeds=None,
+        project=None,
+        task_name=None,
+        redis_info=None,
+        storer_info=None,
+        scheduler_info=None,
+        spider_num=None,
+        max_retries=None,
+        storer_queue_length=None,
+        scheduler_queue_length=None,
+    ):
+        """
+
+        :param seeds:
+        :param project:
+        :param task_name:
+        :param redis_info:
+        :param storer_info:
+        :param scheduler_info: dict(DB="", table="", size="", config="")
+        :param spider_num:
+        :param max_retries:
+        :param storer_queue_length:
+        :param scheduler_queue_length:
+        """
+        self.seeds = struct_start_seeds(seeds)
+        self.project = project or "test"
+        self.task_name = task_name or "spider"
+
+        self.redis_info = parse_info(redis_info)
+        self.storer_info = parse_info(storer_info)
+        self.scheduler_info = parse_info(scheduler_info)
+
+        self.spider_num = spider_num or 1
+        self.max_retries = max_retries or 5
+        self.storer_queue_length = storer_queue_length or 100
+        self.scheduler_queue_length = scheduler_queue_length or 100
+
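Since every *_info argument is run through parse_info, the same configuration can be given either as a dict or as a JSON string. A small sketch of the two equivalent forms and the defaults (the connection values are placeholders):

    import json
    from cobweb.task import Task

    redis_conf = {"host": "127.0.0.1", "port": 6379, "db": 0}  # placeholder settings

    # dict form and JSON-string form normalize to the same thing
    t1 = Task(project="demo", task_name="spider", redis_info=redis_conf)
    t2 = Task(project="demo", task_name="spider", redis_info=json.dumps(redis_conf))
    assert t1.redis_info == t2.redis_info

    # unset knobs fall back to the defaults defined above
    assert t1.spider_num == 1 and t1.max_retries == 5
    assert t1.storer_queue_length == 100 and t1.scheduler_queue_length == 100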
cobweb-launcher-0.0.6/cobweb/utils.py (new file)
@@ -0,0 +1,85 @@
+import json
+import sys
+from typing import Iterable
+
+# from cobweb import Seed
+
+
+def struct_table_name(table_name):
+    return table_name.replace(".", "__p__").replace(":", "__c__")
+
+
+def restore_table_name(table_name):
+    return table_name.replace("__p__", ".").replace("__c__", ":")
+
+
+def struct_queue_name(db_name, table_name):
+    return sys.intern(f"__{db_name}_{table_name}_queue__")
+
+
+# class StorerDB:
+#
+#     @staticmethod
+#     def console(self):
+#         from db.storer.console import Console
+#         table = struct_table_name(table)
+#         return StorerInfo(DB=Console, table=table, length=length, config=None)
+#
+#     @staticmethod
+#     def textfile(table, length=200):
+#         from db.storer.textfile import Textfile
+#         table = struct_table_name(table)
+#         return StorerInfo(DB=Textfile, table=table, length=length, config=None)
+#
+#     @staticmethod
+#     def loghub(table, length=200, config=None):
+#         from db.storer.loghub import Loghub
+#         table = struct_table_name(table)
+#         return StorerInfo(DB=Loghub, table=table, length=length, config=config)
+
+
+def parse_info(info):
+    if not info:
+        return info
+
+    if isinstance(info, dict):
+        return info
+
+    if isinstance(info, str):
+        return json.loads(info)
+
+    if isinstance(info, Iterable):
+        result = list()
+        for ii in info:
+            if isinstance(ii, str):
+                result.append(json.loads(ii))
+            elif isinstance(ii, dict):
+                result.append(ii)
+            else:
+                raise TypeError("must be in [str, dict]")
+
+        return result
+
+
+def struct_start_seeds(seeds):
+    from .bbb import Seed
+    if not seeds:
+        return None
+    if any(isinstance(seeds, t) for t in (list, tuple)):
+        return [Seed(seed) for seed in seeds]
+    elif any(isinstance(seeds, t) for t in (str, dict)):
+        return Seed(seeds)
+
+
+# def get_storer_db(db):
+#
+#     if isinstance(db, str):
+#         model = import_module(f" db.storer.{db.lower()}")
+#
+#         # if db.lower() in dir(StorerDB):
+#         #     return getattr(StorerDB, db)
+#         # else:
+#         #     pass
+
+
+
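The two table-name helpers are inverses of each other, struct_queue_name builds (and interns) the attribute name the launcher uses to look up a storer's queue, and parse_info normalizes str/dict/iterable input. A quick illustration (the table name is a placeholder):

    from cobweb.utils import (
        parse_info, restore_table_name, struct_queue_name, struct_table_name
    )

    safe = struct_table_name("project.logstore:table")      # placeholder name
    assert safe == "project__p__logstore__c__table"
    assert restore_table_name(safe) == "project.logstore:table"

    assert struct_queue_name("loghub", safe) == f"__loghub_{safe}_queue__"

    assert parse_info('{"a": 1}') == {"a": 1}
    assert parse_info([{"a": 1}, '{"b": 2}']) == [{"a": 1}, {"b": 2}]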
{cobweb-launcher-0.0.4 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/SOURCES.txt
@@ -2,23 +2,14 @@ LICENSE
 README.md
 setup.py
 cobweb/__init__.py
-cobweb/
-cobweb/
-cobweb/
-cobweb/
-cobweb/
-cobweb/base/interface.py
-cobweb/base/log.py
-cobweb/base/queue_tmp.py
-cobweb/base/request.py
-cobweb/base/task.py
-cobweb/base/utils.py
+cobweb/bbb.py
+cobweb/interface.py
+cobweb/log.py
+cobweb/task.py
+cobweb/utils.py
 cobweb/db/__init__.py
-cobweb/db/
-cobweb/db/
-cobweb/db/base/oss_db.py
-cobweb/db/base/redis_db.py
-cobweb/db/base/redis_dbv3.py
+cobweb/db/oss_db.py
+cobweb/db/redis_db.py
 cobweb/db/scheduler/__init__.py
 cobweb/db/scheduler/default.py
 cobweb/db/scheduler/textfile.py
cobweb-launcher-0.0.4/cobweb/__init__.py (removed)
@@ -1,10 +0,0 @@
-from .base.config import StorerInfo, SchedulerInfo, RedisInfo, SchedulerDB, StorerDB
-from .base.interface import StorerInterface, SchedulerInterface
-from .distributed.launcher import launcher
-from .distributed import models
-from .base.task import Task
-from .base.bbb import Seed
-from .db.base import *
-from .db.storer import *
-from .db.scheduler import *
-