cobweb-launcher 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/__init__.py +1 -1
- cobweb/launchers/launcher.py +1 -2
- {cobweb_launcher-1.0.0.dist-info → cobweb_launcher-1.0.2.dist-info}/METADATA +1 -1
- cobweb_launcher-1.0.2.dist-info/RECORD +32 -0
- cobweb/bbb.py +0 -191
- cobweb/config.py +0 -164
- cobweb/db/oss_db.py +0 -128
- cobweb/db/scheduler/__init__.py +0 -1
- cobweb/db/scheduler/default.py +0 -8
- cobweb/db/scheduler/textfile.py +0 -27
- cobweb/db/storer/__init__.py +0 -1
- cobweb/db/storer/console.py +0 -9
- cobweb/db/storer/loghub.py +0 -54
- cobweb/db/storer/redis.py +0 -15
- cobweb/db/storer/textfile.py +0 -15
- cobweb/decorators.py +0 -16
- cobweb/distributed/__init__.py +0 -0
- cobweb/distributed/launcher.py +0 -243
- cobweb/distributed/models.py +0 -143
- cobweb/equip/__init__.py +0 -8
- cobweb/equip/dev/__init__.py +0 -0
- cobweb/equip/dev/launcher.py +0 -202
- cobweb/equip/dev/models.py +0 -156
- cobweb/equip/distributed/__init__.py +0 -0
- cobweb/equip/distributed/launcher.py +0 -219
- cobweb/equip/distributed/models.py +0 -158
- cobweb/equip/download/__init__.py +0 -0
- cobweb/equip/download/launcher.py +0 -203
- cobweb/equip/download/models.py +0 -169
- cobweb/equip/single/__init__.py +0 -0
- cobweb/equip/single/launcher.py +0 -203
- cobweb/equip/single/models.py +0 -173
- cobweb/interface.py +0 -34
- cobweb/log.py +0 -96
- cobweb/new.py +0 -20
- cobweb/single/__init__.py +0 -0
- cobweb/single/launcher.py +0 -231
- cobweb/single/models.py +0 -134
- cobweb/single/nest.py +0 -153
- cobweb/task.py +0 -61
- cobweb/utils.py +0 -90
- cobweb_launcher-1.0.0.dist-info/RECORD +0 -69
- {cobweb_launcher-1.0.0.dist-info → cobweb_launcher-1.0.2.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.0.0.dist-info → cobweb_launcher-1.0.2.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.0.0.dist-info → cobweb_launcher-1.0.2.dist-info}/top_level.txt +0 -0
cobweb/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from launchers import Launcher, LauncherPro
|
|
1
|
+
from .launchers import Launcher, LauncherPro
|
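cobweb/launchers/launcher.py
CHANGED
(The +1 -2 hunk for this file is not present in this capture.)
cobweb_launcher-1.0.2.dist-info/RECORD
ADDED
|
@@ -0,0 +1,32 @@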
|
|
|
1
|
+
cobweb/__init__.py,sha256=IkGcdTU6fEBNyzWowcJSSMdErntFM1kmu4WUp1BgImU,45
|
|
2
|
+
cobweb/constant.py,sha256=Aw2ES_nohVRLTWylZp6WMiUAlgyw4kLbae7LpwdZ5y4,1867
|
|
3
|
+
cobweb/setting.py,sha256=T693DAwLFLs9P6ZEvugP99UzXn-8PLeMEgdxRmL6cb4,1955
|
|
4
|
+
cobweb/base/__init__.py,sha256=diiK5MygQaWjlWNLbW6eUIg-93O6glMGC9WLNM5jyOc,209
|
|
5
|
+
cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
|
|
6
|
+
cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
|
|
7
|
+
cobweb/base/item.py,sha256=pMriHStzUXtSvIf5Z3KXsP-bCvjlG1gM3z33wWeuoH8,966
|
|
8
|
+
cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
|
|
9
|
+
cobweb/base/request.py,sha256=b08AtUSZjlFLEFIEw5uGS__FjU6QSldL20-UjZD0LbI,2128
|
|
10
|
+
cobweb/base/response.py,sha256=7h9TwCNqRlwM_fvNmid9zOoRfHbKB8ABSU0eaVUJdVo,405
|
|
11
|
+
cobweb/base/seed.py,sha256=XswH16eEd6iwIBpt71E2S_AsV5UVCcOEOBFoP0r5QRo,2900
|
|
12
|
+
cobweb/crawlers/__init__.py,sha256=1sMhQ0-NJxiff3IqF2aMCXkSXcJFzzoCKIayQ5go4aI,71
|
|
13
|
+
cobweb/crawlers/base_crawler.py,sha256=ZIdmlvL4f41yV7T77F9IhBBxRt1FH-LFm0BmIXAvP8I,4881
|
|
14
|
+
cobweb/crawlers/file_crawler.py,sha256=VVOZ38qNAUPyNDspu3P7-zzDtrUtqefYLjOMnb_-JOw,9685
|
|
15
|
+
cobweb/db/__init__.py,sha256=ut0iEyBLjcJL06WNG_5_d4hO5PJWvDrKWMkDOdmgh2M,30
|
|
16
|
+
cobweb/db/redis_db.py,sha256=MahFsAyub9OFG-xuZU6Qq0AfWV1lASFAI34g2FRGpq8,4235
|
|
17
|
+
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
|
18
|
+
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
|
19
|
+
cobweb/launchers/__init__.py,sha256=qwlkEJVri7dvCgi45aX3lqAmQS0HrPicAipDvH75kew,69
|
|
20
|
+
cobweb/launchers/launcher.py,sha256=O6Kkvqk-0kOxJom8YO9zW18e_2eMYrA5RTS9Xy4TW5k,5665
|
|
21
|
+
cobweb/launchers/launcher_pro.py,sha256=GlDpyP1XAY2bX5SuSBn3920D5OKNigQgLnJfu6QOmPw,6760
|
|
22
|
+
cobweb/pipelines/__init__.py,sha256=xanY-Z1d7zRR5JhCdW2htzrAywnKBkigiaUlTFa6of0,80
|
|
23
|
+
cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
|
|
24
|
+
cobweb/pipelines/loghub_pipeline.py,sha256=roQ8gLunvuPc4KOMIATN1nKvjOXrc_RSyzXq8YY9ZBU,1015
|
|
25
|
+
cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
|
|
26
|
+
cobweb/utils/oss.py,sha256=8QlVVhXv3GBk53_616MOjbDxgD7c0ooJ0X28BE9pw-M,3220
|
|
27
|
+
cobweb/utils/tools.py,sha256=8oLxkzwaYcDqKXXuLI3A_lNabyLBr7HSPgTF6x4xbnk,1239
|
|
28
|
+
cobweb_launcher-1.0.2.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
29
|
+
cobweb_launcher-1.0.2.dist-info/METADATA,sha256=FoUQ1Np5McQ8EucslrEXX7FFcUuWpZvAnuxfhgLRyCc,1245
|
|
30
|
+
cobweb_launcher-1.0.2.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
31
|
+
cobweb_launcher-1.0.2.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
32
|
+
cobweb_launcher-1.0.2.dist-info/RECORD,,
|
cobweb/bbb.py
DELETED
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
# from typing import Iterable
|
|
2
|
-
import json
|
|
3
|
-
import time
|
|
4
|
-
import hashlib
|
|
5
|
-
from .log import log
|
|
6
|
-
from .utils import struct_queue_name
|
|
7
|
-
from collections import deque, namedtuple
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Queue:
|
|
11
|
-
|
|
12
|
-
def __init__(self):
|
|
13
|
-
self._queue = deque()
|
|
14
|
-
|
|
15
|
-
@property
|
|
16
|
-
def length(self) -> int:
|
|
17
|
-
return len(self._queue)
|
|
18
|
-
#
|
|
19
|
-
# @property
|
|
20
|
-
# def queue_names(self):
|
|
21
|
-
# return tuple(self.__dict__.keys())
|
|
22
|
-
#
|
|
23
|
-
# @property
|
|
24
|
-
# def used_memory(self):
|
|
25
|
-
# return asizeof.asizeof(self)
|
|
26
|
-
|
|
27
|
-
# def create_queue(self, queue_name: str):
|
|
28
|
-
# self.__setattr__(queue_name, deque())
|
|
29
|
-
|
|
30
|
-
# def push_seed(self, seed):
|
|
31
|
-
# self.push("_seed_queue", seed)
|
|
32
|
-
|
|
33
|
-
# def pop_seed(self):
|
|
34
|
-
# return self.pop("_seed_queue")
|
|
35
|
-
|
|
36
|
-
def push(self, data, left: bool = False, direct_insertion: bool = False):
|
|
37
|
-
try:
|
|
38
|
-
if not data:
|
|
39
|
-
return None
|
|
40
|
-
if direct_insertion or isinstance(data, Seed):
|
|
41
|
-
self._queue.appendleft(data) if left else self._queue.append(data)
|
|
42
|
-
elif any(isinstance(data, t) for t in (list, tuple)):
|
|
43
|
-
self._queue.extendleft(data) if left else self._queue.extend(data)
|
|
44
|
-
except AttributeError as e:
|
|
45
|
-
log.exception(e)
|
|
46
|
-
|
|
47
|
-
def pop(self, left: bool = True):
|
|
48
|
-
try:
|
|
49
|
-
return self._queue.popleft() if left else self._queue.pop()
|
|
50
|
-
except IndexError:
|
|
51
|
-
return None
|
|
52
|
-
except AttributeError as e:
|
|
53
|
-
log.exception(e)
|
|
54
|
-
return None
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class Seed:
|
|
58
|
-
|
|
59
|
-
def __init__(
|
|
60
|
-
self,
|
|
61
|
-
seed_info=None,
|
|
62
|
-
priority=300,
|
|
63
|
-
version=0,
|
|
64
|
-
retry=0,
|
|
65
|
-
**kwargs
|
|
66
|
-
):
|
|
67
|
-
if seed_info:
|
|
68
|
-
if any(isinstance(seed_info, t) for t in (str, bytes)):
|
|
69
|
-
try:
|
|
70
|
-
item = json.loads(seed_info)
|
|
71
|
-
for k, v in item.items():
|
|
72
|
-
self.__setattr__(k, v)
|
|
73
|
-
except json.JSONDecodeError:
|
|
74
|
-
self.__setattr__("url", seed_info)
|
|
75
|
-
elif isinstance(seed_info, dict):
|
|
76
|
-
for k, v in seed_info.items():
|
|
77
|
-
self.__setattr__(k, v)
|
|
78
|
-
else:
|
|
79
|
-
raise TypeError(Exception(
|
|
80
|
-
f"seed type error, "
|
|
81
|
-
f"must be str or dict! "
|
|
82
|
-
f"seed_info: {seed_info}"
|
|
83
|
-
))
|
|
84
|
-
for k, v in kwargs.items():
|
|
85
|
-
self.__setattr__(k, v)
|
|
86
|
-
if not getattr(self, "_priority"):
|
|
87
|
-
self._priority = min(max(1, int(priority)), 999)
|
|
88
|
-
if not getattr(self, "_version"):
|
|
89
|
-
self._version = int(version) or int(time.time())
|
|
90
|
-
if not getattr(self, "_retry"):
|
|
91
|
-
self._retry = retry
|
|
92
|
-
if not getattr(self, "sid"):
|
|
93
|
-
self.init_id()
|
|
94
|
-
|
|
95
|
-
def init_id(self):
|
|
96
|
-
item_string = self.format_seed
|
|
97
|
-
seed_id = hashlib.md5(item_string.encode()).hexdigest()
|
|
98
|
-
self.__setattr__("sid", seed_id)
|
|
99
|
-
|
|
100
|
-
def __setitem__(self, key, value):
|
|
101
|
-
setattr(self, key, value)
|
|
102
|
-
|
|
103
|
-
def __getitem__(self, item):
|
|
104
|
-
return getattr(self, item)
|
|
105
|
-
|
|
106
|
-
def __getattr__(self, name):
|
|
107
|
-
return None
|
|
108
|
-
|
|
109
|
-
def __str__(self):
|
|
110
|
-
return json.dumps(self.__dict__, ensure_ascii=False)
|
|
111
|
-
|
|
112
|
-
def __repr__(self):
|
|
113
|
-
chars = [f"{k}={v}" for k, v in self.__dict__.items()]
|
|
114
|
-
return f'{self.__class__.__name__}({", ".join(chars)})'
|
|
115
|
-
|
|
116
|
-
@property
|
|
117
|
-
def dict_seed(self):
|
|
118
|
-
seed = self.__dict__.copy()
|
|
119
|
-
del seed["_priority"]
|
|
120
|
-
del seed["_version"]
|
|
121
|
-
del seed["_retry"]
|
|
122
|
-
return seed
|
|
123
|
-
|
|
124
|
-
@property
|
|
125
|
-
def format_seed(self):
|
|
126
|
-
return json.dumps(self.dict_seed, ensure_ascii=False, separators=(",", ":"))
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
class DBItem:
|
|
130
|
-
|
|
131
|
-
def __init__(self, **kwargs):
|
|
132
|
-
self.__setattr__("_index", 0, True)
|
|
133
|
-
for table in self.__class__.__table__:
|
|
134
|
-
if set(kwargs.keys()) == set(table._fields):
|
|
135
|
-
break
|
|
136
|
-
self._index += 1
|
|
137
|
-
|
|
138
|
-
if self._index > len(self.__class__.__table__):
|
|
139
|
-
raise Exception()
|
|
140
|
-
|
|
141
|
-
table = self.__class__.__table__[self._index]
|
|
142
|
-
self.__setattr__("struct_data", table(**kwargs), True)
|
|
143
|
-
self.__setattr__("db_name", self.__class__.__name__, True)
|
|
144
|
-
self.__setattr__("table_name", self.struct_data.__class__.__name__, True)
|
|
145
|
-
|
|
146
|
-
@classmethod
|
|
147
|
-
def init_item(cls, table_name, fields):
|
|
148
|
-
queue_name = struct_queue_name(cls.__name__, table_name)
|
|
149
|
-
if getattr(cls, queue_name, None) is None:
|
|
150
|
-
setattr(cls, queue_name, Queue())
|
|
151
|
-
|
|
152
|
-
if getattr(cls, "__table__", None) is None:
|
|
153
|
-
cls.__table__ = []
|
|
154
|
-
|
|
155
|
-
table = namedtuple(table_name, fields)
|
|
156
|
-
|
|
157
|
-
if table in getattr(cls, "__table__"):
|
|
158
|
-
raise Exception()
|
|
159
|
-
getattr(cls, "__table__").append(table)
|
|
160
|
-
|
|
161
|
-
def queue(self):
|
|
162
|
-
queue_name = struct_queue_name(self.db_name, self.table_name)
|
|
163
|
-
return getattr(self.__class__, queue_name)
|
|
164
|
-
|
|
165
|
-
def __setitem__(self, key, value):
|
|
166
|
-
self.__setattr__(key, value)
|
|
167
|
-
|
|
168
|
-
def __getitem__(self, item):
|
|
169
|
-
return self.struct_data[item]
|
|
170
|
-
|
|
171
|
-
def __getattr__(self, name):
|
|
172
|
-
return None
|
|
173
|
-
|
|
174
|
-
def __setattr__(self, key, value, init=None):
|
|
175
|
-
if init:
|
|
176
|
-
super().__setattr__(key, value)
|
|
177
|
-
elif not getattr(self, "struct_data"):
|
|
178
|
-
raise Exception(f"no struct_data")
|
|
179
|
-
else:
|
|
180
|
-
self.__setattr__(
|
|
181
|
-
"struct_data",
|
|
182
|
-
self.struct_data._replace(**{key: value}),
|
|
183
|
-
init=True
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
def __str__(self):
|
|
187
|
-
return json.dumps(self.struct_data._asdict(), ensure_ascii=False)
|
|
188
|
-
|
|
189
|
-
def __repr__(self):
|
|
190
|
-
return f'{self.__class__.__name__}:{self.struct_data}'
|
|
191
|
-
|
cobweb/config.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from collections import namedtuple
|
|
3
|
-
from base.utils import struct_table_name
|
|
4
|
-
|
|
5
|
-
StorerInfo = namedtuple(
|
|
6
|
-
"StorerInfo",
|
|
7
|
-
"DB, table, fields, length, config"
|
|
8
|
-
)
|
|
9
|
-
SchedulerInfo = namedtuple(
|
|
10
|
-
"SchedulerInfo",
|
|
11
|
-
"DB, table, sql, length, size, config",
|
|
12
|
-
)
|
|
13
|
-
RedisInfo = namedtuple(
|
|
14
|
-
"RedisInfo",
|
|
15
|
-
"host, port, username, password, db",
|
|
16
|
-
defaults=("localhost", 6379, None, None, 0)
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
# redis_info = dict(
|
|
20
|
-
# host="localhost",
|
|
21
|
-
# port=6379,
|
|
22
|
-
# username=None,
|
|
23
|
-
# password=None,
|
|
24
|
-
# db=0
|
|
25
|
-
# )
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class SchedulerDB:
|
|
29
|
-
|
|
30
|
-
@staticmethod
|
|
31
|
-
def default():
|
|
32
|
-
from db.scheduler.default import Default
|
|
33
|
-
return SchedulerInfo(DB=Default, table="", sql="", length=100, size=500000, config=None)
|
|
34
|
-
|
|
35
|
-
@staticmethod
|
|
36
|
-
def textfile(table, sql=None, length=100, size=500000):
|
|
37
|
-
from db.scheduler.textfile import Textfile
|
|
38
|
-
return SchedulerInfo(DB=Textfile, table=table, sql=sql, length=length, size=size, config=None)
|
|
39
|
-
|
|
40
|
-
@staticmethod
|
|
41
|
-
def diy(DB, table, sql=None, length=100, size=500000, config=None):
|
|
42
|
-
from base.interface import SchedulerInterface
|
|
43
|
-
if not isinstance(DB, SchedulerInterface):
|
|
44
|
-
raise Exception("DB must be inherit from SchedulerInterface")
|
|
45
|
-
return SchedulerInfo(DB=DB, table=table, sql=sql, length=length, size=size, config=config)
|
|
46
|
-
|
|
47
|
-
# @staticmethod
|
|
48
|
-
# def info(scheduler_info):
|
|
49
|
-
# if not scheduler_info:
|
|
50
|
-
# return SchedulerDB.default()
|
|
51
|
-
#
|
|
52
|
-
# if isinstance(scheduler_info, SchedulerInfo):
|
|
53
|
-
# return scheduler_info
|
|
54
|
-
#
|
|
55
|
-
# if isinstance(scheduler_info, str):
|
|
56
|
-
# scheduler = json.loads(scheduler_info)
|
|
57
|
-
# if isinstance(scheduler, dict):
|
|
58
|
-
# db_name = scheduler["DB"]
|
|
59
|
-
# if db_name in dir(SchedulerDB):
|
|
60
|
-
# del scheduler["DB"]
|
|
61
|
-
# else:
|
|
62
|
-
# db_name = "diy"
|
|
63
|
-
# func = getattr(SchedulerDB, db_name)
|
|
64
|
-
# return func(**scheduler)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class StorerDB:
|
|
68
|
-
|
|
69
|
-
@staticmethod
|
|
70
|
-
def console(table, fields, length=200):
|
|
71
|
-
from db.storer.console import Console
|
|
72
|
-
table = struct_table_name(table)
|
|
73
|
-
return StorerInfo(DB=Console, table=table, fields=fields, length=length, config=None)
|
|
74
|
-
|
|
75
|
-
@staticmethod
|
|
76
|
-
def textfile(table, fields, length=200):
|
|
77
|
-
from db.storer.textfile import Textfile
|
|
78
|
-
table = struct_table_name(table)
|
|
79
|
-
return StorerInfo(DB=Textfile, table=table, fields=fields, length=length, config=None)
|
|
80
|
-
|
|
81
|
-
@staticmethod
|
|
82
|
-
def loghub(table, fields, length=200, config=None):
|
|
83
|
-
from db.storer.loghub import Loghub
|
|
84
|
-
table = struct_table_name(table)
|
|
85
|
-
return StorerInfo(DB=Loghub, table=table, fields=fields, length=length, config=config)
|
|
86
|
-
|
|
87
|
-
@staticmethod
|
|
88
|
-
def diy(DB, table, fields, length=200, config=None):
|
|
89
|
-
from base.interface import StorerInterface
|
|
90
|
-
if not isinstance(DB, StorerInterface):
|
|
91
|
-
raise Exception("DB must be inherit from StorerInterface")
|
|
92
|
-
table = struct_table_name(table)
|
|
93
|
-
return StorerInfo(DB=DB, table=table, fields=fields, length=length, config=config)
|
|
94
|
-
|
|
95
|
-
# @staticmethod
|
|
96
|
-
# def info(storer_info):
|
|
97
|
-
# if not storer_info:
|
|
98
|
-
# return None
|
|
99
|
-
#
|
|
100
|
-
# if isinstance(storer_info, str):
|
|
101
|
-
# storer_info = json.loads(storer_info)
|
|
102
|
-
#
|
|
103
|
-
# if any(isinstance(storer_info, t) for t in (dict, StorerInfo)):
|
|
104
|
-
# storer_info = [storer_info]
|
|
105
|
-
#
|
|
106
|
-
# if not isinstance(storer_info, list):
|
|
107
|
-
# raise Exception("StorerDB.info storer_info")
|
|
108
|
-
#
|
|
109
|
-
# storer_info_list = []
|
|
110
|
-
# for storer in storer_info:
|
|
111
|
-
# if isinstance(storer, StorerInfo):
|
|
112
|
-
# storer_info_list.append(storer)
|
|
113
|
-
# else:
|
|
114
|
-
# db_name = storer["DB"]
|
|
115
|
-
# if db_name in dir(StorerDB):
|
|
116
|
-
# del storer["DB"]
|
|
117
|
-
# else:
|
|
118
|
-
# db_name = "diy"
|
|
119
|
-
# func = getattr(StorerDB, db_name)
|
|
120
|
-
# storer_info_list.append(func(**storer))
|
|
121
|
-
# return storer_info_list
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def deal(config, tag):
|
|
126
|
-
if isinstance(config, dict):
|
|
127
|
-
if tag == 0:
|
|
128
|
-
return RedisInfo(**config)
|
|
129
|
-
elif tag == 1:
|
|
130
|
-
db_name = config["DB"]
|
|
131
|
-
if db_name in dir(SchedulerDB):
|
|
132
|
-
del config["DB"]
|
|
133
|
-
else:
|
|
134
|
-
db_name = "diy"
|
|
135
|
-
func = getattr(SchedulerDB, db_name)
|
|
136
|
-
return func(**config)
|
|
137
|
-
elif tag == 2:
|
|
138
|
-
db_name = config["DB"]
|
|
139
|
-
if db_name in dir(StorerDB):
|
|
140
|
-
del config["DB"]
|
|
141
|
-
else:
|
|
142
|
-
db_name = "diy"
|
|
143
|
-
func = getattr(StorerDB, db_name)
|
|
144
|
-
return func(**config)
|
|
145
|
-
raise ValueError("tag must be in [0, 1, 2]")
|
|
146
|
-
elif any(isinstance(config, t) for t in (StorerInfo, SchedulerInfo, RedisInfo)):
|
|
147
|
-
return config
|
|
148
|
-
raise TypeError("config must be in [StorerInfo, SchedulerInfo, RedisInfo]")
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def info(configs, tag = 0):
|
|
152
|
-
if configs is None:
|
|
153
|
-
return SchedulerDB.default() if tag == 1 else None
|
|
154
|
-
|
|
155
|
-
if isinstance(configs, str):
|
|
156
|
-
configs = json.loads(configs)
|
|
157
|
-
|
|
158
|
-
if tag == 0:
|
|
159
|
-
return deal(configs, tag)
|
|
160
|
-
|
|
161
|
-
if not isinstance(configs, list):
|
|
162
|
-
configs = [configs]
|
|
163
|
-
|
|
164
|
-
return [deal(config, tag) for config in configs]
|
cobweb/db/oss_db.py
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
import oss2
|
|
2
|
-
from typing import Union
|
|
3
|
-
from oss2.models import PartInfo
|
|
4
|
-
from requests import Response
|
|
5
|
-
from cobweb import log
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class OssDB:
|
|
9
|
-
|
|
10
|
-
def __init__(
|
|
11
|
-
self,
|
|
12
|
-
bucket_name,
|
|
13
|
-
endpoint,
|
|
14
|
-
access_key,
|
|
15
|
-
secret_key,
|
|
16
|
-
chunk_size=1024 ** 2,
|
|
17
|
-
min_size=1024
|
|
18
|
-
):
|
|
19
|
-
self.endpoint = endpoint
|
|
20
|
-
self.bucket_name = bucket_name
|
|
21
|
-
self.auth = oss2.Auth(
|
|
22
|
-
access_key_id=access_key,
|
|
23
|
-
access_key_secret=secret_key
|
|
24
|
-
)
|
|
25
|
-
self.bucket = oss2.Bucket(
|
|
26
|
-
auth=self.auth,
|
|
27
|
-
endpoint=endpoint,
|
|
28
|
-
bucket_name=bucket_name
|
|
29
|
-
)
|
|
30
|
-
self.chunk_size = chunk_size
|
|
31
|
-
self.min_size = min_size
|
|
32
|
-
|
|
33
|
-
@staticmethod
|
|
34
|
-
def format_upload_len(length):
|
|
35
|
-
if not length:
|
|
36
|
-
raise ValueError("Length cannot be None or 0")
|
|
37
|
-
|
|
38
|
-
units = ["KB", "MB", "GB", "TB"]
|
|
39
|
-
for i in range(3):
|
|
40
|
-
num = length / (1024 ** (i + 1))
|
|
41
|
-
if num <= 1024:
|
|
42
|
-
return f"{round(num, 2)} {units[i]}"
|
|
43
|
-
|
|
44
|
-
def assemble(self, ready_data, part_data):
|
|
45
|
-
upload_data = None
|
|
46
|
-
ready_data = ready_data + part_data
|
|
47
|
-
if len(ready_data) >= self.chunk_size:
|
|
48
|
-
upload_data = ready_data[:self.chunk_size]
|
|
49
|
-
ready_data = ready_data[self.chunk_size:]
|
|
50
|
-
|
|
51
|
-
return ready_data, upload_data
|
|
52
|
-
|
|
53
|
-
def iter_data(self, data):
|
|
54
|
-
if isinstance(data, Response):
|
|
55
|
-
for part_data in data.iter_content(self.chunk_size):
|
|
56
|
-
yield part_data
|
|
57
|
-
if isinstance(data, bytes):
|
|
58
|
-
for i in range(0, len(data), self.chunk_size):
|
|
59
|
-
yield data[i:i + self.chunk_size]
|
|
60
|
-
|
|
61
|
-
def upload_split(
|
|
62
|
-
self, oss_path: str,
|
|
63
|
-
data: Union[bytes, Response],
|
|
64
|
-
timeout: int = 300,
|
|
65
|
-
):
|
|
66
|
-
parts = []
|
|
67
|
-
status = False
|
|
68
|
-
upload_id = None
|
|
69
|
-
ready_data = b""
|
|
70
|
-
upload_data_len = 0
|
|
71
|
-
headers = {"Expires": str(timeout * 1000)}
|
|
72
|
-
try:
|
|
73
|
-
upload_id = self.bucket.init_multipart_upload(oss_path).upload_id
|
|
74
|
-
for part_data in self.iter_data(data):
|
|
75
|
-
upload_data_len += len(part_data)
|
|
76
|
-
ready_data, upload_data = self.assemble(ready_data, part_data)
|
|
77
|
-
if upload_data:
|
|
78
|
-
part_index = len(parts) + 1
|
|
79
|
-
upload_info = self.bucket.upload_part(
|
|
80
|
-
oss_path, upload_id, part_index, upload_data
|
|
81
|
-
)
|
|
82
|
-
parts.append(PartInfo(part_index, upload_info.etag))
|
|
83
|
-
|
|
84
|
-
format_upload = self.format_upload_len(upload_data_len)
|
|
85
|
-
|
|
86
|
-
if parts and ready_data:
|
|
87
|
-
part_index = len(parts) + 1
|
|
88
|
-
upload_info = self.bucket.upload_part(
|
|
89
|
-
oss_path, upload_id, part_index, ready_data
|
|
90
|
-
)
|
|
91
|
-
parts.append(PartInfo(part_index, upload_info.etag))
|
|
92
|
-
self.bucket.complete_multipart_upload(
|
|
93
|
-
oss_path, upload_id, parts
|
|
94
|
-
)
|
|
95
|
-
log.info(
|
|
96
|
-
f"split upload, file path: {oss_path}"
|
|
97
|
-
f", file size: {format_upload}"
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
elif len(ready_data) > self.min_size:
|
|
101
|
-
self.bucket.put_object(oss_path, ready_data, headers)
|
|
102
|
-
log.info(
|
|
103
|
-
f"upload file, file path: {oss_path}"
|
|
104
|
-
f", file size: {format_upload}"
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
# else:
|
|
108
|
-
# log.info(
|
|
109
|
-
# f"file size smaller than min size! "
|
|
110
|
-
# f"file size: {format_upload}"
|
|
111
|
-
# )
|
|
112
|
-
status = True
|
|
113
|
-
except ValueError as e:
|
|
114
|
-
pass
|
|
115
|
-
# log.exception(str(e))
|
|
116
|
-
except oss2.exceptions.RequestError as e:
|
|
117
|
-
self.bucket = oss2.Bucket(
|
|
118
|
-
auth=self.auth,
|
|
119
|
-
endpoint=self.endpoint,
|
|
120
|
-
bucket_name=self.bucket_name
|
|
121
|
-
)
|
|
122
|
-
log.exception("oss timeout! " + str(e))
|
|
123
|
-
except Exception as e:
|
|
124
|
-
self.bucket.abort_multipart_upload(oss_path, upload_id, headers)
|
|
125
|
-
log.exception("upload file exception: " + str(e))
|
|
126
|
-
|
|
127
|
-
return status
|
|
128
|
-
|
cobweb/db/scheduler/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .. import log, Seed, SchedulerInterface as Inf
|
cobweb/db/scheduler/default.py
DELETED
(The 8 removed lines of this file are not present in this capture.)
cobweb/db/scheduler/textfile.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from . import Inf, log, Seed
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Textfile(Inf):
|
|
5
|
-
|
|
6
|
-
index = None
|
|
7
|
-
|
|
8
|
-
def schedule(self):
|
|
9
|
-
try:
|
|
10
|
-
seeds = []
|
|
11
|
-
with open(self.table, "r") as fp:
|
|
12
|
-
fp.seek(self.index or 0, 0)
|
|
13
|
-
for _ in range(self.length):
|
|
14
|
-
data = fp.readline().strip()
|
|
15
|
-
if not data:
|
|
16
|
-
log.info("scheduler end!")
|
|
17
|
-
self.stop = True
|
|
18
|
-
break
|
|
19
|
-
seeds.append(Seed(data))
|
|
20
|
-
self.index = fp.tell()
|
|
21
|
-
return seeds
|
|
22
|
-
except FileNotFoundError:
|
|
23
|
-
log.error("task table not found!")
|
|
24
|
-
return None
|
|
25
|
-
except TypeError:
|
|
26
|
-
log.error("task table type error!")
|
|
27
|
-
return None
|
cobweb/db/storer/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .. import log, Seed, StorerInterface as Inf
|
cobweb/db/storer/console.py
DELETED
(The 9 removed lines of this file are not present in this capture.)
cobweb/db/storer/loghub.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from . import Inf, log
|
|
3
|
-
from aliyun.log import LogClient, LogItem, PutLogsRequest
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class Loghub(Inf):
|
|
7
|
-
|
|
8
|
-
def __init__(self, **kwargs):
|
|
9
|
-
super().__init__(**kwargs)
|
|
10
|
-
self.client = None
|
|
11
|
-
|
|
12
|
-
def init_loghub_clint(self):
|
|
13
|
-
try:
|
|
14
|
-
self.client = LogClient(
|
|
15
|
-
self.config['endpoint'],
|
|
16
|
-
self.config['access_key_id'],
|
|
17
|
-
self.config['access_key']
|
|
18
|
-
)
|
|
19
|
-
except Exception as e:
|
|
20
|
-
self.client = None
|
|
21
|
-
return False
|
|
22
|
-
|
|
23
|
-
def store(self, data_list):
|
|
24
|
-
try:
|
|
25
|
-
if not self.client:
|
|
26
|
-
self.init_loghub_clint()
|
|
27
|
-
|
|
28
|
-
log_items = list()
|
|
29
|
-
for item in data_list:
|
|
30
|
-
temp = item._asdict()
|
|
31
|
-
for key, value in temp.items():
|
|
32
|
-
if isinstance(value, str):
|
|
33
|
-
temp[key] = value
|
|
34
|
-
else:
|
|
35
|
-
temp[key] = json.dumps(value, ensure_ascii=False)
|
|
36
|
-
log_item = LogItem()
|
|
37
|
-
contents = sorted(temp.items()) # dict to tuple
|
|
38
|
-
log_item.set_contents(contents)
|
|
39
|
-
log_items.append(log_item)
|
|
40
|
-
request = PutLogsRequest(
|
|
41
|
-
project=self.config["project"],
|
|
42
|
-
logstore=self.table,
|
|
43
|
-
topic=self.config["topic"],
|
|
44
|
-
source=self.config.get("source"),
|
|
45
|
-
logitems=log_items,
|
|
46
|
-
compress=True
|
|
47
|
-
)
|
|
48
|
-
self.client.put_logs(request=request)
|
|
49
|
-
log.info(f"save data, data length: {len(data_list)}")
|
|
50
|
-
return True
|
|
51
|
-
except Exception as e:
|
|
52
|
-
log.exception(e)
|
|
53
|
-
return False
|
|
54
|
-
|
cobweb/db/storer/redis.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from cobweb import log, StorerInterface
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Redis(StorerInterface):
|
|
5
|
-
|
|
6
|
-
def store(self, data_list):
|
|
7
|
-
try:
|
|
8
|
-
data_str = "\n".join(str(data) for data in data_list)
|
|
9
|
-
with open(self.table, "a") as fp:
|
|
10
|
-
fp.write(data_str)
|
|
11
|
-
log.info(f"save data, data length: {len(data_list)}")
|
|
12
|
-
return True
|
|
13
|
-
except Exception as e:
|
|
14
|
-
return False
|
|
15
|
-
|
cobweb/db/storer/textfile.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from . import Inf, log
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Textfile(Inf):
|
|
5
|
-
|
|
6
|
-
def store(self, data_list):
|
|
7
|
-
try:
|
|
8
|
-
data_str = "\n".join(str(data) for data in data_list)
|
|
9
|
-
with open(self.table, "a") as fp:
|
|
10
|
-
fp.write(data_str)
|
|
11
|
-
log.info(f"save data, data length: {len(data_list)}")
|
|
12
|
-
return True
|
|
13
|
-
except Exception as e:
|
|
14
|
-
return False
|
|
15
|
-
|