openkosmos-core 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openkosmos_core-0.1.2/PKG-INFO +15 -0
- openkosmos_core-0.1.2/README.md +1 -0
- openkosmos_core-0.1.2/pyproject.toml +24 -0
- openkosmos_core-0.1.2/src/openkosmos_core/__init__.py +4 -0
- openkosmos_core-0.1.2/src/openkosmos_core/arguments.py +64 -0
- openkosmos_core-0.1.2/src/openkosmos_core/auth.py +30 -0
- openkosmos_core-0.1.2/src/openkosmos_core/config.py +36 -0
- openkosmos_core-0.1.2/src/openkosmos_core/exception.py +11 -0
- openkosmos_core-0.1.2/src/openkosmos_core/executor.py +75 -0
- openkosmos_core-0.1.2/src/openkosmos_core/html_extractor.py +57 -0
- openkosmos_core-0.1.2/src/openkosmos_core/py.typed +0 -0
- openkosmos_core-0.1.2/src/openkosmos_core/setting.py +43 -0
- openkosmos_core-0.1.2/src/openkosmos_core/template_loader.py +2 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: openkosmos-core
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: openkosmos-core
|
|
5
|
+
Author: wangtao
|
|
6
|
+
Author-email: wangtao <wangtao@openkosmos.com>
|
|
7
|
+
Requires-Dist: jinja2>=3.1.6
|
|
8
|
+
Requires-Dist: parsel>=1.10.0
|
|
9
|
+
Requires-Dist: pyjwt>=2.10.1
|
|
10
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
11
|
+
Requires-Dist: tqdm>=4.67.1
|
|
12
|
+
Requires-Python: >=3.12
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
openkosmos-core
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
openkosmos-core
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "openkosmos-core"
|
|
3
|
+
version = "0.1.2"
|
|
4
|
+
description = "openkosmos-core"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "wangtao", email = "wangtao@openkosmos.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"jinja2>=3.1.6",
|
|
12
|
+
"parsel>=1.10.0",
|
|
13
|
+
"pyjwt>=2.10.1",
|
|
14
|
+
"pyyaml>=6.0.3",
|
|
15
|
+
"tqdm>=4.67.1",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["uv_build>=0.9.15,<0.10.0"]
|
|
20
|
+
build-backend = "uv_build"
|
|
21
|
+
|
|
22
|
+
[[tool.uv.index]]
|
|
23
|
+
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
|
|
24
|
+
default = true
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import logging.config
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from multiprocessing import Lock
|
|
7
|
+
|
|
8
|
+
# Deployment marker read once at import time; the value "WSL_DOCKER" disables
# the initialisation lock used by Argument.logger().
CONTAINER_ENV = os.environ.get("CONTAINER_ENV", "DEFAULT")


class Argument:
    """Fluent wrapper around ``argparse.ArgumentParser`` with one-time logging setup.

    Instances collect command-line parameters (``param``), parse them
    (``parse``) and expose the values (``get``).  The class additionally keeps
    process-wide state: the most recently registered instance (``current``)
    and a flag recording whether ``logging`` has been configured (``logger``).
    """

    # True once logging.config.fileConfig() has completed successfully.
    __log_init = False
    # Object registered by the most recent constructor call (None until then).
    __current = None
    # Overrides installed through set_log_config(); None means "use the
    # KOSMOS_LOG_CONFIG environment variable / default".
    __config_file = None
    __create_logs_dir = True
    # NOTE(review): the lock is skipped under WSL_DOCKER -- presumably because
    # multiprocessing locks are not usable in that environment; confirm.
    if CONTAINER_ENV == "WSL_DOCKER":
        __init_lock = None
    else:
        __init_lock = Lock()

    def __init__(self, instance):
        """Create an empty parser and register ``instance`` as the current one.

        Args:
            instance: Object later returned by ``Argument.current()``.
        """
        self.__parser = argparse.ArgumentParser()
        # Initialised here so get()/__str__() work before parse() is called.
        self.__args = None
        self.__parameters = {}
        Argument.__current = instance

    def param(self, name, default=None, help=None, action=None):
        """Register the option ``-<name>`` and return ``self`` for chaining."""
        self.__parser.add_argument("-" + name, default=default, help=help, action=action)
        return self

    def parse(self, args=None):
        """Parse ``args`` (``sys.argv[1:]`` when omitted) and return ``self``.

        The command line is read when the method is called rather than when
        the module is imported, so changes to ``sys.argv`` made after import
        are honoured.
        """
        if args is None:
            args = sys.argv[1:]
        self.__args = self.__parser.parse_args(args)
        self.__parameters = vars(self.__args)
        return self

    def get(self, name):
        """Return the parsed value of ``name``, or ``None`` if it is unknown."""
        return self.__parameters.get(name)

    def __str__(self):
        return str(self.__parameters)

    def usage(self):
        """Print the parser's help text."""
        self.__parser.print_help()

    @classmethod
    def current(cls):
        """Return the most recently registered instance, or ``None``."""
        return Argument.__current

    @classmethod
    def set_log_config(cls, config_file, create_logs_dir=True):
        """Override the logging configuration used by ``logger``.

        Has an effect only when called before logging has been configured,
        i.e. before the first successful ``logger`` call.

        Args:
            config_file: Path passed to ``logging.config.fileConfig``.
            create_logs_dir: When false, the log directory is not created.
        """
        Argument.__config_file = config_file
        Argument.__create_logs_dir = create_logs_dir

    @classmethod
    def logger(cls, tag):
        """Return ``logging.getLogger(tag)``, configuring logging on first use.

        The configuration file is the one given to ``set_log_config`` or,
        failing that, ``$KOSMOS_LOG_CONFIG`` (default ``log.conf``).  The log
        directory is ``$KOSMOS_LOG_DIR`` (default ``.log``).

        Raises:
            Exception: whatever ``os.makedirs`` or
                ``logging.config.fileConfig`` raise; logging then stays
                unconfigured and the next call retries.
        """
        if not Argument.__log_init:
            lock = Argument.__init_lock
            if lock is not None:
                lock.acquire()
            try:
                # Re-check under the lock: another caller may have finished
                # the configuration while this one was waiting.
                if not Argument.__log_init:
                    config_file = Argument.__config_file
                    if config_file is None:
                        config_file = os.getenv("KOSMOS_LOG_CONFIG", default="log.conf")
                    if Argument.__create_logs_dir:
                        log_dir = os.environ.get("KOSMOS_LOG_DIR", default=".log")
                        os.makedirs(log_dir, exist_ok=True)
                    logging.config.fileConfig(config_file)
                    Argument.__log_init = True
            finally:
                # Always release, including on early exit or failure, so a
                # later call cannot block forever on a lock nobody frees.
                if lock is not None:
                    lock.release()
        return logging.getLogger(tag)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
import jwt
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AuthToken:
    """Create and verify HS256-signed JSON Web Tokens using a shared secret."""

    def __init__(self, secret_key: str):
        """Store the signing secret with surrounding whitespace removed."""
        self.secret_key = secret_key.strip()
        # Single algorithm name used for both signing and verification.
        self.algorithms = "HS256"

    def generate(self, data: dict, expired_seconds: int = 300) -> str:
        """Return a signed token carrying ``data`` plus an ``exp`` claim.

        Args:
            data: Claims to embed. The mapping is copied, so the caller's
                dict is left untouched.
            expired_seconds: Lifetime of the token, counted from now.
        """
        payload = dict(data)
        payload["exp"] = int(time.time()) + expired_seconds
        return jwt.encode(payload, self.secret_key, algorithm=self.algorithms)

    def retrieve(self, token: str) -> dict:
        """Decode ``token`` and return its claims without the ``exp`` entry.

        Raises:
            Exception: whatever ``jwt.decode`` raises for a token it rejects.
        """
        payload = jwt.decode(token.strip(), self.secret_key, algorithms=[self.algorithms])
        # Tolerate tokens that were issued without an expiry claim.
        payload.pop("exp", None)
        return payload

    def generate_auth_headers(self, data: dict, expired_seconds: int = 300) -> dict[str, str]:
        """Return JSON request headers with a bearer token built from ``data``."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.generate(data, expired_seconds)}",
        }
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import yaml
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Config:
|
|
8
|
+
config = {}
|
|
9
|
+
|
|
10
|
+
def __init__(self, config_file=None):
|
|
11
|
+
self.load(config_file)
|
|
12
|
+
|
|
13
|
+
def load(self, config_file=None):
|
|
14
|
+
c_file = config_file
|
|
15
|
+
if c_file is None:
|
|
16
|
+
c_file = os.getenv("KOSMOS_CONFIG", default="config.yml")
|
|
17
|
+
with open(c_file, "r", encoding="utf-8") as f:
|
|
18
|
+
self.config = yaml.load(f, Loader=yaml.FullLoader)
|
|
19
|
+
return self.config
|
|
20
|
+
|
|
21
|
+
def get(self, name: str | list[str], config_type=None):
|
|
22
|
+
last = None
|
|
23
|
+
if type(name) is str:
|
|
24
|
+
last = self.config.get(name)
|
|
25
|
+
else:
|
|
26
|
+
last = self.config
|
|
27
|
+
for n in name:
|
|
28
|
+
last = last.get(n)
|
|
29
|
+
|
|
30
|
+
if config_type is None:
|
|
31
|
+
return last
|
|
32
|
+
else:
|
|
33
|
+
return config_type(**last)
|
|
34
|
+
|
|
35
|
+
def __str__(self):
|
|
36
|
+
return json.dumps(self.config, indent=3, ensure_ascii=False)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
class FatalException(Exception):
    """Error carrying a message and optional structured detail."""

    # Status code associated with this error type.
    status_code = 505

    def __init__(self, message, detail=None):
        """Store ``message`` and ``detail``.

        The message is also passed to ``Exception`` so that ``str(exc)`` and
        tracebacks show it.
        """
        super().__init__(message)
        self.message = message
        self.detail = detail

    def to_dict(self):
        """Return ``{"message": ..., "detail": {...}}``; detail defaults to ``{}``."""
        return {"message": self.message, "detail": dict(self.detail or ())}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
3
|
+
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
|
|
6
|
+
from openkosmos_core.setting import Setting
|
|
7
|
+
|
|
8
|
+
log = Setting.logger("executor")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TaskExecutor:
    """Run ``fn(param, meta)`` for every item of a feed on a thread pool."""

    def __init__(self, fn, completed_callback=None, delay=0, meta=None, exception_callback=None):
        """Configure the executor.

        Args:
            fn: Callable invoked as ``fn(param, meta)`` for each feed item.
            completed_callback: Optional ``callback(results, meta)`` invoked
                after all tasks finished without an abort.
            delay: Seconds to sleep before each task (skipped when ``<= 0``).
            meta: Object handed to every ``fn`` call; a fresh dict when
                omitted, so instances never share a default.
            exception_callback: ``callback(exception) -> bool``; a truthy
                result aborts the run. Defaults to always aborting.
        """
        self.fn = fn
        self.completed_callback = completed_callback
        if exception_callback is None:
            self.exception_callback = lambda exception: True
        else:
            self.exception_callback = exception_callback
        self.delay = delay
        self.meta = {} if meta is None else meta

    def start(self, feed, max_worker=1, wait=True, show_progress=True):
        """Submit one task per item of ``feed``.

        Args:
            feed: Iterable of parameters, one per task.
            max_worker: Size of the thread pool.
            wait: When true, block until the tasks finish and return the
                list of results; otherwise return immediately.
            show_progress: Display a tqdm progress bar (only when waiting).

        Returns:
            With ``wait=True`` the results in completion order (partial if
            the exception callback aborted the run); with ``wait=False`` a
            dict mapping each future to its parameter.
        """
        if not wait:
            executor = ThreadPoolExecutor(max_workers=max_worker)
            futures = {executor.submit(self.execute, param): param for param in feed}
            # Submitted tasks still run; the pool is released explicitly once
            # they are done instead of depending on garbage collection.
            executor.shutdown(wait=False)
            return futures

        started_at = time.time()
        results = []
        with ThreadPoolExecutor(max_workers=max_worker) as executor:
            futures = {executor.submit(self.execute, param): param for param in feed}
            # len(futures) rather than len(feed): the feed may be a generator.
            with tqdm(total=len(futures), desc="progress",
                      disable=not show_progress) as progress_bar:
                for future in as_completed(futures):
                    param = futures[future]
                    try:
                        results.append(future.result())
                    except Exception as e:
                        log.exception(e)
                        log.error("an exception when execute : {}, {}".format(param, e))
                        if self.exception_callback(e):
                            # Leaving the ``with`` blocks closes the bar and
                            # waits for the pool to shut down.
                            return results
                    finally:
                        # Count failed tasks too so the bar reaches 100 %.
                        progress_bar.update(1)

        log.info("all completed...")
        if self.completed_callback is not None:
            self.completed_callback(results, self.meta)
        else:
            log.info("all done[{:.3f}] with results : {}, {}".format(time.time() - started_at, results,
                                                                     self.meta))
        return results

    def execute(self, param):
        """Run a single task: optional delay, then ``fn(param, meta)``."""
        if self.delay > 0:
            time.sleep(self.delay)
        return self.fn(param, self.meta)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from types import SimpleNamespace as T
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
from parsel import Selector
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class HtmlExtractor:
    """Extract named values from HTML according to a declarative template.

    The template is a mapping with two entries read by this class:

    * ``"beans"``: list of ``{"name": ..., "type": ...}``; a bean whose type
      is ``"list"`` collects a list of dicts, any other bean is one dict.
    * ``"template"``: list of nodes. Each node has an ``"xpath"`` and may
      have ``"name"`` (field to store), ``"bean"`` (target bean) and
      ``"kids"`` (child nodes evaluated relative to this node).
    """

    def __init__(self, template):
        self.__template = template

    def __extract(self, current, parent_bean, beans, kid):
        """Apply one template node to ``current`` and recurse into its kids.

        Args:
            current: Selector (or selector list) the node's xpath runs on.
            parent_bean: Dict that receives named values, or ``None`` at the
                top level, where values go to ``beans[kid["bean"]]``.
            beans: All output beans keyed by name.
            kid: The template node.
        """
        node = current.xpath(kid["xpath"])
        name = kid.get("name")
        bean_name = kid.get("bean")
        kids = kid.get("kids")

        if name is not None:
            # Store the first match (as returned by node.get()).
            if parent_bean is not None:
                parent_bean[name] = node.get()
            else:
                beans.get(bean_name)[name] = node.get()

        if kids is None:
            return

        if bean_name is not None and name is None:
            # Unnamed node bound to a bean: every match yields one record
            # that is appended to that bean.
            for n in node:
                record = dict()
                for k in kids:
                    self.__extract(n, record, beans, k)
                beans.get(bean_name).append(record)
        else:
            # Plain grouping node: children write into the same parent.
            for k in kids:
                self.__extract(node, parent_bean, beans, k)

    def __create_extractor(self, content):
        """Build the selector for ``content`` and empty containers per bean."""
        selector = Selector(text=content)
        beans = dict()
        for b in self.__template["beans"]:
            if b.get("type") == "list":
                beans[b["name"]] = []
            else:
                beans[b["name"]] = dict()
        return T(selector=selector, beans=beans)

    def extract(self, content):
        """Run the whole template against ``content`` and return the beans."""
        extractor = self.__create_extractor(content)
        for kid in self.__template["template"]:
            self.__extract(extractor.selector, None, extractor.beans, kid)
        return extractor.beans

    @classmethod
    def test(cls, templatefile, contentfile):
        """Load a YAML template and an HTML file from disk and extract.

        Both files are read as UTF-8.
        """
        with open(templatefile, "r", encoding="utf-8") as tf:
            # NOTE(security): FullLoader is not meant for untrusted input;
            # consider yaml.safe_load for templates from outside sources.
            template = yaml.load(tf, Loader=yaml.FullLoader)
        with open(contentfile, "r", encoding="utf8") as ff:
            return HtmlExtractor(template).extract(ff.read())
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from openkosmos_core import arguments
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Setting(arguments.Argument):
    """Command-line settings with typed accessors on top of ``Argument``."""

    # Lower-cased string values that get_bool_param() reads as False.
    _FALSE_STRINGS = frozenset({"", "0", "false", "no", "off", "n", "f"})

    def __init__(self, params=None):
        """Register ``params``, parse the command line and log the result.

        Args:
            params: Optional mapping ``name -> spec`` where ``spec`` is a
                mapping that may contain ``"default"``, ``"help"`` and
                ``"action"``; each entry becomes one ``-<name>`` option.
        """
        super().__init__(instance=self)
        if params is not None:
            for name, value in params.items():
                self.param(name, default=value.get("default"), help=value.get("help"),
                           action=value.get("action"))
        self.parse()
        Setting.logger("setting").info(str(self))

    def get_param(self, name):
        """Return the raw parsed value of ``name`` (``None`` if absent)."""
        return self.get(name)

    def get_str_param(self, name, default_value=None):
        """Return ``name`` as ``str``, or ``default_value`` when it is unset."""
        value = self.get(name)
        if value is None:
            return default_value
        return str(value)

    def get_int_param(self, name, default_value=None):
        """Return ``name`` as ``int``, or ``default_value`` when it is unset."""
        value = self.get(name)
        if value is None:
            return default_value
        return int(value)

    def get_float_param(self, name, default_value=None):
        """Return ``name`` as ``float``, or ``default_value`` when it is unset."""
        value = self.get(name)
        if value is None:
            return default_value
        return float(value)

    def get_bool_param(self, name, default_value=None):
        """Return ``name`` as ``bool``, or ``default_value`` when it is unset.

        String values are compared case-insensitively: ``""``, ``"0"``,
        ``"false"``, ``"no"``, ``"off"``, ``"n"`` and ``"f"`` are False and
        every other string is True. Non-string values use ``bool(value)``.
        """
        value = self.get(name)
        if value is None:
            return default_value
        if isinstance(value, str):
            # bool("false") is True, so text needs explicit handling.
            return value.strip().lower() not in self._FALSE_STRINGS
        return bool(value)
|