nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- nlpertools/__init__.py +24 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/data_client.py +387 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +611 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +54 -36
- nlpertools/io/file.py +277 -222
- nlpertools/ml.py +483 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +364 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
- nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
- nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
- nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
"""
|
5
|
+
# 该项目暂时没有日志输出
|
6
|
+
import codecs
|
7
|
+
import logging.config
|
8
|
+
|
9
|
+
import nlpertools
|
10
|
+
import yaml
|
11
|
+
|
12
|
+
nlpertools.j_mkdir("logs")
|
13
|
+
|
14
|
+
with codecs.open('log_config.yml', 'r', 'utf-8') as stream:
|
15
|
+
config = yaml.load(stream, Loader=yaml.FullLoader)
|
16
|
+
|
17
|
+
# logging.basicConfig(level=logging.INFO)
|
18
|
+
logging.config.dictConfig(config)
|
19
|
+
logger = logging.getLogger()
|
20
|
+
"""
|
nlpertools/utils/package.py
CHANGED
@@ -1,76 +1,89 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
|
7
|
+
|
8
|
+
def try_import(name, package):
    """Import ``name`` on a best-effort basis.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only consulted when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` when the import raised.
    """
    # NOTE(review): call sites such as try_import("kafka", "KafkaProducer")
    # pass an attribute name as ``package``; with an absolute ``name`` that
    # argument is ignored and the top-level module is returned - confirm
    # that this is what the callers expect.
    try:
        return import_module(name, package=package)
    except Exception:
        # Optional dependency: any failure while importing is tolerated, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
|
16
|
+
|
17
|
+
|
18
|
+
def lazy_import(importer_name, to_import):
    """Return the importing module and a callable for lazy importing.

    The module named by importer_name represents the module performing the
    import to help facilitate resolving relative imports.

    to_import is an iterable of the modules to be potentially imported (absolute
    or relative). The `as` form of importing is also supported,
    e.g. `pkg.mod as spam`.

    This function returns a tuple of two items. The first is the importer
    module for easy reference within itself. The second item is a callable to be
    set to `__getattr__`.

    Note: example taken from the net (author unknown); this function is not
    used.
    """
    module = importlib.import_module(importer_name)

    # Maps the name that will be bound on the importer to the module to import.
    import_mapping = {}
    for name in to_import:
        importing, _, binding = name.partition(' as ')
        if not binding:
            # No alias given: bind under the last dotted component.
            _, _, binding = importing.rpartition('.')
        import_mapping[binding] = importing

    def __getattr__(name):
        if name not in import_mapping:
            message = f'module {importer_name!r} has no attribute {name!r}'
            raise AttributeError(message)
        importing = import_mapping[name]
        # importlib.import_module() implicitly sets submodules on this module
        # as appropriate for direct imports.
        imported = importlib.import_module(importing,
                                           module.__spec__.parent)
        # Cache the result on the importer module under the bound name.
        setattr(module, name, imported)
        return imported

    return module, __getattr__
|
58
|
+
|
59
|
+
|
60
|
+
# jieba = try_import("jieba", None)
|
61
|
+
# sns = try_import("seaborn", None)
|
62
|
+
# torch = try_import("torch", None)
|
63
|
+
# nn = try_import("torch.nn", None)
|
64
|
+
# BertTokenizer = try_import("transformers", "BertTokenizer")
|
65
|
+
# BertForMaskedLM = try_import("transformers", "BertForMaskedLM")
|
66
|
+
# Elasticsearch = try_import("elasticsearch", "Elasticsearch")
|
67
|
+
# pd = try_import("pandas", None)
|
68
|
+
# xgb = try_import("xgboost", None)
|
69
|
+
|
70
|
+
aioredis = try_import("aioredis", None)
|
71
|
+
pymysql = try_import("pymysql", None)
|
72
|
+
zhconv = try_import("zhconv", None)
|
73
|
+
KafkaProducer = try_import("kafka", "KafkaProducer")
|
74
|
+
KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
75
|
+
np = try_import("numpy", None)
|
76
|
+
plt = try_import("matplotlib", "pyplot")
|
77
|
+
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
78
|
+
metrics = try_import("sklearn", "metrics")
|
79
|
+
requests = try_import("requests", None)
|
80
|
+
pq = try_import("pyquery", None)
|
81
|
+
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
82
|
+
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
83
|
+
tqdm = try_import("tqdm", "tqdm")
|
84
|
+
# TODO: auto-exporting langid and win32evtlogutil produces buggy output
|
85
|
+
langid = try_import("langid", None)
|
86
|
+
win32evtlogutil = try_import("win32evtlogutil", None)
|
87
|
+
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
88
|
+
yaml = try_import("yaml", None)
|
89
|
+
omegaconf = try_import("omegaconf", None)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
from importlib.util import LazyLoader
|
7
|
+
from .lazy import lazy_module
|
8
|
+
|
9
|
+
EXCLUDE_LAZYIMPORT = {"torch", "torch.nn", "numpy"}
|
10
|
+
|
11
|
+
|
12
|
+
def try_import(name, package):
    """Return a lazily (or eagerly) imported optional dependency.

    Args:
        name: Module name, e.g. ``"matplotlib"``.
        package: Optional sub-name; when truthy the dotted path
            ``"<name>.<package>"`` is handed to ``lazy_module``.

    Returns:
        Whatever ``lazy_module`` / ``import_module`` returns, or ``None``
        when the call raised (a failure message is printed in that case).
    """
    try:
        if package:
            # The dotted path is "<name>.<package>" (e.g. "matplotlib.pyplot");
            # the previous "<package>.<name>" order could never resolve.
            # NOTE(review): entries whose ``package`` is a class rather than a
            # submodule (e.g. "KafkaProducer") presumably still need separate
            # handling - confirm against the lazy helper's API.
            return lazy_module("{}.{}".format(name, package))
        if name in EXCLUDE_LAZYIMPORT:
            # Modules listed in EXCLUDE_LAZYIMPORT are imported eagerly.
            return import_module(name, package=package)
        return lazy_module(name)
    except Exception:
        # Best effort: report and fall through to None, without swallowing
        # KeyboardInterrupt / SystemExit.
        print("import {} failed".format(name))
        return None
|
27
|
+
|
28
|
+
|
29
|
+
def lazy_import(importer_name, to_import):
    """Build a lazy-import hook for the module called ``importer_name``.

    ``to_import`` is an iterable of module names (absolute or relative), each
    optionally written as ``pkg.mod as alias``. Without an alias the last
    dotted component is used as the binding name.

    Returns a ``(module, __getattr__)`` tuple: the importer module itself and
    a callable meant to be installed as that module's ``__getattr__``.
    """
    importer = importlib.import_module(importer_name)

    def _binding_and_target(spec):
        # "pkg.mod as alias" -> ("alias", "pkg.mod"); "pkg.mod" -> ("mod", "pkg.mod")
        target, _sep, alias = spec.partition(' as ')
        return (alias or target.rpartition('.')[2]), target

    targets = dict(_binding_and_target(spec) for spec in to_import)

    def __getattr__(name):
        target = targets.get(name)
        if target is None:
            raise AttributeError(
                f'module {importer_name!r} has no attribute {name!r}')
        # importlib.import_module() implicitly sets submodules on the importer
        # as appropriate for direct imports.
        loaded = importlib.import_module(target, importer.__spec__.parent)
        setattr(importer, name, loaded)
        return loaded

    return importer, __getattr__
|
64
|
+
|
65
|
+
|
66
|
+
aioredis = try_import("aioredis", None)
|
67
|
+
happybase = try_import("happybase", None)
|
68
|
+
pd = try_import("pandas", None)
|
69
|
+
pymysql = try_import("pymysql", None)
|
70
|
+
Elasticsearch = try_import("elasticsearch", "Elasticsearch")
|
71
|
+
KafkaProducer = try_import("kafka", "KafkaProducer")
|
72
|
+
MongoClient = try_import("pymongo", "MongoClient")
|
73
|
+
helpers = try_import("elasticsearch", "helpers")
|
74
|
+
KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
75
|
+
np = try_import("numpy", None)
|
76
|
+
sns = try_import("seaborn", None)
|
77
|
+
torch = try_import("torch", None)
|
78
|
+
nn = try_import("torch.nn", None)
|
79
|
+
xgb = try_import("xgboost", None)
|
80
|
+
plt = try_import("matplotlib", "pyplot")
|
81
|
+
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
82
|
+
metrics = try_import("sklearn", "metrics")
|
83
|
+
BertTokenizer = try_import("transformers", "BertTokenizer")
|
84
|
+
BertForMaskedLM = try_import("transformers", "BertForMaskedLM")
|
85
|
+
requests = try_import("requests", None)
|
86
|
+
psutil = try_import("psutil", None)
|
87
|
+
pq = try_import("pyquery", None)
|
88
|
+
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
89
|
+
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
90
|
+
tqdm = try_import("tqdm", "tqdm")
|
91
|
+
langid = try_import("langid", None)
|
92
|
+
# win32evtlogutil?
|
93
|
+
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
94
|
+
yaml = try_import("yaml", None)
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# !/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
import os
|
7
|
+
|
8
|
+
|
9
|
+
def try_import(name, package):
    """Import ``name`` on a best-effort basis.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only consulted when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` when the import raised.
    """
    try:
        return import_module(name, package=package)
    except Exception:
        # Optional dependency: any failure while importing is tolerated, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
|
17
|
+
|
18
|
+
|
19
|
+
# Optional third-party dependencies. Every public name starts out as None so
# that it always exists, even when the selective branch below skips it.
aioredis = None
happybase = None
pd = None
pymysql = None
Elasticsearch = None
KafkaProducer = None
MongoClient = None
helpers = None
KafkaConsumer = None
np = None
sns = None
torch = None
nn = None
xgb = None
plt = None
WordNetLemmatizer = None
metrics = None
BertTokenizer = None
BertForMaskedLM = None
requests = None
psutil = None
pq = None
CountVectorizer = None
precision_recall_fscore_support = None
tqdm = None
langid = None
win32evtlogutil = None
TfidfTransformer = None
yaml = None

# Public name -> (name, package) arguments passed to try_import.
import_dict = {
    "aioredis": ("aioredis", None),
    "happybase": ("happybase", None),
    "pd": ("pandas", None),
    "pymysql": ("pymysql", None),
    "Elasticsearch": ("elasticsearch", "Elasticsearch"),
    "KafkaProducer": ("kafka", "KafkaProducer"),
    "MongoClient": ("pymongo", "MongoClient"),
    "helpers": ("elasticsearch", "helpers"),
    "KafkaConsumer": ("kafka", "KafkaConsumer"),
    "np": ("numpy", None),
    "sns": ("seaborn", None),
    "torch": ("torch", None),
    "nn": ("torch.nn", None),
    "xgb": ("xgboost", None),
    "plt": ("matplotlib", "pyplot"),
    "WordNetLemmatizer": ("nltk.stem", "WordNetLemmatizer"),
    "metrics": ("sklearn", "metrics"),
    "BertTokenizer": ("transformers", "BertTokenizer"),
    "BertForMaskedLM": ("transformers", "BertForMaskedLM"),
    "requests": ("requests", None),
    "psutil": ("psutil", None),
    "pq": ("pyquery", None),
    "CountVectorizer": ("sklearn.feature_extraction.text", "CountVectorizer"),
    "precision_recall_fscore_support": ("sklearn.metrics", "precision_recall_fscore_support"),
    "tqdm": ("tqdm", "tqdm"),
    "langid": ("langid", None),
    "win32evtlogutil": ("win32evtlogutil", None),
    "TfidfTransformer": ("sklearn.feature_extraction.text", "TfidfTransformer"),
    "yaml": ("yaml", None),
}
if "nlpertools_helper" in os.environ:
    # TODO: this code path has not been tested.
    # An environment value is always a string; iterating it directly walked
    # over single characters and raised KeyError on the first one.
    # NOTE(review): assumes the value is a comma- and/or whitespace-separated
    # list of import_dict keys - confirm against whatever sets the variable.
    import_list = os.environ["nlpertools_helper"].replace(",", " ").split()

    for k in import_list:
        name, package = import_dict[k]
        globals()[k] = try_import(name, package)
else:
    # No selection requested: attempt every optional import.
    aioredis = try_import("aioredis", None)
    happybase = try_import("happybase", None)
    pd = try_import("pandas", None)
    pymysql = try_import("pymysql", None)
    Elasticsearch = try_import("elasticsearch", "Elasticsearch")
    KafkaProducer = try_import("kafka", "KafkaProducer")
    MongoClient = try_import("pymongo", "MongoClient")
    helpers = try_import("elasticsearch", "helpers")
    KafkaConsumer = try_import("kafka", "KafkaConsumer")
    np = try_import("numpy", None)
    sns = try_import("seaborn", None)
    torch = try_import("torch", None)
    nn = try_import("torch.nn", None)
    xgb = try_import("xgboost", None)
    plt = try_import("matplotlib", "pyplot")
    WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
    metrics = try_import("sklearn", "metrics")
    BertTokenizer = try_import("transformers", "BertTokenizer")
    BertForMaskedLM = try_import("transformers", "BertForMaskedLM")
    requests = try_import("requests", None)
    psutil = try_import("psutil", None)
    pq = try_import("pyquery", None)
    CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
    precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
    tqdm = try_import("tqdm", "tqdm")
    # TODO: auto-exporting langid and win32evtlogutil produces buggy output
    langid = try_import("langid", None)
    win32evtlogutil = try_import("win32evtlogutil", None)
    TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
    yaml = try_import("yaml", None)
|
@@ -1,93 +1,93 @@
|
|
1
|
-
import os
|
2
|
-
import shutil
|
3
|
-
from importlib import import_module
|
4
|
-
|
5
|
-
from .io.dir import j_mkdir
|
6
|
-
from .io.file import readtxt_list_all_strip, writetxt_w_list
|
7
|
-
|
8
|
-
|
9
|
-
def try_import(name, package):
|
10
|
-
try:
|
11
|
-
return import_module(name, package=package)
|
12
|
-
except:
|
13
|
-
print("import {} failed".format(name))
|
14
|
-
finally:
|
15
|
-
pass
|
16
|
-
|
17
|
-
|
18
|
-
def convert_import_to_try_import(from_path, to_path):
|
19
|
-
j_mkdir(to_path)
|
20
|
-
for root, dirs, files in os.walk(from_path):
|
21
|
-
for sub_dir in dirs:
|
22
|
-
j_mkdir(os.path.join(root.replace(from_path, to_path), sub_dir))
|
23
|
-
for file in files:
|
24
|
-
src = os.path.join(root, file)
|
25
|
-
dst = os.path.join(root.replace(from_path, to_path), file)
|
26
|
-
excluded_file = ["wrapper.py", "kmp.py", "__init__.py"]
|
27
|
-
if file.endswith(".py") and file != "utils_for_nlpertools.py" and file not in excluded_file:
|
28
|
-
raw_code = readtxt_list_all_strip(src)
|
29
|
-
start_idx, end_idx = 0, 0
|
30
|
-
|
31
|
-
for idx, each_line in enumerate(raw_code[:30]):
|
32
|
-
each_line = each_line.lstrip("# ")
|
33
|
-
if start_idx == 0 and (each_line.startswith("from") or each_line.startswith("import")):
|
34
|
-
try:
|
35
|
-
exec(each_line)
|
36
|
-
except:
|
37
|
-
start_idx = idx
|
38
|
-
if start_idx != 0 and not each_line:
|
39
|
-
end_idx = idx
|
40
|
-
break
|
41
|
-
# print(file, start_idx, end_idx)
|
42
|
-
if start_idx != 0 and end_idx != 0:
|
43
|
-
new_code = raw_code[:start_idx] + convert_import_string_to_import_list(
|
44
|
-
"\n".join(raw_code[start_idx:end_idx])) + raw_code[end_idx:]
|
45
|
-
else:
|
46
|
-
new_code = raw_code
|
47
|
-
writetxt_w_list(new_code, dst)
|
48
|
-
else:
|
49
|
-
shutil.copy(src=src, dst=dst)
|
50
|
-
print("convert over")
|
51
|
-
|
52
|
-
|
53
|
-
def get_import_info(text):
|
54
|
-
pass
|
55
|
-
|
56
|
-
|
57
|
-
def convert_import_string_to_import_list(text):
|
58
|
-
"""
|
59
|
-
该方法将 import 转变为 try import
|
60
|
-
"""
|
61
|
-
models_to_import = []
|
62
|
-
import_list = text.split("\n")
|
63
|
-
for each in import_list:
|
64
|
-
print(each)
|
65
|
-
name, package, as_name = None, None, None
|
66
|
-
elements = each.split(" ")
|
67
|
-
for pre, cur in zip(elements, elements[1:]):
|
68
|
-
if cur.endswith(","):
|
69
|
-
cur = cur.rstrip(",")
|
70
|
-
# 为了实现from import 和 import统一,首先把package和name的含义反过来,后面再掉换
|
71
|
-
if pre == "import":
|
72
|
-
package = cur
|
73
|
-
if pre == "from":
|
74
|
-
name = cur
|
75
|
-
if pre == "as":
|
76
|
-
as_name = cur
|
77
|
-
if pre[-1] == ",":
|
78
|
-
# 针对 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
79
|
-
# 将将前面部分和当前的组成新字段
|
80
|
-
prefix = each.split("import")[0]
|
81
|
-
import_list.append("{}import {}".format(prefix, cur))
|
82
|
-
if not as_name:
|
83
|
-
as_name = package.split(".")[-1]
|
84
|
-
if not name:
|
85
|
-
name, package = package, name
|
86
|
-
models_to_import.append((name, package, as_name))
|
87
|
-
# 打印
|
88
|
-
all_import_info = ["", "from utils_for_nlpertools import try_import", ""]
|
89
|
-
for name, package, as_name in models_to_import:
|
90
|
-
import_info = '{} = try_import("{}", {})'.format(as_name, name, '"{}"'.format(package) if package else package)
|
91
|
-
all_import_info.append(import_info)
|
92
|
-
print(import_info)
|
93
|
-
return all_import_info
|
1
|
+
import os
|
2
|
+
import shutil
|
3
|
+
from importlib import import_module
|
4
|
+
|
5
|
+
from .io.dir import j_mkdir
|
6
|
+
from .io.file import readtxt_list_all_strip, writetxt_w_list
|
7
|
+
|
8
|
+
|
9
|
+
def try_import(name, package):
    """Import ``name`` on a best-effort basis, reporting failures on stdout.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only consulted when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` when the import raised.
    """
    try:
        return import_module(name, package=package)
    except Exception:
        # Optional dependency: report and carry on, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("import {} failed".format(name))
        return None
|
16
|
+
|
17
|
+
|
18
|
+
def convert_import_to_try_import(from_path, to_path):
    """Mirror ``from_path`` into ``to_path``, rewriting failing imports.

    For every ``.py`` file that is not excluded, the first 30 lines are
    scanned for the first import statement that cannot be executed in the
    current environment. The lines from that statement up to the next empty
    line are replaced by the output of
    ``convert_import_string_to_import_list``. All other files are copied
    unchanged.

    Args:
        from_path: Root of the source tree to read.
        to_path: Root of the destination tree (created through ``j_mkdir``).

    NOTE(review): import lines are run with ``exec`` inside this process, so
    this must only be pointed at trusted source trees.
    """
    # Python files that are copied verbatim instead of being rewritten.
    excluded_file = {"wrapper.py", "kmp.py", "__init__.py", "utils_for_nlpertools.py"}

    j_mkdir(to_path)
    for root, dirs, files in os.walk(from_path):
        # Map by relative path: str.replace() would rewrite every occurrence
        # of ``from_path`` inside ``root``, not only the leading one.
        target_root = os.path.normpath(
            os.path.join(to_path, os.path.relpath(root, from_path)))
        for sub_dir in dirs:
            j_mkdir(os.path.join(target_root, sub_dir))
        for file in files:
            src = os.path.join(root, file)
            dst = os.path.join(target_root, file)
            if not file.endswith(".py") or file in excluded_file:
                shutil.copy(src=src, dst=dst)
                continue

            # presumably a list of the file's lines - verify against
            # readtxt_list_all_strip
            raw_code = readtxt_list_all_strip(src)
            # None means "not found"; 0 is a valid line index and must not
            # double as the sentinel.
            start_idx, end_idx = None, None

            for idx, each_line in enumerate(raw_code[:30]):
                # Leading '#' and spaces are dropped, so commented-out imports
                # are inspected as well.
                each_line = each_line.lstrip("# ")
                if start_idx is None and each_line.startswith(("from", "import")):
                    try:
                        exec(each_line)
                    except Exception:
                        # First import that fails here marks the block start.
                        start_idx = idx
                if start_idx is not None and not each_line:
                    # The first empty line after the start closes the block.
                    end_idx = idx
                    break

            if start_idx is not None and end_idx is not None:
                new_code = (raw_code[:start_idx]
                            + convert_import_string_to_import_list(
                                "\n".join(raw_code[start_idx:end_idx]))
                            + raw_code[end_idx:])
            else:
                new_code = raw_code
            writetxt_w_list(new_code, dst)
    print("convert over")
|
51
|
+
|
52
|
+
|
53
|
+
def get_import_info(text):
    """Placeholder with no implementation; accepts ``text`` and returns ``None``."""
    return None
|
55
|
+
|
56
|
+
|
57
|
+
def convert_import_string_to_import_list(text):
    """Turn plain import statements into ``try_import`` assignments.

    Args:
        text: Newline-separated import statements. Only single-line,
            unparenthesised ``import a [as x][, b ...]`` and
            ``from m import a [as x][, b ...]`` forms are understood; lines
            without an ``import`` keyword are skipped.

    Returns:
        A list of source lines: an empty line, the ``try_import`` import line,
        another empty line, then one ``alias = try_import("module", package)``
        line per imported name. Names after the first one of a comma-separated
        statement are emitted after all the original statements.

    Each input line and each generated line is also printed.
    """
    models_to_import = []
    # Work queue: extra names of "import a, b" are appended as their own
    # statements and picked up later by the same loop.
    pending = text.split("\n")
    position = 0
    while position < len(pending):
        each = pending[position]
        position += 1
        print(each)

        tokens = each.split()
        if "import" not in tokens:
            # Blank line or not an import statement.
            continue
        import_pos = tokens.index("import")
        head = tokens[:import_pos]
        clauses = [clause.split()
                   for clause in " ".join(tokens[import_pos + 1:]).split(",")]
        clauses = [clause for clause in clauses if clause]
        if not clauses:
            continue

        # Everything before the ``import`` keyword, e.g. "from pkg.mod ".
        # Built from tokens so module names containing "import"
        # (e.g. importlib) are not cut in half.
        prefix = "".join("{} ".format(token) for token in head)
        for extra in clauses[1:]:
            pending.append("{}import {}".format(prefix, " ".join(extra)))

        first = clauses[0]
        # To treat "from ... import" and "import" uniformly, the meaning of
        # package and name is swapped at first and swapped back below.
        package = first[0]
        as_name = first[2] if len(first) >= 3 and first[1] == "as" else None
        name = head[head.index("from") + 1] if "from" in head[:-1] else None

        if not as_name:
            as_name = package.split(".")[-1]
        if not name:
            name, package = package, name
        models_to_import.append((name, package, as_name))

    # Render the collected imports.
    all_import_info = ["", "from utils_for_nlpertools import try_import", ""]
    for name, package, as_name in models_to_import:
        import_info = '{} = try_import("{}", {})'.format(
            as_name, name, '"{}"'.format(package) if package else package)
        all_import_info.append(import_info)
        print(import_info)
    return all_import_info
|