nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlpertools/__init__.py +24 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/data_client.py +387 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +611 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +54 -36
- nlpertools/io/file.py +277 -222
- nlpertools/ml.py +483 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +364 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
- nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
- nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
- nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
"""
|
5
|
+
# This project does not produce any log output for now
|
6
|
+
import codecs
|
7
|
+
import logging.config
|
8
|
+
|
9
|
+
import nlpertools
|
10
|
+
import yaml
|
11
|
+
|
12
|
+
nlpertools.j_mkdir("logs")
|
13
|
+
|
14
|
+
with codecs.open('log_config.yml', 'r', 'utf-8') as stream:
|
15
|
+
config = yaml.load(stream, Loader=yaml.FullLoader)
|
16
|
+
|
17
|
+
# logging.basicConfig(level=logging.INFO)
|
18
|
+
logging.config.dictConfig(config)
|
19
|
+
logger = logging.getLogger()
|
20
|
+
"""
|
nlpertools/utils/package.py
CHANGED
@@ -1,76 +1,89 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
|
7
|
+
|
8
|
+
def try_import(name, package):
    """Import an optional dependency, returning ``None`` when it is unavailable.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only used as the anchor when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` if importing it raised an exception.
    """
    # NOTE(review): callers pass pairs such as ("kafka", "KafkaProducer"). For
    # an absolute ``name`` the second value does not select an attribute, so
    # the module ``kafka`` itself is returned -- confirm this is intended.
    try:
        return import_module(name, package=package)
    except Exception:
        # Best effort: a missing or broken optional dependency must not stop
        # this module from loading. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
|
16
|
+
|
17
|
+
|
18
|
+
def lazy_import(importer_name, to_import):
    """Return the importing module and a callable for lazy importing.

    Provenance, per the original note: an example taken from the internet,
    author unknown; the note also states that this function is not used.

    The module named by importer_name represents the module performing the
    import to help facilitate resolving relative imports.

    to_import is an iterable of the modules to be potentially imported
    (absolute or relative). The `as` form of importing is also supported,
    e.g. `pkg.mod as spam`.

    Returns:
        A tuple of two items. The first is the importer module for easy
        reference within itself. The second item is a callable to be set to
        `__getattr__`; it raises AttributeError for names that were not
        listed in to_import.
    """
    module = importlib.import_module(importer_name)
    import_mapping = {}
    for name in to_import:
        importing, _, binding = name.partition(' as ')
        if not binding:
            # No explicit alias: bind under the last dotted component.
            _, _, binding = importing.rpartition('.')
        import_mapping[binding] = importing

    def __getattr__(name):
        if name not in import_mapping:
            message = f'module {importer_name!r} has no attribute {name!r}'
            raise AttributeError(message)
        importing = import_mapping[name]
        # importlib.import_module() implicitly sets submodules on this module
        # as appropriate for direct imports.
        imported = importlib.import_module(importing,
                                           module.__spec__.parent)
        # Cache on the module so later attribute access bypasses __getattr__.
        setattr(module, name, imported)
        return imported

    return module, __getattr__
|
58
|
+
|
59
|
+
|
60
|
+
# jieba = try_import("jieba", None)
|
61
|
+
# sns = try_import("seaborn", None)
|
62
|
+
# torch = try_import("torch", None)
|
63
|
+
# nn = try_import("torch.nn", None)
|
64
|
+
# BertTokenizer = try_import("transformers", "BertTokenizer")
|
65
|
+
# BertForMaskedLM = try_import("transformers", "BertForMaskedLM")
|
66
|
+
# Elasticsearch = try_import("elasticsearch", "Elasticsearch")
|
67
|
+
# pd = try_import("pandas", None)
|
68
|
+
# xgb = try_import("xgboost", None)
|
69
|
+
|
70
|
+
aioredis = try_import("aioredis", None)
|
71
|
+
pymysql = try_import("pymysql", None)
|
72
|
+
zhconv = try_import("zhconv", None)
|
73
|
+
KafkaProducer = try_import("kafka", "KafkaProducer")
|
74
|
+
KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
75
|
+
np = try_import("numpy", None)
|
76
|
+
plt = try_import("matplotlib", "pyplot")
|
77
|
+
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
78
|
+
metrics = try_import("sklearn", "metrics")
|
79
|
+
requests = try_import("requests", None)
|
80
|
+
pq = try_import("pyquery", None)
|
81
|
+
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
82
|
+
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
83
|
+
tqdm = try_import("tqdm", "tqdm")
|
84
|
+
# TODO: auto-exporting langid and win32evtlogutil produces buggy output
|
85
|
+
langid = try_import("langid", None)
|
86
|
+
win32evtlogutil = try_import("win32evtlogutil", None)
|
87
|
+
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
88
|
+
yaml = try_import("yaml", None)
|
89
|
+
omegaconf = try_import("omegaconf", None)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
from importlib.util import LazyLoader
|
7
|
+
from .lazy import lazy_module
|
8
|
+
|
9
|
+
EXCLUDE_LAZYIMPORT = {"torch", "torch.nn", "numpy"}
|
10
|
+
|
11
|
+
|
12
|
+
def try_import(name, package):
    """Return an optional dependency (lazily where possible), or ``None``.

    Args:
        name: Module name.
        package: When truthy, the lazy module is requested under the dotted
            path ``"<package>.<name>"``.

    Returns:
        The object produced by ``lazy_module``, or the eagerly imported module
        for names listed in ``EXCLUDE_LAZYIMPORT``. ``None`` is returned after
        a failure, which is also reported on stdout.
    """
    # NOTE(review): callers pass pairs such as ("kafka", "KafkaProducer"),
    # which makes the lazy path "KafkaProducer.kafka" -- the order looks
    # reversed; confirm against lazy_module before relying on these names.
    try:
        if package:
            return lazy_module("{}.{}".format(package, name))
        if name in EXCLUDE_LAZYIMPORT:
            # These modules are imported eagerly instead of being wrapped.
            return import_module(name, package=package)
        return lazy_module(name)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        print("import {} failed".format(name))
        return None
|
27
|
+
|
28
|
+
|
29
|
+
def lazy_import(importer_name, to_import):
    """Build a lazy ``__getattr__`` hook for the module ``importer_name``.

    Args:
        importer_name: Name of the module doing the importing; it is imported
            here and its ``__spec__.parent`` anchors relative imports.
        to_import: Iterable of module names (absolute or relative). An entry
            may use the ``pkg.mod as spam`` form to choose the bound name;
            otherwise the last dotted component is used.

    Returns:
        A ``(module, __getattr__)`` tuple: the importer module itself and a
        callable meant to be installed as that module's ``__getattr__``.
    """
    importer = importlib.import_module(importer_name)

    # Bound attribute name -> module path to import on first access.
    targets = {}
    for spec in to_import:
        target, _, alias = spec.partition(' as ')
        key = alias if alias else target.rpartition('.')[2]
        targets[key] = target

    def __getattr__(name):
        if name in targets:
            # importlib.import_module() implicitly sets submodules on the
            # importer as appropriate for direct imports.
            loaded = importlib.import_module(targets[name],
                                             importer.__spec__.parent)
            setattr(importer, name, loaded)
            return loaded
        raise AttributeError(
            f'module {importer_name!r} has no attribute {name!r}')

    return importer, __getattr__
|
64
|
+
|
65
|
+
|
66
|
+
aioredis = try_import("aioredis", None)
|
67
|
+
happybase = try_import("happybase", None)
|
68
|
+
pd = try_import("pandas", None)
|
69
|
+
pymysql = try_import("pymysql", None)
|
70
|
+
Elasticsearch = try_import("elasticsearch", "Elasticsearch")
|
71
|
+
KafkaProducer = try_import("kafka", "KafkaProducer")
|
72
|
+
MongoClient = try_import("pymongo", "MongoClient")
|
73
|
+
helpers = try_import("elasticsearch", "helpers")
|
74
|
+
KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
75
|
+
np = try_import("numpy", None)
|
76
|
+
sns = try_import("seaborn", None)
|
77
|
+
torch = try_import("torch", None)
|
78
|
+
nn = try_import("torch.nn", None)
|
79
|
+
xgb = try_import("xgboost", None)
|
80
|
+
plt = try_import("matplotlib", "pyplot")
|
81
|
+
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
82
|
+
metrics = try_import("sklearn", "metrics")
|
83
|
+
BertTokenizer = try_import("transformers", "BertTokenizer")
|
84
|
+
BertForMaskedLM = try_import("transformers", "BertForMaskedLM")
|
85
|
+
requests = try_import("requests", None)
|
86
|
+
psutil = try_import("psutil", None)
|
87
|
+
pq = try_import("pyquery", None)
|
88
|
+
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
89
|
+
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
90
|
+
tqdm = try_import("tqdm", "tqdm")
|
91
|
+
langid = try_import("langid", None)
|
92
|
+
# win32evtlogutil?
|
93
|
+
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
94
|
+
yaml = try_import("yaml", None)
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# !/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import importlib
|
5
|
+
from importlib import import_module
|
6
|
+
import os
|
7
|
+
|
8
|
+
|
9
|
+
def try_import(name, package):
    """Import an optional dependency, returning ``None`` when it is unavailable.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only used as the anchor when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` if importing it raised an exception.
    """
    # NOTE(review): callers pass pairs such as ("kafka", "KafkaProducer"). For
    # an absolute ``name`` the second value does not select an attribute, so
    # the module ``kafka`` itself is returned -- confirm this is intended.
    try:
        return import_module(name, package=package)
    except Exception:
        # Best effort: a missing or broken optional dependency must not stop
        # this module from loading. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
|
17
|
+
|
18
|
+
|
19
|
+
# Every optional dependency starts out as None so that each name exists at
# module level even when its import is skipped or fails.
aioredis = None
happybase = None
pd = None
pymysql = None
Elasticsearch = None
KafkaProducer = None
MongoClient = None
helpers = None
KafkaConsumer = None
np = None
sns = None
torch = None
nn = None
xgb = None
plt = None
WordNetLemmatizer = None
metrics = None
BertTokenizer = None
BertForMaskedLM = None
requests = None
psutil = None
pq = None
CountVectorizer = None
precision_recall_fscore_support = None
tqdm = None
langid = None
win32evtlogutil = None
TfidfTransformer = None
yaml = None

# Exported alias -> (name, package) arguments passed to try_import().
import_dict = {
    "aioredis": ("aioredis", None),
    "happybase": ("happybase", None),
    "pd": ("pandas", None),
    "pymysql": ("pymysql", None),
    "Elasticsearch": ("elasticsearch", "Elasticsearch"),
    "KafkaProducer": ("kafka", "KafkaProducer"),
    "MongoClient": ("pymongo", "MongoClient"),
    "helpers": ("elasticsearch", "helpers"),
    "KafkaConsumer": ("kafka", "KafkaConsumer"),
    "np": ("numpy", None),
    "sns": ("seaborn", None),
    "torch": ("torch", None),
    "nn": ("torch.nn", None),
    "xgb": ("xgboost", None),
    "plt": ("matplotlib", "pyplot"),
    "WordNetLemmatizer": ("nltk.stem", "WordNetLemmatizer"),
    "metrics": ("sklearn", "metrics"),
    "BertTokenizer": ("transformers", "BertTokenizer"),
    "BertForMaskedLM": ("transformers", "BertForMaskedLM"),
    "requests": ("requests", None),
    "psutil": ("psutil", None),
    "pq": ("pyquery", None),
    "CountVectorizer": ("sklearn.feature_extraction.text", "CountVectorizer"),
    "precision_recall_fscore_support": ("sklearn.metrics", "precision_recall_fscore_support"),
    "tqdm": ("tqdm", "tqdm"),
    "langid": ("langid", None),
    # TODO: auto-exporting langid and win32evtlogutil produces buggy output
    "win32evtlogutil": ("win32evtlogutil", None),
    "TfidfTransformer": ("sklearn.feature_extraction.text", "TfidfTransformer"),
    "yaml": ("yaml", None),
}

if "nlpertools_helper" in os.environ:
    # TODO: this code path has not been tested.
    # An environment variable is always one string, so iterating it directly
    # would walk single characters and fail the import_dict lookup; split it
    # into alias names first.
    # NOTE(review): the expected format of the variable is not visible here;
    # comma- and/or whitespace-separated aliases are assumed -- confirm
    # against whatever sets "nlpertools_helper".
    import_list = os.environ["nlpertools_helper"].replace(",", " ").split()
    # Unknown aliases are skipped instead of aborting the import of this module.
    _selected = [k for k in import_list if k in import_dict]
else:
    # No selection requested: attempt every optional dependency, in table order.
    _selected = list(import_dict)

for _alias in _selected:
    _name, _package = import_dict[_alias]
    globals()[_alias] = try_import(_name, _package)
|
@@ -1,93 +1,93 @@
|
|
1
|
-
import os
|
2
|
-
import shutil
|
3
|
-
from importlib import import_module
|
4
|
-
|
5
|
-
from .io.dir import j_mkdir
|
6
|
-
from .io.file import readtxt_list_all_strip, writetxt_w_list
|
7
|
-
|
8
|
-
|
9
|
-
def try_import(name, package):
    """Import a module, reporting failure on stdout instead of raising.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only used as the anchor when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` if importing it raised an exception.
    """
    try:
        return import_module(name, package=package)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        print("import {} failed".format(name))
        return None
|
16
|
-
|
17
|
-
|
18
|
-
def convert_import_to_try_import(from_path, to_path):
    """Copy a source tree, rewriting failing imports as ``try_import`` calls.

    Every directory under ``from_path`` is recreated under ``to_path``. Python
    files (except a fixed set of excluded file names) are scanned over their
    first 30 lines for the first import statement that fails when executed.
    From that line up to the next empty line, the statements are replaced by
    the output of ``convert_import_string_to_import_list``. All other files
    are copied unchanged.

    Args:
        from_path: Root of the tree to read.
        to_path: Root of the tree to write; created via ``j_mkdir``.
    """
    # File names that are always copied verbatim.
    untouched = ("utils_for_nlpertools.py", "wrapper.py", "kmp.py", "__init__.py")
    j_mkdir(to_path)
    for root, dirs, files in os.walk(from_path):
        # Only the leading from_path is mapped onto to_path; replacing every
        # occurrence would corrupt paths in which the same text recurs deeper.
        target_root = root.replace(from_path, to_path, 1)
        for sub_dir in dirs:
            j_mkdir(os.path.join(target_root, sub_dir))
        for file in files:
            src = os.path.join(root, file)
            dst = os.path.join(target_root, file)
            if not file.endswith(".py") or file in untouched:
                shutil.copy(src=src, dst=dst)
                continue
            # presumably a list with one stripped string per line -- verify
            # against readtxt_list_all_strip.
            raw_code = readtxt_list_all_strip(src)
            start_idx, end_idx = 0, 0

            for idx, each_line in enumerate(raw_code[:30]):
                # Leading '#' and spaces are removed, so commented-out imports
                # are probed as well.
                each_line = each_line.lstrip("# ")
                if start_idx == 0 and each_line.startswith(("from", "import")):
                    try:
                        # SECURITY: this executes text read from the file;
                        # only run this tool on trusted sources.
                        exec(each_line)
                    except Exception:
                        # First import that cannot be executed here.
                        # NOTE(review): 0 doubles as "not found", so a failure
                        # on the very first line is not recorded.
                        start_idx = idx
                if start_idx != 0 and not each_line:
                    # The first empty line after the failing import ends the
                    # region that gets rewritten.
                    end_idx = idx
                    break
            if start_idx != 0 and end_idx != 0:
                rewritten = convert_import_string_to_import_list(
                    "\n".join(raw_code[start_idx:end_idx]))
                new_code = raw_code[:start_idx] + rewritten + raw_code[end_idx:]
            else:
                new_code = raw_code
            writetxt_w_list(new_code, dst)
    print("convert over")
|
51
|
-
|
52
|
-
|
53
|
-
def get_import_info(text):
    """Placeholder with no implementation: ``text`` is ignored, ``None`` is returned."""
    return None
|
55
|
-
|
56
|
-
|
57
|
-
def convert_import_string_to_import_list(text):
    """Turn import statements into equivalent ``try_import`` assignments.

    Each line of ``text`` is expected to be an ``import X [as Y]`` or
    ``from X import Y [as Z]`` statement. A statement that lists several
    comma-separated names is split: its first name is handled in place and
    each remaining name is queued as its own statement after all other lines.
    Lines without an ``import`` keyword are skipped. Every processed line and
    every generated assignment is printed.

    Args:
        text: Newline-separated import statements.

    Returns:
        A list of source lines: an empty line, the ``try_import`` import line,
        an empty line, then one ``alias = try_import(name, package)``
        assignment per imported name.
    """
    models_to_import = []
    pending = text.split("\n")
    position = 0
    # ``pending`` grows while it is consumed (see the docstring), hence the
    # explicit index instead of iterating a list that is being appended to.
    while position < len(pending):
        each = pending[position]
        position += 1
        print(each)
        tokens = each.split(" ")
        if "import" not in tokens:
            # Blank or non-import line: nothing to convert.
            continue
        keyword_at = tokens.index("import")
        head = tokens[:keyword_at]
        clauses = [part.strip() for part in " ".join(tokens[keyword_at + 1:]).split(",")]
        clauses = [part for part in clauses if part]
        if not clauses:
            continue
        # The text before the keyword is rebuilt from tokens so that module
        # names containing "import" (e.g. importlib) are left intact.
        prefix = " ".join(head) + " " if head else ""
        for extra in clauses[1:]:
            # e.g. "from m import A, B" queues "from m import B".
            pending.append("{}import {}".format(prefix, extra))

        parts = clauses[0].split()
        target = parts[0]
        as_name = parts[2] if len(parts) >= 3 and parts[1] == "as" else None
        module = None
        words = [word for word in head if word]
        for pre, cur in zip(words, words[1:]):
            if pre == "from":
                module = cur
        if not as_name:
            # Without an alias the name is bound under its last dotted part.
            as_name = target.split(".")[-1]
        if module:
            # from <module> import <target>
            name, package = module, target
        else:
            # import <target>
            name, package = target, None
        models_to_import.append((name, package, as_name))

    # Render the assignments.
    all_import_info = ["", "from utils_for_nlpertools import try_import", ""]
    for name, package, as_name in models_to_import:
        import_info = '{} = try_import("{}", {})'.format(
            as_name, name, '"{}"'.format(package) if package else package)
        all_import_info.append(import_info)
        print(import_info)
    return all_import_info
|
1
|
+
import os
|
2
|
+
import shutil
|
3
|
+
from importlib import import_module
|
4
|
+
|
5
|
+
from .io.dir import j_mkdir
|
6
|
+
from .io.file import readtxt_list_all_strip, writetxt_w_list
|
7
|
+
|
8
|
+
|
9
|
+
def try_import(name, package):
    """Import a module, reporting failure on stdout instead of raising.

    Args:
        name: Module name handed to ``importlib.import_module``.
        package: Forwarded as ``import_module``'s ``package`` argument, which
            is only used as the anchor when ``name`` is a relative name.

    Returns:
        The imported module, or ``None`` if importing it raised an exception.
    """
    try:
        return import_module(name, package=package)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        print("import {} failed".format(name))
        return None
|
16
|
+
|
17
|
+
|
18
|
+
def convert_import_to_try_import(from_path, to_path):
    """Copy a source tree, rewriting failing imports as ``try_import`` calls.

    Every directory under ``from_path`` is recreated under ``to_path``. Python
    files (except a fixed set of excluded file names) are scanned over their
    first 30 lines for the first import statement that fails when executed.
    From that line up to the next empty line, the statements are replaced by
    the output of ``convert_import_string_to_import_list``. All other files
    are copied unchanged.

    Args:
        from_path: Root of the tree to read.
        to_path: Root of the tree to write; created via ``j_mkdir``.
    """
    # File names that are always copied verbatim.
    untouched = ("utils_for_nlpertools.py", "wrapper.py", "kmp.py", "__init__.py")
    j_mkdir(to_path)
    for root, dirs, files in os.walk(from_path):
        # Only the leading from_path is mapped onto to_path; replacing every
        # occurrence would corrupt paths in which the same text recurs deeper.
        target_root = root.replace(from_path, to_path, 1)
        for sub_dir in dirs:
            j_mkdir(os.path.join(target_root, sub_dir))
        for file in files:
            src = os.path.join(root, file)
            dst = os.path.join(target_root, file)
            if not file.endswith(".py") or file in untouched:
                shutil.copy(src=src, dst=dst)
                continue
            # presumably a list with one stripped string per line -- verify
            # against readtxt_list_all_strip.
            raw_code = readtxt_list_all_strip(src)
            start_idx, end_idx = 0, 0

            for idx, each_line in enumerate(raw_code[:30]):
                # Leading '#' and spaces are removed, so commented-out imports
                # are probed as well.
                each_line = each_line.lstrip("# ")
                if start_idx == 0 and each_line.startswith(("from", "import")):
                    try:
                        # SECURITY: this executes text read from the file;
                        # only run this tool on trusted sources.
                        exec(each_line)
                    except Exception:
                        # First import that cannot be executed here.
                        # NOTE(review): 0 doubles as "not found", so a failure
                        # on the very first line is not recorded.
                        start_idx = idx
                if start_idx != 0 and not each_line:
                    # The first empty line after the failing import ends the
                    # region that gets rewritten.
                    end_idx = idx
                    break
            if start_idx != 0 and end_idx != 0:
                rewritten = convert_import_string_to_import_list(
                    "\n".join(raw_code[start_idx:end_idx]))
                new_code = raw_code[:start_idx] + rewritten + raw_code[end_idx:]
            else:
                new_code = raw_code
            writetxt_w_list(new_code, dst)
    print("convert over")
|
51
|
+
|
52
|
+
|
53
|
+
def get_import_info(text):
    """Placeholder with no implementation: ``text`` is ignored, ``None`` is returned."""
    return None
|
55
|
+
|
56
|
+
|
57
|
+
def convert_import_string_to_import_list(text):
    """Turn import statements into equivalent ``try_import`` assignments.

    Each line of ``text`` is expected to be an ``import X [as Y]`` or
    ``from X import Y [as Z]`` statement. A statement that lists several
    comma-separated names is split: its first name is handled in place and
    each remaining name is queued as its own statement after all other lines.
    Lines without an ``import`` keyword are skipped. Every processed line and
    every generated assignment is printed.

    Args:
        text: Newline-separated import statements.

    Returns:
        A list of source lines: an empty line, the ``try_import`` import line,
        an empty line, then one ``alias = try_import(name, package)``
        assignment per imported name.
    """
    models_to_import = []
    pending = text.split("\n")
    position = 0
    # ``pending`` grows while it is consumed (see the docstring), hence the
    # explicit index instead of iterating a list that is being appended to.
    while position < len(pending):
        each = pending[position]
        position += 1
        print(each)
        tokens = each.split(" ")
        if "import" not in tokens:
            # Blank or non-import line: nothing to convert.
            continue
        keyword_at = tokens.index("import")
        head = tokens[:keyword_at]
        clauses = [part.strip() for part in " ".join(tokens[keyword_at + 1:]).split(",")]
        clauses = [part for part in clauses if part]
        if not clauses:
            continue
        # The text before the keyword is rebuilt from tokens so that module
        # names containing "import" (e.g. importlib) are left intact.
        prefix = " ".join(head) + " " if head else ""
        for extra in clauses[1:]:
            # e.g. "from m import A, B" queues "from m import B".
            pending.append("{}import {}".format(prefix, extra))

        parts = clauses[0].split()
        target = parts[0]
        as_name = parts[2] if len(parts) >= 3 and parts[1] == "as" else None
        module = None
        words = [word for word in head if word]
        for pre, cur in zip(words, words[1:]):
            if pre == "from":
                module = cur
        if not as_name:
            # Without an alias the name is bound under its last dotted part.
            as_name = target.split(".")[-1]
        if module:
            # from <module> import <target>
            name, package = module, target
        else:
            # import <target>
            name, package = target, None
        models_to_import.append((name, package, as_name))

    # Render the assignments.
    all_import_info = ["", "from utils_for_nlpertools import try_import", ""]
    for name, package, as_name in models_to_import:
        import_info = '{} = try_import("{}", {})'.format(
            as_name, name, '"{}"'.format(package) if package else package)
        all_import_info.append(import_info)
        print(import_info)
    return all_import_info
|