nlpertools 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- nlpertools/__init__.py +23 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/cli.py +87 -0
- nlpertools/data_client.py +426 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +627 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/draw/__init__.py +0 -0
- nlpertools/draw/draw.py +83 -0
- nlpertools/draw/math_func.py +33 -0
- nlpertools/get_2fa.py +0 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +86 -36
- nlpertools/io/file.py +283 -222
- nlpertools/ml.py +511 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +475 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/LICENSE +200 -200
- nlpertools-1.0.8.dist-info/METADATA +132 -0
- nlpertools-1.0.8.dist-info/RECORD +49 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/WHEEL +1 -1
- nlpertools-1.0.8.dist-info/entry_points.txt +2 -0
- nlpertools-1.0.8.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/__init__.py
CHANGED
@@ -1,20 +1,23 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
from .algo.kmp import *
|
5
|
-
from .data_structure.base_structure import *
|
6
|
-
from .
|
7
|
-
from .
|
8
|
-
from .io.
|
9
|
-
from .
|
10
|
-
from .ml import *
|
11
|
-
from .
|
12
|
-
from .other import *
|
13
|
-
from .
|
14
|
-
from .
|
15
|
-
from .
|
16
|
-
from .
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from .algo.kmp import *
|
5
|
+
from .data_structure.base_structure import *
|
6
|
+
from .draw import *
|
7
|
+
from .dataprocess import *
|
8
|
+
from .io.dir import *
|
9
|
+
from .io.file import *
|
10
|
+
from .ml import *
|
11
|
+
from .open_api import *
|
12
|
+
from .other import *
|
13
|
+
from .pic import *
|
14
|
+
from .plugin import *
|
15
|
+
from .reminder import *
|
16
|
+
from .utils_for_nlpertools import *
|
17
|
+
from .wrapper import *
|
18
|
+
from .monitor import *
|
19
|
+
from .cli import *
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
__version__ = '1.0.8'
|
nlpertools/algo/ac.py
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from ..io.file import readtxt_list_all_strip
|
5
|
+
|
6
|
+
|
7
|
+
def find_sentence_covered_vocab(vocab, sentences):
    """Find which vocabulary words occur in each sentence.

    Builds an Aho-Corasick automaton over ``vocab`` and scans every sentence
    with it.  This is a reference implementation: the automaton is rebuilt
    on every call, so for repeated use copy the code out and build the
    automaton only once.

    Args:
        vocab: Iterable of words to look for.
        sentences: Iterable of strings to scan.

    Returns:
        A list with one entry per sentence; each entry lists the vocabulary
        words found in that sentence, in the order the automaton reports
        them (repeated and overlapping matches are all included).
    """
    # Imported lazily so the package can be imported without pyahocorasick.
    from ahocorasick import Automaton

    atm = Automaton()
    for word in vocab:
        atm.add_word(word, word)
    if len(atm) == 0:
        # An automaton without words cannot be searched; nothing can match.
        return [[] for _ in sentences]
    atm.make_automaton()

    # Automaton.iter yields (end_index, value); the value is the word itself.
    return [[word for _, word in atm.iter(sentence)] for sentence in sentences]
|
18
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
def foo(num):
    """Return the lowest set bit of ``num`` (``0`` when ``num`` is ``0``).

    AND-ing a number with its negation keeps only the least-significant
    1 bit.
    """
    negated = -num
    return negated & num
|
6
|
+
|
7
|
+
|
8
|
+
def foo2(num):
    """Clear the lowest set bit of ``num``.

    raw: 0 1 2 3 4 5 6 7 8 9
    res: 0 0 0 2 0 4 4 6 0 8
    """
    predecessor = num - 1
    return predecessor & num
|
14
|
+
|
15
|
+
|
16
|
+
def _lowbit(index: int) -> int:
|
17
|
+
"""
|
18
|
+
raw: 0 1 2 3 4 5 6 7 8 9
|
19
|
+
res: 0 1 2 1 4 1 2 1 8 1
|
20
|
+
"""
|
21
|
+
return index & -index
|
22
|
+
|
23
|
+
if __name__ == '__main__':
    # Demo: print 0..9 on one line and foo2 of each value on the next.
    # Both lines end with a single trailing space, as before.
    numbers = range(10)
    print(*numbers, end=" ")
    print()
    print(*map(foo2, numbers), end=" ")
|
nlpertools/algo/kmp.py
CHANGED
@@ -1,55 +1,94 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
|
5
|
-
def build(pattern_string):
    """Build the KMP backtrack ("next") table for ``pattern_string``.

    Entry ``i`` is the pattern index to resume comparing from after a
    mismatch at pattern index ``i``; ``-1`` means "advance the text pointer
    and restart the pattern".  Entry 0 is always ``-1``.

    Reference: https://www.zhihu.com/question/21923021/answer/281346746

    Args:
        pattern_string: The pattern to preprocess.

    Returns:
        A list of ``len(pattern_string)`` ints; ``[]`` for an empty pattern.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # No positions to backtrack from; writing entry 0 would fail.
        return []

    # Positions the pattern pointer falls back to on a mismatch.
    backtrace_points = [0] * pattern_length
    backtrace_points[0] = -1
    main_pointer, pattern_pointer = 0, -1
    while main_pointer < pattern_length - 1:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
            main_pointer += 1
            pattern_pointer += 1
            backtrace_points[main_pointer] = pattern_pointer
        else:
            # Reuse the table built so far to find a shorter matching prefix.
            pattern_pointer = backtrace_points[pattern_pointer]
    return backtrace_points
|
23
|
-
|
24
|
-
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
main_pointer
|
35
|
-
|
36
|
-
|
37
|
-
if
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
|
5
|
+
def build(pattern_string):
    """Build the KMP backtrack ("next") table for ``pattern_string``.

    Entry ``i`` is the pattern index to resume comparing from after a
    mismatch at pattern index ``i``; ``-1`` means "advance the text pointer
    and restart the pattern".  Entry 0 is always ``-1``.

    Reference: https://www.zhihu.com/question/21923021/answer/281346746

    Args:
        pattern_string: The pattern to preprocess.

    Returns:
        A list of ``len(pattern_string)`` ints; ``[]`` for an empty pattern.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # No positions to backtrack from; writing entry 0 would fail.
        return []

    # Positions the pattern pointer falls back to on a mismatch.
    backtrace_points = [0] * pattern_length
    backtrace_points[0] = -1
    main_pointer, pattern_pointer = 0, -1
    while main_pointer < pattern_length - 1:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
            main_pointer += 1
            pattern_pointer += 1
            backtrace_points[main_pointer] = pattern_pointer
        else:
            # Reuse the table built so far to find a shorter matching prefix.
            pattern_pointer = backtrace_points[pattern_pointer]
    return backtrace_points
|
23
|
+
|
24
|
+
|
25
|
+
def build_2(needle: str):
    """Build the partial-match table (prefix function) of ``needle``.

    Entry ``i`` is the length of the longest proper prefix of
    ``needle[:i + 1]`` that is also a suffix of it.  This is a more compact
    formulation than ``build``: the fallback still walks the table itself,
    but the loop stops at ``0`` instead of using a ``-1`` sentinel.

    Args:
        needle: The pattern to preprocess.

    Returns:
        A list of ``len(needle)`` ints.  An empty needle yields ``[]``
        (previously the int ``0``), so the return type is always a list.
    """
    m = len(needle)
    pmt = [0] * m
    pattern_pointer = 0
    for main_pointer in range(1, m):
        # Fall back to shorter borders until one can be extended.
        while pattern_pointer > 0 and needle[main_pointer] != needle[pattern_pointer]:
            pattern_pointer = pmt[pattern_pointer - 1]
        if needle[main_pointer] == needle[pattern_pointer]:
            pattern_pointer += 1
        pmt[main_pointer] = pattern_pointer
    return pmt
|
41
|
+
|
42
|
+
|
43
|
+
def find_after_build(main_string, pattern_string):
    """Return the index of the first occurrence of the pattern, or ``-1``.

    Preprocesses ``pattern_string`` with ``build`` and then scans
    ``main_string`` once, KMP style.

    Args:
        main_string: Text to search in.
        pattern_string: Pattern to search for.

    Returns:
        Start index of the first match in ``main_string``; ``-1`` when the
        pattern does not occur.  An empty pattern matches at index ``0``.
    """
    if not pattern_string:
        # An empty pattern matches trivially; answer before building a table.
        return 0

    backtracker = build(pattern_string)
    last_pattern_index = len(pattern_string) - 1
    # Both pointers start one step before the data; the first iteration
    # (pattern_pointer == -1) moves them to index 0 without comparing.
    main_pointer, pattern_pointer = -1, -1
    while main_pointer < len(main_string):
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == main_string[main_pointer]:
            if pattern_pointer == last_pattern_index:
                # Whole pattern matched; report where the match starts.
                return main_pointer - last_pattern_index
            pattern_pointer += 1
            main_pointer += 1
        else:
            pattern_pointer = backtracker[pattern_pointer]
    return -1
|
57
|
+
|
58
|
+
|
59
|
+
def find(main_string, pattern_string):
    """Report whether ``pattern_string`` occurs in ``main_string`` (KMP).

    The previous version filled its backtrack table from positions in the
    text but read it back by pattern index, so it missed matches after a
    restart (for example ``find("xaaab", "aab")`` returned ``False``).  The
    table is now computed from the pattern alone.

    Args:
        main_string: Text to search in.
        pattern_string: Pattern to search for.

    Returns:
        ``True`` if the pattern occurs in the text, else ``False``.  An
        empty pattern is always found.
    """
    pattern_length = len(pattern_string)
    if len(main_string) < pattern_length:
        return False
    if pattern_length == 0:
        return True

    # fallback[i]: length of the longest proper prefix of
    # pattern_string[:i + 1] that is also its suffix.
    fallback = [0] * pattern_length
    matched = 0
    for idx in range(1, pattern_length):
        while matched > 0 and pattern_string[idx] != pattern_string[matched]:
            matched = fallback[matched - 1]
        if pattern_string[idx] == pattern_string[matched]:
            matched += 1
        fallback[idx] = matched

    # Scan the text once; ``matched`` counts pattern characters matched so far.
    matched = 0
    for char in main_string:
        while matched > 0 and char != pattern_string[matched]:
            matched = fallback[matched - 1]
        if char == pattern_string[matched]:
            matched += 1
            if matched == pattern_length:
                return True
    return False
|
80
|
+
|
81
|
+
|
82
|
+
if __name__ == '__main__':
    # Demo: print both flavours of the KMP table for the same pattern.
    test_main_string = test_pattern_string = "abababc"

    for table_builder in (build, build_2):
        res = table_builder(test_pattern_string)
        print(res)
    # The search helpers can be tried the same way:
    # res = find(test_main_string, test_pattern_string)
    # print(res)
    #
    # res = find_after_build(test_main_string, test_pattern_string)
    # print(res)
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from collections import defaultdict
|
5
|
+
|
6
|
+
|
7
|
+
# from sortedcontainers import SortedDict, SortedList
|
8
|
+
|
9
|
+
# 树状数组只能维护前缀“操作和”(前缀和,前缀积,前缀最大最小),而线段树可以维护区间操作和。
|
10
|
+
|
11
|
+
# 线段树
|
12
|
+
class SegmentTree:
    """Placeholder for a segment tree; nothing is implemented yet.

    Reference: https://www.zhihu.com/question/346961479/answer/2274087021

    Property: every node of a segment tree stores the information of one
    interval (segment).  Values are meant to be added through ``add``.
    """
|
20
|
+
|
21
|
+
|
22
|
+
# 树状数组(二进制下标树) 模板
|
23
|
+
class BIT:
    """Binary indexed tree (Fenwick tree) over 1-based positions ``1..n``.

    Supports point updates and prefix / range sums.  Nodes are stored
    sparsely in a dict, so only touched positions take memory.

    Adapted from a comment under
    https://leetcode.cn/problems/number-of-recent-calls/solutions/1472043/by-ac_oier-evqe/
    TODO: move the earlier logseq notes to the web.
    """

    def __init__(self, n: int):
        """Create an empty tree covering positions ``1..n``."""
        self.size = n
        self.tree = defaultdict(int)

    @staticmethod
    def _lowbit(index: int) -> int:
        """Return the least-significant set bit of ``index``."""
        # TODO: also write this up on the web.
        return index & -index

    def add(self, index: int, delta: int) -> None:
        """Add ``delta`` to the value at position ``index``.

        Positions greater than ``size`` are ignored, as before.

        Raises:
            ValueError: If ``index`` is smaller than 1.  ``_lowbit(0)`` is 0,
                so such an index would otherwise never advance and the loop
                would not terminate.
        """
        if index < 1:
            raise ValueError("BIT positions are 1-based, got {}".format(index))
        while index <= self.size:
            self.tree[index] += delta
            index += self._lowbit(index)

    def query(self, index: int) -> int:
        """Return the sum of positions ``1..index`` (clamped to ``size``)."""
        index = min(index, self.size)
        res = 0
        while index > 0:
            # .get keeps reads from inserting empty nodes into the defaultdict.
            res += self.tree.get(index, 0)
            index -= self._lowbit(index)
        return res

    def sumRange(self, left: int, right: int) -> int:
        """Return the sum of positions ``left..right``, both inclusive."""
        return self.query(right) - self.query(left - 1)
|
57
|
+
|
58
|
+
|
59
|
+
class BITUsageDemo:
    """Count how many added values fall inside a range, using ``BIT``.

    Each added value ``x`` increments position ``x`` of the tree by one, so
    a range sum is the number of added values inside that range.
    """

    def __init__(self, size: int = 10):
        """Create the counter.

        Args:
            size: Largest value that can be counted.  Defaults to 10, the
                size that used to be hard-coded.
        """
        self.bit = BIT(size)

    def add(self, x: int):
        """Record one occurrence of ``x`` (expected to be at least 1)."""
        self.bit.add(x, 1)

    def query(self, x, y):
        """Return how many recorded values lie in ``x..y``, both inclusive."""
        return self.bit.sumRange(x, y)
|
72
|
+
|
73
|
+
|
74
|
+
class Trie:
    """Prefix tree over the lowercase ASCII letters ``a``-``z``.

    Each node holds 26 child slots, one per letter, and a flag marking
    whether a stored word ends at that node.
    """

    def __init__(self):
        self.children = [None] * 26
        self.isEnd = False

    @staticmethod
    def _slot(ch: str) -> int:
        """Return the child slot for ``ch``, or ``-1`` if it is not a-z."""
        offset = ord(ch) - ord("a")
        return offset if 0 <= offset < 26 else -1

    def insert(self, word: str) -> None:
        """Store ``word`` in the trie.

        Raises:
            ValueError: If ``word`` contains a character outside ``a``-``z``.
                Such characters used to produce negative list indices, which
                silently aliased them to other letters or raised IndexError.
        """
        # Validate first so a rejected word leaves the trie untouched.
        slots = [self._slot(ch) for ch in word]
        if -1 in slots:
            raise ValueError("Trie only supports lowercase a-z, got {!r}".format(word))
        node = self
        for slot in slots:
            if node.children[slot] is None:
                node.children[slot] = Trie()
            node = node.children[slot]
        node.isEnd = True

    def search_prefix(self, prefix: str):
        """Return the node reached by following ``prefix``, or ``None``."""
        node = self
        for ch in prefix:
            slot = self._slot(ch)
            # Unsupported characters can never have been inserted.
            if slot == -1 or node.children[slot] is None:
                return None
            node = node.children[slot]

        return node

    def search(self, word: str) -> bool:
        """Return ``True`` if exactly ``word`` was inserted."""
        node = self.search_prefix(word)
        return node is not None and node.isEnd

    def starts_with(self, prefix: str) -> bool:
        """Return ``True`` if any inserted word starts with ``prefix``."""
        return self.search_prefix(prefix) is not None
|
104
|
+
|
105
|
+
|
106
|
+
if __name__ == "__main__":
    # Demo: record the values 1 and 2, then count how many lie in 1..2.
    bit_usage_demo = BITUsageDemo()
    for value in (1, 2):
        bit_usage_demo.add(value)
    res = bit_usage_demo.query(1, 2)
    print(res)
    # Prefix tree (Trie) usage:
    # obj = Trie()
    # obj.insert(word)
    # param_2 = obj.search(word)
    # param_3 = obj.starts_with(prefix)
|
nlpertools/algo/union.py
ADDED
nlpertools/cli.py
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
import argparse
|
2
|
+
import os
|
3
|
+
import uuid
|
4
|
+
import sys
|
5
|
+
|
6
|
+
import pyotp
|
7
|
+
|
8
|
+
"""
|
9
|
+
如何Debug cli.py
|
10
|
+
"""
|
11
|
+
|
12
|
+
|
13
|
+
def git_push(max_retries=None):
    """Run ``git push --set-upstream origin main`` until it succeeds.

    Pushing to GitHub from mainland China fails often, so the command is
    simply retried.

    Args:
        max_retries: Maximum number of attempts.  ``None`` (the default)
            retries without limit.

    Returns:
        ``True`` once a push succeeds, ``False`` if ``max_retries`` attempts
        all failed.
    """
    num = 0
    while max_retries is None or num < max_retries:
        print("retry num: {}".format(num))
        # os.system returns the command's exit status as an int, never git's
        # output, so success must be detected by a zero status.  The old
        # check for a "fatal" prefix on the stringified status never matched.
        info = os.system("git push --set-upstream origin main")
        print(str(info))
        if info == 0:
            print("scucess")
            return True
        num += 1
    return False
|
26
|
+
|
27
|
+
|
28
|
+
def git_pull(max_retries=None):
    """Run ``git pull`` until it succeeds.

    Talking to GitHub from mainland China fails often, so the command is
    simply retried.

    Args:
        max_retries: Maximum number of attempts.  ``None`` (the default)
            retries without limit.

    Returns:
        ``True`` once a pull succeeds, ``False`` if ``max_retries`` attempts
        all failed.
    """
    num = 0
    while max_retries is None or num < max_retries:
        print("retry num: {}".format(num))
        # os.system returns the command's exit status as an int, never git's
        # output, so success must be detected by a zero status.  The old
        # checks for "fatal" / "error" prefixes never matched.
        info = os.system("git pull")
        print(str(info))
        if info == 0:
            print("scucess")
            return True
        num += 1
    return False
|
41
|
+
|
42
|
+
|
43
|
+
def get_mac_address():
    """Print and return the MAC address as ``xx:xx:xx:xx:xx:xx``.

    The address is derived from ``uuid.getnode()``.  As the printed notice
    says, the value is not guaranteed to be accurate.

    Returns:
        The colon-separated, lowercase hexadecimal address string.
    """
    node_hex = uuid.UUID(int=uuid.getnode()).hex
    mac = node_hex[-12:]
    # Split the 12 hex digits into six two-character octets.
    octets = []
    for start in range(0, 11, 2):
        octets.append(mac[start:start + 2])
    mac_address = ":".join(octets)
    print("mac address 不一定准确")
    print(mac_address)
    return mac_address
|
49
|
+
|
50
|
+
|
51
|
+
def get_2af_value(key):
    """Print and return the current TOTP code for ``key``.

    Args:
        key: Secret handed to ``pyotp.TOTP``.  The original note says the
            key should be 7 characters long; that has not been verified.

    Returns:
        The current one-time code, as returned by ``totp.now()``.
    """
    # NOTE(review): this echoes the 2FA secret to stdout.  Kept because
    # existing users may rely on the output; consider removing it.
    print(key)
    totp = pyotp.TOTP(key)
    code = totp.now()
    print(code)
    return code
|
58
|
+
|
59
|
+
|
60
|
+
def main():
    """Parse the command-line flags and run the first requested operation.

    Flags are checked in this order: ``--gitpush``, ``--gitpull``,
    ``--mac_address``, ``--get_2fa`` (which also needs ``--get_2fa_key``).
    Only the first one that is set is executed.
    """
    parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
    parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
    parser.add_argument('--gitpull', action='store_true', help='Perform git pull operation.')
    parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')

    parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
    parser.add_argument('--get_2fa_key', type=str, help='Secret key used to compute the 2fa value.')

    args = parser.parse_args()

    if args.gitpush:
        git_push()
    elif args.gitpull:
        git_pull()
    elif args.mac_address:
        get_mac_address()
    elif args.get_2fa:
        if args.get_2fa_key:
            get_2af_value(args.get_2fa_key)
        else:
            print("Please provide a key as an argument.")
    else:
        # List the flags that actually exist; the old text named a
        # non-existent --get_mac_address option.
        print("No operation specified. Use --gitpush, --gitpull, --mac_address or --get_2fa.")


if __name__ == '__main__':
    main()
|