nlpertools 1.0.4__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- nlpertools/__init__.py +24 -11
- nlpertools/algo/__init__.py +0 -0
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -0
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/data_client.py +387 -0
- nlpertools/data_structure/__init__.py +0 -0
- nlpertools/data_structure/base_structure.py +109 -0
- nlpertools/dataprocess.py +611 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +54 -47
- nlpertools/io/file.py +277 -205
- nlpertools/ml.py +483 -317
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -62
- nlpertools/other.py +364 -188
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -34
- nlpertools/reminder.py +98 -15
- nlpertools/template/__init__.py +0 -0
- nlpertools/utils/__init__.py +3 -0
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -0
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -0
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -0
- {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
- nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
- nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
- {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
- nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.4.dist-info/METADATA +0 -42
- nlpertools-1.0.4.dist-info/RECORD +0 -15
- nlpertools-1.0.4.dist-info/top_level.txt +0 -1
nlpertools/__init__.py
CHANGED
@@ -1,11 +1,24 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
from .
|
5
|
-
from .
|
6
|
-
from .
|
7
|
-
from .
|
8
|
-
from .
|
9
|
-
from .
|
10
|
-
from .
|
11
|
-
from .
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from .algo.kmp import *
|
5
|
+
from .data_structure.base_structure import *
|
6
|
+
from .dataprocess import *
|
7
|
+
from .io.dir import *
|
8
|
+
from .io.file import *
|
9
|
+
from .ml import *
|
10
|
+
from .open_api import *
|
11
|
+
from .other import *
|
12
|
+
from .pic import *
|
13
|
+
from .plugin import *
|
14
|
+
from .reminder import *
|
15
|
+
from .utils_for_nlpertools import *
|
16
|
+
from .wrapper import *
|
17
|
+
from .monitor import *
|
18
|
+
|
19
|
+
import os
|
20
|
+
|
21
|
+
|
22
|
+
# Path of the default database configuration file located next to this module.
DB_CONFIG_FILE = os.path.join(
    os.path.dirname(__file__),
    "default_db_config.yml",
)

# NOTE(review): the wheel this module ships in is named 1.0.6.dev0 while the
# string below says 1.0.5 - confirm which one is intended.
__version__ = '1.0.5'
|
nlpertools/algo/__init__.py
File without changes
|
nlpertools/algo/ac.py
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from ..io.file import readtxt_list_all_strip
|
5
|
+
|
6
|
+
|
7
|
+
def find_sentence_covered_vocab(vocab, sentences):
    """
    Build an Aho-Corasick automaton over the words of ``vocab``.

    This is a reference snippet: when it is actually needed, copy it to the
    call site so the automaton is not rebuilt on every call.

    As written, the function only constructs the automaton; ``sentences`` is
    not used and nothing is returned.
    """
    from ahocorasick import Automaton

    automaton = Automaton()
    for term in vocab:
        # Each word is stored as both the key and its associated value.
        automaton.add_word(term, term)
    automaton.make_automaton()
|
18
|
+
|
nlpertools/algo/bit_ops.py
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
def foo(num):
    """Return the lowest set bit of ``num`` (``0`` when ``num`` is ``0``)."""
    lowest_set_bit = num & -num
    return lowest_set_bit
|
6
|
+
|
7
|
+
|
8
|
+
def foo2(num):
    """
    Clear the lowest set bit of ``num``.

    raw: 0 1 2 3 4 5 6 7 8 9
    res: 0 0 0 2 0 4 4 6 0 8
    """
    predecessor = num - 1
    return predecessor & num
|
14
|
+
|
15
|
+
|
16
|
+
def _lowbit(index: int) -> int:
|
17
|
+
"""
|
18
|
+
raw: 0 1 2 3 4 5 6 7 8 9
|
19
|
+
res: 0 1 2 1 4 1 2 1 8 1
|
20
|
+
"""
|
21
|
+
return index & -index
|
22
|
+
|
23
|
+
if __name__ == '__main__':
    # Print the inputs 0..9 on one line, then foo2 of each input on the next,
    # every value followed by a single space.
    print(*range(10), end=" ")
    print()
    print(*(foo2(value) for value in range(10)), end=" ")
|
nlpertools/algo/kmp.py
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
|
5
|
+
def build(pattern_string):
    """
    Build the KMP backtrack ("next") table of a pattern string.

    Entry ``i`` is the pattern position to resume from after a mismatch at
    position ``i``; entry ``0`` is the sentinel ``-1``, meaning "advance the
    text pointer and restart the pattern".

    [zhihu](https://www.zhihu.com/question/21923021/answer/281346746)

    :param pattern_string: the pattern to preprocess.
    :return: a list with one entry per pattern character; ``[]`` for an
        empty pattern.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # Nothing to backtrack over; writing the -1 sentinel below would
        # raise IndexError on an empty list.
        return []

    backtrace_points = [0] * pattern_length
    backtrace_points[0] = -1
    main_pointer, pattern_pointer = 0, -1
    while main_pointer < pattern_length - 1:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
            # The matched prefix grows by one (or restarts from the sentinel).
            main_pointer += 1
            pattern_pointer += 1
            backtrace_points[main_pointer] = pattern_pointer
        else:
            # Fall back to the next shorter candidate prefix.
            pattern_pointer = backtrace_points[pattern_pointer]
    return backtrace_points
|
23
|
+
|
24
|
+
|
25
|
+
def build_2(needle: str) -> list:
    """
    Build the partial match table (PMT) of ``needle``.

    ``pmt[i]`` is the length of the longest proper prefix of
    ``needle[:i + 1]`` that is also a suffix of it. This is a more compact
    formulation than ``build``: the table is searched the same way, but the
    fallback loop stops at ``0`` instead of a ``-1`` sentinel.

    :param needle: the pattern to preprocess.
    :return: a list with one entry per character; ``[]`` for an empty needle
        (previously the int ``0``, which did not match the list returned for
        every other input).
    """
    m = len(needle)
    if m == 0:
        return []

    pmt = [0] * m
    pattern_pointer = 0
    for main_pointer in range(1, m):
        # Shrink the candidate prefix until it can be extended or is empty.
        while pattern_pointer > 0 and needle[main_pointer] != needle[pattern_pointer]:
            pattern_pointer = pmt[pattern_pointer - 1]
        if needle[main_pointer] == needle[pattern_pointer]:
            pattern_pointer += 1
        pmt[main_pointer] = pattern_pointer
    return pmt
|
41
|
+
|
42
|
+
|
43
|
+
def find_after_build(main_string, pattern_string):
    """
    Return the index of the first occurrence of ``pattern_string`` in
    ``main_string`` using the backtrack table produced by ``build``.

    :param main_string: the text to search in.
    :param pattern_string: the pattern to search for.
    :return: the start index of the first match, ``0`` for an empty pattern
        (same convention as ``str.find``), or ``-1`` when there is no match.
    """
    if not pattern_string:
        # An empty pattern matches at the very start; without this guard the
        # table construction fails on the empty pattern.
        return 0

    backtracker = build(pattern_string)
    last_pattern_index = len(pattern_string) - 1
    # Both pointers start at the -1 sentinel; the first iteration moves them
    # to position 0 without comparing any characters.
    main_pointer, pattern_pointer = -1, -1
    while main_pointer < len(main_string):
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == main_string[main_pointer]:
            if pattern_pointer == last_pattern_index:
                # The whole pattern matched, ending at main_pointer.
                return main_pointer - last_pattern_index
            pattern_pointer += 1
            main_pointer += 1
        else:
            pattern_pointer = backtracker[pattern_pointer]
    return -1
|
57
|
+
|
58
|
+
|
59
|
+
def find(main_string, pattern_string):
    """
    Report whether ``pattern_string`` occurs in ``main_string`` (KMP).

    The previous implementation filled its backtrack table from positions in
    the main string rather than from the pattern's own prefix/suffix overlap,
    which produced false positives (e.g. ``"abc"`` was "found" in
    ``"abbc"``). The table is now built from the pattern alone, in the same
    way ``build_2`` does, before the text is scanned.

    :param main_string: the text to search in.
    :param pattern_string: the pattern to search for.
    :return: ``True`` when the pattern occurs in the text (an empty pattern
        always does), otherwise ``False``.
    """
    pattern_length = len(pattern_string)
    if len(main_string) < pattern_length:
        return False
    if pattern_length == 0:
        return True

    # fallback[i]: length of the longest proper prefix of pattern[:i + 1]
    # that is also a suffix of it.
    fallback = [0] * pattern_length
    matched = 0
    for index in range(1, pattern_length):
        while matched > 0 and pattern_string[index] != pattern_string[matched]:
            matched = fallback[matched - 1]
        if pattern_string[index] == pattern_string[matched]:
            matched += 1
        fallback[index] = matched

    # Scan the text once; ``matched`` is the number of pattern characters
    # currently matched against the text ending at the current character.
    matched = 0
    for char in main_string:
        while matched > 0 and char != pattern_string[matched]:
            matched = fallback[matched - 1]
        if char == pattern_string[matched]:
            matched += 1
            if matched == pattern_length:
                return True
    return False
|
80
|
+
|
81
|
+
|
82
|
+
if __name__ == '__main__':
    # Manual check: print the table each builder produces for the same pattern.
    test_main_string = "abababc"
    test_pattern_string = "abababc"

    for table_builder in (build, build_2):
        res = table_builder(test_pattern_string)
        print(res)

    # The two search entry points can be exercised the same way:
    # res = find(test_main_string, test_pattern_string)
    # print(res)
    #
    # res = find_after_build(test_main_string, test_pattern_string)
    # print(res)
|
nlpertools/algo/template.py
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from collections import defaultdict
|
5
|
+
|
6
|
+
|
7
|
+
# from sortedcontainers import SortedDict, SortedList
|
8
|
+
|
9
|
+
# 树状数组只能维护前缀“操作和”(前缀和,前缀积,前缀最大最小),而线段树可以维护区间操作和。
|
10
|
+
|
11
|
+
# 线段树
|
12
|
+
class SegmentTree:
    """
    Placeholder for a segment tree template (not implemented yet).

    https://www.zhihu.com/question/346961479/answer/2274087021
    Property: every node of a segment tree stores the information of one
    interval (segment).
    Elements are meant to be added through ``add``.
    """
|
20
|
+
|
21
|
+
|
22
|
+
# 树状数组(二进制下标树) 模板
|
23
|
+
class BIT:
    """
    Binary indexed tree (Fenwick tree) template over 1-based positions.

    Supports point updates and prefix-sum queries. Node values live in a
    mapping keyed by position, so only touched nodes are stored.

    TODO: notes on this were written in logseq earlier; move them to the web.
    Code adapted from a comment under
    https://leetcode.cn/problems/number-of-recent-calls/solutions/1472043/by-ac_oier-evqe/
    """

    def __init__(self, n: int):
        """Create a tree covering the positions ``1..n``."""
        self.size = n
        self.tree = defaultdict(int)

    @staticmethod
    def _lowbit(index: int) -> int:
        """Return the lowest set bit of ``index`` (``0`` for ``0``)."""
        # TODO: move the explanation of this trick to the web notes as well.
        return index & -index

    def add(self, index: int, delta: int) -> None:
        """
        Add ``delta`` to the value stored at position ``index``.

        Positions greater than ``size`` are ignored.

        :raises ValueError: if ``index`` is smaller than 1. The lowest set
            bit of 0 is 0, so the update loop below would never advance for
            such an index.
        """
        if index < 1:
            raise ValueError(f"BIT positions are 1-based, got index={index}")
        while index <= self.size:
            self.tree[index] += delta
            index += self._lowbit(index)

    def query(self, index: int) -> int:
        """
        Return the sum of the values at positions ``1..index``.

        ``index`` is clamped to ``size``; an index below 1 yields ``0``.
        """
        index = min(index, self.size)
        res = 0
        while index > 0:
            # .get() keeps a read from inserting empty nodes into the mapping.
            res += self.tree.get(index, 0)
            index -= self._lowbit(index)
        return res

    def sumRange(self, left: int, right: int) -> int:
        """Return the sum of the values at positions ``left..right`` inclusive."""
        return self.query(right) - self.query(left - 1)
|
57
|
+
|
58
|
+
|
59
|
+
class BITUsageDemo:
    """
    Count how many recorded values fall inside a value range, using ``BIT``.

    Each recorded value is used directly as a tree position, so only values
    in ``1..size`` are counted.
    """

    def __init__(self, size: int = 10):
        """
        :param size: largest value that can be recorded; defaults to 10,
            the capacity that used to be hard-coded.
        """
        self.bit = BIT(size)

    def add(self, x: int):
        """Record one occurrence of the value ``x``."""
        self.bit.add(x, 1)

    def query(self, x, y):
        """Return how many recorded values lie in ``x..y`` inclusive."""
        return self.bit.sumRange(x, y)
|
72
|
+
|
73
|
+
|
74
|
+
class Trie:
    """
    Prefix tree over the lowercase ASCII letters ``a``-``z``.

    Each node owns 26 child slots (one per letter) and a flag telling
    whether an inserted word ends at that node.
    """

    def __init__(self):
        self.children = [None] * 26
        self.isEnd = False

    @staticmethod
    def _slot(ch: str):
        """Return the child slot for ``ch``, or ``None`` if it is not a-z."""
        offset = ord(ch) - ord("a")
        # Offsets outside 0..25 must not be used as list indices: a negative
        # one would silently alias another letter's slot.
        return offset if 0 <= offset < 26 else None

    def insert(self, word: str) -> None:
        """
        Insert ``word`` into the trie.

        :raises ValueError: if ``word`` contains a character outside a-z.
            The check runs before any node is created, so a rejected word
            leaves the trie untouched.
        """
        slots = [self._slot(ch) for ch in word]
        if None in slots:
            raise ValueError(f"Trie only supports lowercase a-z, got {word!r}")

        node = self
        for slot in slots:
            if not node.children[slot]:
                node.children[slot] = Trie()
            node = node.children[slot]
        node.isEnd = True

    def search_prefix(self, prefix: str):
        """
        Return the node reached by following ``prefix``, or ``None`` when no
        inserted word starts with it (including when ``prefix`` contains a
        character outside a-z).
        """
        node = self
        for ch in prefix:
            slot = self._slot(ch)
            if slot is None or not node.children[slot]:
                return None
            node = node.children[slot]

        return node

    def search(self, word: str) -> bool:
        """Return ``True`` if exactly ``word`` was inserted."""
        node = self.search_prefix(word)
        return node is not None and node.isEnd

    def starts_with(self, prefix: str) -> bool:
        """Return ``True`` if some inserted word starts with ``prefix``."""
        return self.search_prefix(prefix) is not None
|
104
|
+
|
105
|
+
|
106
|
+
if __name__ == "__main__":
    # Record the values 1 and 2, then count how many recorded values lie in 1..2.
    bit_usage_demo = BITUsageDemo()
    for value in (1, 2):
        bit_usage_demo.add(value)
    res = bit_usage_demo.query(1, 2)
    print(res)
    # Prefix tree (Trie) usage:
    # obj = Trie()
    # obj.insert(word)
    # param_2 = obj.search(word)
    # param_3 = obj.starts_with(prefix)
|
nlpertools/algo/union.py
ADDED