nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. nlpertools/__init__.py +24 -20
  2. nlpertools/algo/ac.py +18 -0
  3. nlpertools/algo/bit_ops.py +28 -0
  4. nlpertools/algo/kmp.py +94 -55
  5. nlpertools/algo/num_ops.py +12 -0
  6. nlpertools/algo/template.py +116 -0
  7. nlpertools/algo/union.py +13 -0
  8. nlpertools/data_client.py +387 -257
  9. nlpertools/data_structure/base_structure.py +109 -13
  10. nlpertools/dataprocess.py +611 -3
  11. nlpertools/default_db_config.yml +41 -0
  12. nlpertools/io/__init__.py +3 -3
  13. nlpertools/io/dir.py +54 -36
  14. nlpertools/io/file.py +277 -222
  15. nlpertools/ml.py +483 -460
  16. nlpertools/monitor/__init__.py +0 -0
  17. nlpertools/monitor/gpu.py +18 -0
  18. nlpertools/monitor/memory.py +24 -0
  19. nlpertools/movie.py +36 -0
  20. nlpertools/nlpertools_config.yml +1 -0
  21. nlpertools/{openApi.py → open_api.py} +65 -65
  22. nlpertools/other.py +364 -249
  23. nlpertools/pic.py +288 -0
  24. nlpertools/plugin.py +43 -43
  25. nlpertools/reminder.py +98 -87
  26. nlpertools/utils/__init__.py +3 -3
  27. nlpertools/utils/lazy.py +727 -0
  28. nlpertools/utils/log_util.py +20 -0
  29. nlpertools/utils/package.py +89 -76
  30. nlpertools/utils/package_v1.py +94 -0
  31. nlpertools/utils/package_v2.py +117 -0
  32. nlpertools/utils_for_nlpertools.py +93 -93
  33. nlpertools/vector_index_demo.py +108 -0
  34. nlpertools/wrapper.py +161 -96
  35. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  36. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  37. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  38. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  39. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  40. nlpertools_helper/__init__.py +10 -0
  41. nlpertools-1.0.5.dist-info/METADATA +0 -85
  42. nlpertools-1.0.5.dist-info/RECORD +0 -25
  43. nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/__init__.py CHANGED
@@ -1,20 +1,24 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- from .algo.kmp import *
5
- from .data_structure.base_structure import *
6
- from .dataprocess import *
7
- from .io.dir import *
8
- from .io.file import *
9
- from .ml import *
10
- from .ml import *
11
- from .openApi import *
12
- from .other import *
13
- from .plugin import *
14
- from .reminder import *
15
- from .utils_for_nlpertools import *
16
- from .wrapper import *
17
-
18
- DB_CONFIG_FILE = "./default_db_config.yml"
19
-
20
- __version__ = '1.0.5'
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from .algo.kmp import *
5
+ from .data_structure.base_structure import *
6
+ from .dataprocess import *
7
+ from .io.dir import *
8
+ from .io.file import *
9
+ from .ml import *
10
+ from .open_api import *
11
+ from .other import *
12
+ from .pic import *
13
+ from .plugin import *
14
+ from .reminder import *
15
+ from .utils_for_nlpertools import *
16
+ from .wrapper import *
17
+ from .monitor import *
18
+
19
+ import os
20
+
21
+
22
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__),"default_db_config.yml")
23
+
24
+ __version__ = '1.0.5'
nlpertools/algo/ac.py ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from ..io.file import readtxt_list_all_strip
5
+
6
+
7
def find_sentence_covered_vocab(vocab, sentences):
    """Find which vocabulary words occur in each sentence.

    This is a reference implementation: the automaton is rebuilt on every
    call, so when matching repeatedly against the same vocabulary, copy the
    construction out and reuse the automaton instead.

    Args:
        vocab: iterable of words (strings) to look for.
        sentences: iterable of sentences (strings) to scan.

    Returns:
        A list with one entry per sentence; each entry is the list of
        vocabulary words reported by the automaton for that sentence.
    """
    from ahocorasick import Automaton

    atm = Automaton()
    has_words = False
    for word in vocab:
        if not word:
            # An empty key cannot meaningfully be "covered" by a sentence.
            continue
        atm.add_word(word, word)
        has_words = True

    if not has_words:
        # NOTE(review): pyahocorasick is expected to reject searches on an
        # automaton that holds no words - confirm; either way nothing matches.
        return [[] for _ in sentences]

    atm.make_automaton()
    # iter() yields (end_index, value); the value stored above is the word.
    return [[word for _, word in atm.iter(sentence)] for sentence in sentences]
18
+
nlpertools/algo/bit_ops.py ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
def foo(num):
    """Return the bits shared by ``num`` and its negation (the lowest set bit)."""
    negated = -num
    return negated & num
6
+
7
+
8
def foo2(num):
    """Return ``num`` with its lowest set bit cleared.

    raw: 0 1 2 3 4 5 6 7 8 9
    res: 0 0 0 2 0 4 4 6 0 8
    """
    predecessor = num - 1
    return predecessor & num
14
+
15
+
16
def _lowbit(index: int) -> int:
    """Return the value of the lowest set bit of ``index``.

    raw: 0 1 2 3 4 5 6 7 8 9
    res: 0 1 2 1 4 1 2 1 8 1
    """
    # ~index + 1 is the two's-complement negation of index.
    twos_complement = ~index + 1
    return index & twos_complement
22
+
23
if __name__ == '__main__':
    # Print the inputs 0..9 on one row and foo2 of each input on the next.
    sample_inputs = range(10)
    print(*sample_inputs, end=" ")
    print()
    print(*map(foo2, sample_inputs), end=" ")
nlpertools/algo/kmp.py CHANGED
@@ -1,55 +1,94 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
-
5
- def build(pattern_string):
6
- """
7
- 构建模式串的PMT
8
- [zhihu](https://www.zhihu.com/question/21923021/answer/281346746)
9
-
10
- """
11
- # 构建pattern需要回溯的位置,
12
- backtrace_points = [0] * len(pattern_string)
13
- main_pointer, pattern_pointer = 0, -1
14
- backtrace_points[0] = -1
15
- while main_pointer < len(pattern_string) - 1:
16
- if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
17
- main_pointer += 1
18
- pattern_pointer += 1
19
- backtrace_points[main_pointer] = pattern_pointer
20
- else:
21
- pattern_pointer = backtrace_points[pattern_pointer]
22
- return backtrace_points
23
-
24
-
25
- def find(main_string, pattern_string):
26
- """
27
- 模式匹配
28
- 一边构建字串的回溯点,一边判断模式是否匹配
29
- """
30
- if len(main_string) < len(pattern_string):
31
- return False
32
- main_string = " " + main_string
33
- backtrace_points = [0] * (len(main_string) + 1)
34
- main_pointer, pattern_pointer = 0, -1
35
- backtrace_points[0] = -1
36
- while main_pointer < len(main_string):
37
- if pattern_pointer == -1 or pattern_string[pattern_pointer] == main_string[main_pointer]:
38
- if pattern_pointer == len(pattern_string) - 1:
39
- return True
40
- main_pointer += 1
41
- pattern_pointer += 1
42
- backtrace_points[main_pointer] = pattern_pointer
43
- else:
44
- pattern_pointer = backtrace_points[pattern_pointer]
45
- return False
46
-
47
-
48
- if __name__ == '__main__':
49
- test_main_string = "aa"
50
- test_pattern_string = "aa"
51
-
52
- res = build(test_pattern_string)
53
- print(res)
54
- res = find(test_main_string, test_pattern_string)
55
- print(res)
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+
5
def build(pattern_string):
    """Build the KMP backtrack ("next") table for a pattern.

    Entry ``i`` is the pattern index to resume comparing from after a
    mismatch at pattern index ``i``; ``-1`` means no part of the pattern can
    be reused and the text pointer has to advance.

    Reference: https://www.zhihu.com/question/21923021/answer/281346746

    Args:
        pattern_string: the pattern to preprocess.

    Returns:
        A list with one entry per pattern character; empty for an empty
        pattern.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # Nothing to backtrack to; avoids indexing into an empty table.
        return []

    backtrace_points = [0] * pattern_length
    backtrace_points[0] = -1
    main_pointer, pattern_pointer = 0, -1
    while main_pointer < pattern_length - 1:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
            main_pointer += 1
            pattern_pointer += 1
            backtrace_points[main_pointer] = pattern_pointer
        else:
            # Fall back to the next shorter prefix that is also a suffix.
            pattern_pointer = backtrace_points[pattern_pointer]
    return backtrace_points
23
+
24
+
25
def build_2(needle: str):
    """Build the partial match table (PMT) of ``needle``.

    A more compact alternative to ``build``: the lookup walks the table
    itself, and the fallback loop stops at 0 instead of using a -1 sentinel.

    Args:
        needle: the pattern to preprocess.

    Returns:
        A list where entry ``i`` is the length of the longest proper prefix
        of ``needle[:i + 1]`` that is also its suffix; empty for an empty
        needle.
    """
    m = len(needle)
    if m == 0:
        # Always return a list so callers can treat the result uniformly.
        return []

    pmt = [0] * m
    pattern_pointer = 0
    for main_pointer in range(1, m):
        while pattern_pointer > 0 and needle[main_pointer] != needle[pattern_pointer]:
            pattern_pointer = pmt[pattern_pointer - 1]
        if needle[main_pointer] == needle[pattern_pointer]:
            pattern_pointer += 1
        pmt[main_pointer] = pattern_pointer
    return pmt
41
+
42
+
43
def find_after_build(main_string, pattern_string):
    """Return the index of the first occurrence of the pattern, or -1.

    The backtrack table is computed up front with ``build`` and then used to
    scan ``main_string`` without ever moving the text pointer backwards.

    Args:
        main_string: the text to search in.
        pattern_string: the pattern to search for.

    Returns:
        The start index of the first match in ``main_string``, ``0`` for an
        empty pattern, or ``-1`` when there is no match.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # An empty pattern matches at index 0 (same convention as str.find);
        # handled here so the table builder is never asked for an empty table.
        return 0

    backtracker = build(pattern_string)
    last_pattern_index = pattern_length - 1
    main_length = len(main_string)
    main_pointer, pattern_pointer = -1, -1
    while main_pointer < main_length:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == main_string[main_pointer]:
            if pattern_pointer == last_pattern_index:
                # Whole pattern matched: report where the match starts.
                return main_pointer - pattern_length + 1
            pattern_pointer += 1
            main_pointer += 1
        else:
            pattern_pointer = backtracker[pattern_pointer]
    return -1
57
+
58
+
59
def find(main_string, pattern_string):
    """Report whether ``pattern_string`` occurs in ``main_string`` (KMP).

    The fallback table is derived from the pattern itself. The previous
    version filled the table from positions of the text while indexing it by
    pattern position, which missed matches such as "aab" in "xaaab".

    Args:
        main_string: the text to search in.
        pattern_string: the pattern to search for.

    Returns:
        True if the pattern occurs in the text (an empty pattern always
        does), otherwise False.
    """
    pattern_length = len(pattern_string)
    if len(main_string) < pattern_length:
        return False
    if pattern_length == 0:
        return True

    # fallback[i]: length of the longest proper prefix of the pattern that is
    # also a suffix of pattern_string[:i + 1].
    fallback = [0] * pattern_length
    matched = 0
    for index in range(1, pattern_length):
        while matched and pattern_string[index] != pattern_string[matched]:
            matched = fallback[matched - 1]
        if pattern_string[index] == pattern_string[matched]:
            matched += 1
        fallback[index] = matched

    # Scan the text once; ``matched`` is how much of the pattern currently
    # lines up with the text ending at the current character.
    matched = 0
    for char in main_string:
        while matched and char != pattern_string[matched]:
            matched = fallback[matched - 1]
        if char == pattern_string[matched]:
            matched += 1
            if matched == pattern_length:
                return True
    return False
80
+
81
+
82
if __name__ == '__main__':
    # Print the table produced by each builder for the same sample pattern.
    sample_text = sample_pattern = "abababc"
    for table_builder in (build, build_2):
        print(table_builder(sample_pattern))
    # The searchers can be exercised the same way:
    # print(find(sample_text, sample_pattern))
    # print(find_after_build(sample_text, sample_pattern))
nlpertools/algo/num_ops.py ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
def gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b``.

    Written out with the Euclidean algorithm because the implementation
    behind ``math.gcd()`` cannot be read from the Python sources.

    Args:
        a: first integer.
        b: second integer; may be 0, in which case ``a`` is returned.

    Returns:
        The last non-zero remainder of the Euclidean algorithm
        (``gcd(0, 0)`` is 0).
    """
    # Iterating instead of recursing avoids dividing by zero when b == 0 and
    # keeps the call stack flat.
    while b:
        a, b = b, a % b
    return a
nlpertools/algo/template.py ADDED
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from collections import defaultdict
5
+
6
+
7
+ # from sortedcontainers import SortedDict, SortedList
8
+
9
+ # 树状数组只能维护前缀“操作和”(前缀和,前缀积,前缀最大最小),而线段树可以维护区间操作和。
10
+
11
+ # 线段树
12
class SegmentTree:
    """Placeholder for a segment tree; nothing is implemented yet.

    Reference: https://www.zhihu.com/question/346961479/answer/2274087021
    Property: every node of a segment tree stores the information of one
    interval (segment).
    Per the original note, elements are meant to be added through an ``add``
    method, which does not exist yet.
    """
20
+
21
+
22
+ # 树状数组(二进制下标树) 模板
23
class BIT:
    """Binary indexed (Fenwick) tree over the 1-based positions ``1..n``.

    Supports point updates (``add``) and prefix sums (``query``).

    TODO: notes on this were written in logseq earlier; publish them to the web.
    Code adapted from a comment under
    https://leetcode.cn/problems/number-of-recent-calls/solutions/1472043/by-ac_oier-evqe/
    """

    def __init__(self, n: int):
        """Create a tree covering positions ``1..n``, all initially 0."""
        self.size = n
        self.tree = defaultdict(int)

    @staticmethod
    def _lowbit(index: int) -> int:
        """Return the value of the lowest set bit of ``index``."""
        # TODO: publish the notes on this to the web as well.
        return index & -index

    def add(self, index: int, delta: int) -> None:
        """Add ``delta`` to the value stored at position ``index``.

        Positions greater than ``size`` are ignored.

        Raises:
            ValueError: if ``index`` is smaller than 1. lowbit(0) is 0, so
                the update loop would otherwise never advance.
        """
        if index < 1:
            raise ValueError(f"index must be >= 1, got {index}")
        while index <= self.size:
            self.tree[index] += delta
            index += self._lowbit(index)

    def query(self, index: int) -> int:
        """Return the sum of positions ``1..index`` (clamped to ``size``)."""
        index = min(index, self.size)
        res = 0
        while index > 0:
            # .get() keeps read-only queries from inserting keys.
            res += self.tree.get(index, 0)
            index -= self._lowbit(index)
        return res

    def sumRange(self, left: int, right: int) -> int:
        """Return the sum of the positions ``left..right``, both inclusive."""
        return self.query(right) - self.query(left - 1)
57
+
58
+
59
class BITUsageDemo:
    """Count how many of the added values fall inside a value range."""

    def __init__(self, size: int = 10):
        """Create the counter.

        Args:
            size: largest value that can be recorded; defaults to 10, the
                capacity that used to be hard-coded.
        """
        self.bit = BIT(size)

    def add(self, x: int):
        """Record one occurrence of the value ``x``."""
        self.bit.add(x, 1)

    def query(self, x, y):
        """Return how many recorded values lie in ``x..y``, both inclusive."""
        return self.bit.sumRange(x, y)
72
+
73
+
74
class Trie:
    """Prefix tree over the lowercase ASCII letters ``a``-``z``."""

    def __init__(self):
        # One child slot per letter; None marks a missing branch.
        self.children = [None] * 26
        # True when a stored word ends exactly at this node.
        self.isEnd = False

    @staticmethod
    def _slot(ch: str) -> int:
        """Return the child index for ``ch``, or -1 if it is not in a-z.

        Checking the range explicitly keeps characters such as 'Z' from
        wrapping around through negative list indices onto another letter.
        """
        offset = ord(ch) - ord("a")
        return offset if 0 <= offset < 26 else -1

    def insert(self, word: str) -> None:
        """Store ``word`` in the trie.

        Raises:
            ValueError: if ``word`` contains a character outside a-z; the
                trie is left unchanged in that case.
        """
        slots = [self._slot(ch) for ch in word]
        if -1 in slots:
            raise ValueError(f"only lowercase a-z is supported, got {word!r}")
        node = self
        for slot in slots:
            if node.children[slot] is None:
                node.children[slot] = Trie()
            node = node.children[slot]
        node.isEnd = True

    def search_prefix(self, prefix: str):
        """Return the node reached by following ``prefix``, or None."""
        node = self
        for ch in prefix:
            slot = self._slot(ch)
            # Unsupported characters can never have been inserted.
            if slot < 0 or node.children[slot] is None:
                return None
            node = node.children[slot]

        return node

    def search(self, word: str) -> bool:
        """Return True if exactly ``word`` was inserted."""
        node = self.search_prefix(word)
        return node is not None and node.isEnd

    def starts_with(self, prefix: str) -> bool:
        """Return True if some inserted word starts with ``prefix``."""
        return self.search_prefix(prefix) is not None
104
+
105
+
106
if __name__ == "__main__":
    # Record the values 1 and 2, then count how many lie within 1..2.
    demo = BITUsageDemo()
    for value in (1, 2):
        demo.add(value)
    print(demo.query(1, 2))
    # Prefix tree usage:
    # obj = Trie()
    # obj.insert(word)
    # param_2 = obj.search(word)
    # param_3 = obj.starts_with(prefix)
nlpertools/algo/union.py ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
class UnionFind():
    """Disjoint-set (union-find) structure over the elements ``0..num-1``."""

    def __init__(self, num):
        # parent[i] == i marks i as the representative of its set.
        self.parent = list(range(num))

    def union(self, first=None, second=None):
        """Merge the sets containing ``first`` and ``second``.

        Both arguments default to None so that the former zero-argument call
        ``union()`` remains valid; it does nothing.
        """
        if first is None or second is None:
            return
        first_root = self.find(first)
        second_root = self.find(second)
        if first_root != second_root:
            self.parent[first_root] = second_root

    def find(self, index):
        """Return the representative of the set that contains ``index``."""
        root = index
        while self.parent[root] != root:
            root = self.parent[root]
        # Path compression: point every visited element straight at the root.
        while self.parent[index] != root:
            next_index = self.parent[index]
            self.parent[index] = root
            index = next_index
        return root