nlpertools 1.0.4__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. nlpertools/__init__.py +24 -11
  2. nlpertools/algo/__init__.py +0 -0
  3. nlpertools/algo/ac.py +18 -0
  4. nlpertools/algo/bit_ops.py +28 -0
  5. nlpertools/algo/kmp.py +94 -0
  6. nlpertools/algo/num_ops.py +12 -0
  7. nlpertools/algo/template.py +116 -0
  8. nlpertools/algo/union.py +13 -0
  9. nlpertools/data_client.py +387 -0
  10. nlpertools/data_structure/__init__.py +0 -0
  11. nlpertools/data_structure/base_structure.py +109 -0
  12. nlpertools/dataprocess.py +611 -3
  13. nlpertools/default_db_config.yml +41 -0
  14. nlpertools/io/__init__.py +3 -3
  15. nlpertools/io/dir.py +54 -47
  16. nlpertools/io/file.py +277 -205
  17. nlpertools/ml.py +483 -317
  18. nlpertools/monitor/__init__.py +0 -0
  19. nlpertools/monitor/gpu.py +18 -0
  20. nlpertools/monitor/memory.py +24 -0
  21. nlpertools/movie.py +36 -0
  22. nlpertools/nlpertools_config.yml +1 -0
  23. nlpertools/{openApi.py → open_api.py} +65 -62
  24. nlpertools/other.py +364 -188
  25. nlpertools/pic.py +288 -0
  26. nlpertools/plugin.py +43 -34
  27. nlpertools/reminder.py +98 -15
  28. nlpertools/template/__init__.py +0 -0
  29. nlpertools/utils/__init__.py +3 -0
  30. nlpertools/utils/lazy.py +727 -0
  31. nlpertools/utils/log_util.py +20 -0
  32. nlpertools/utils/package.py +89 -0
  33. nlpertools/utils/package_v1.py +94 -0
  34. nlpertools/utils/package_v2.py +117 -0
  35. nlpertools/utils_for_nlpertools.py +93 -0
  36. nlpertools/vector_index_demo.py +108 -0
  37. nlpertools/wrapper.py +161 -0
  38. {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  39. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  40. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  41. {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  42. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  43. nlpertools_helper/__init__.py +10 -0
  44. nlpertools-1.0.4.dist-info/METADATA +0 -42
  45. nlpertools-1.0.4.dist-info/RECORD +0 -15
  46. nlpertools-1.0.4.dist-info/top_level.txt +0 -1
nlpertools/__init__.py CHANGED
@@ -1,11 +1,24 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- from .io.file import *
5
- from .io.dir import *
6
- from .ml import *
7
- from .dataprocess import *
8
- from .ml import *
9
- from .openApi import *
10
- from .reminder import *
11
- from .plugin import *
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from .algo.kmp import *
5
+ from .data_structure.base_structure import *
6
+ from .dataprocess import *
7
+ from .io.dir import *
8
+ from .io.file import *
9
+ from .ml import *
10
+ from .open_api import *
11
+ from .other import *
12
+ from .pic import *
13
+ from .plugin import *
14
+ from .reminder import *
15
+ from .utils_for_nlpertools import *
16
+ from .wrapper import *
17
+ from .monitor import *
18
+
19
+ import os
20
+
21
+
22
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__),"default_db_config.yml")
23
+
24
+ __version__ = '1.0.5'
File without changes
nlpertools/algo/ac.py ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from ..io.file import readtxt_list_all_strip
5
+
6
+
7
def find_sentence_covered_vocab(vocab, sentences):
    """
    Find which vocabulary words occur in each sentence (Aho-Corasick).

    This is a reference implementation: the automaton is rebuilt on every
    call, so when matching repeatedly against the same vocabulary copy the
    code out and build the automaton once.

    :param vocab: iterable of words to look for; empty strings are ignored.
    :param sentences: iterable of strings to scan.
    :return: one list per sentence, holding every vocabulary word found in
        that sentence in the order the matches end (overlaps and repeats
        included).
    """
    words = [word for word in vocab if word]
    if not words:
        # Nothing to match; also avoids querying an automaton with no words.
        return [[] for _ in sentences]

    # Imported lazily so the optional dependency is only needed when used.
    from ahocorasick import Automaton

    atm = Automaton()
    for word in words:
        # The word is stored as its own payload so iter() yields it back.
        atm.add_word(word, word)
    atm.make_automaton()

    return [[word for _, word in atm.iter(sentence)] for sentence in sentences]
18
+
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
def foo(num):
    """Return ``num & -num``: the value of the lowest set bit (0 for 0)."""
    negated = -num
    return negated & num
6
+
7
+
8
def foo2(num):
    """
    Clear the lowest set bit of ``num``.

    raw: 0 1 2 3 4 5 6 7 8 9
    res: 0 0 0 2 0 4 4 6 0 8
    """
    predecessor = num - 1
    return predecessor & num
14
+
15
+
16
+ def _lowbit(index: int) -> int:
17
+ """
18
+ raw: 0 1 2 3 4 5 6 7 8 9
19
+ res: 0 1 2 1 4 1 2 1 8 1
20
+ """
21
+ return index & -index
22
+
23
if __name__ == '__main__':
    # Demo: print the inputs 0..9 on one line, then foo2 of each input on
    # the next line (every value is followed by a single space).
    samples = range(10)
    print("".join(f"{value} " for value in samples))
    print("".join(f"{foo2(value)} " for value in samples), end="")
nlpertools/algo/kmp.py ADDED
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+
5
def build(pattern_string):
    """
    Build the KMP backtrack ("next") table of a pattern.

    Reference: [zhihu](https://www.zhihu.com/question/21923021/answer/281346746)

    :param pattern_string: the pattern to preprocess.
    :return: a list as long as the pattern. Entry ``i`` is the pattern index
        to resume from after a mismatch at position ``i``; entry 0 is always
        ``-1``, meaning "advance the main string instead". An empty pattern
        yields an empty list.
    """
    pattern_length = len(pattern_string)
    if pattern_length == 0:
        # The original indexed position 0 unconditionally and crashed here.
        return []

    backtrace_points = [0] * pattern_length
    backtrace_points[0] = -1
    main_pointer, pattern_pointer = 0, -1
    while main_pointer < pattern_length - 1:
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == pattern_string[main_pointer]:
            main_pointer += 1
            pattern_pointer += 1
            backtrace_points[main_pointer] = pattern_pointer
        else:
            # Fall back to the next shorter border of the matched prefix.
            pattern_pointer = backtrace_points[pattern_pointer]
    return backtrace_points
23
+
24
+
25
def build_2(needle: str):
    """
    Build the partial match table (prefix function) of ``needle``.

    A more compact variant of ``build``: it uses the same self-matching idea,
    but stops falling back when the pointer reaches 0 instead of using a
    ``-1`` sentinel.

    :param needle: the pattern to preprocess.
    :return: list where entry ``i`` is the length of the longest proper
        prefix of ``needle[:i + 1]`` that is also its suffix. An empty needle
        yields an empty list (previously the int ``0`` was returned).
    """
    m = len(needle)
    if m == 0:
        return []

    pmt = [0] * m
    pattern_pointer = 0
    for main_pointer in range(1, m):
        while pattern_pointer > 0 and needle[main_pointer] != needle[pattern_pointer]:
            pattern_pointer = pmt[pattern_pointer - 1]
        if needle[main_pointer] == needle[pattern_pointer]:
            pattern_pointer += 1
        pmt[main_pointer] = pattern_pointer
    return pmt
41
+
42
+
43
def find_after_build(main_string, pattern_string):
    """
    Return the index of the first occurrence of the pattern in the main string.

    The backtrack table is produced by ``build`` before scanning.

    :param main_string: text to search in.
    :param pattern_string: pattern to search for.
    :return: start index of the first match, or ``-1`` when there is none.
        An empty pattern matches at index 0 (same convention as ``str.find``).
    """
    if not pattern_string:
        # Guard first: there is no table to build for an empty pattern.
        return 0

    backtracker = build(pattern_string)
    last_index = len(pattern_string) - 1
    main_pointer, pattern_pointer = 0, 0
    while main_pointer < len(main_string):
        if pattern_pointer == -1 or pattern_string[pattern_pointer] == main_string[main_pointer]:
            if pattern_pointer == last_index:
                # Whole pattern matched; report where the match starts.
                return main_pointer - last_index
            pattern_pointer += 1
            main_pointer += 1
        else:
            pattern_pointer = backtracker[pattern_pointer]
    return -1
57
+
58
+
59
def find(main_string, pattern_string):
    """
    Report whether the pattern occurs in the main string (KMP).

    The previous version filled its fallback table while scanning and indexed
    it by main-string position, which gave false positives such as
    ``find("abbc", "abc")``. The table is now built from the pattern alone.

    :param main_string: text to search in.
    :param pattern_string: pattern to search for.
    :return: ``True`` if the pattern is a contiguous part of the main string,
        otherwise ``False``. An empty pattern always matches.
    """
    pattern_length = len(pattern_string)
    if len(main_string) < pattern_length:
        return False
    if pattern_length == 0:
        return True

    # fallback[i]: length of the longest proper prefix of pattern[:i + 1]
    # that is also a suffix of it.
    fallback = [0] * pattern_length
    matched = 0
    for index in range(1, pattern_length):
        while matched > 0 and pattern_string[index] != pattern_string[matched]:
            matched = fallback[matched - 1]
        if pattern_string[index] == pattern_string[matched]:
            matched += 1
        fallback[index] = matched

    # Scan the text, reusing the already matched prefix after a mismatch.
    matched = 0
    for char in main_string:
        while matched > 0 and char != pattern_string[matched]:
            matched = fallback[matched - 1]
        if char == pattern_string[matched]:
            matched += 1
            if matched == pattern_length:
                return True
    return False
80
+
81
+
82
if __name__ == '__main__':
    # Demo: print the table produced by each builder for the same pattern.
    test_main_string = "abababc"
    test_pattern_string = "abababc"

    for table_builder in (build, build_2):
        res = table_builder(test_pattern_string)
        print(res)
    # The matchers can be tried the same way:
    #   find(test_main_string, test_pattern_string)
    #   find_after_build(test_main_string, test_pattern_string)
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
def gcd(a, b):
    """
    Greatest common divisor via the Euclidean algorithm.

    Written out by hand because the implementation behind ``math.gcd`` is not
    visible as Python source.

    :param a: first integer.
    :param b: second integer; may be 0, in which case ``a`` is returned
        (the previous version raised ZeroDivisionError).
    :return: the last non-zero remainder. The result is not normalised, so
        negative inputs can give a negative result (sign follows Python's
        ``%``).
    """
    # Iterative form: no recursion depth limit and no division when b is 0.
    while b:
        a, b = b, a % b
    return a
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from collections import defaultdict
5
+
6
+
7
+ # from sortedcontainers import SortedDict, SortedList
8
+
9
+ # 树状数组只能维护前缀“操作和”(前缀和,前缀积,前缀最大最小),而线段树可以维护区间操作和。
10
+
11
+ # 线段树
12
class SegmentTree:
    """
    Placeholder for a segment tree; no behaviour is implemented yet.

    Reference: https://www.zhihu.com/question/346961479/answer/2274087021
    Property: every node of a segment tree stores the information of one
    interval (segment).
    Author's note: elements are meant to be added through ``add``.
    """
20
+
21
+
22
+ # 树状数组(二进制下标树) 模板
23
class BIT:
    """
    Binary indexed tree (Fenwick tree) over the 1-based positions 1..n.

    Supports point updates and prefix / range sums. Nodes are kept in a
    ``defaultdict`` so only touched nodes use memory.

    TODO: earlier notes on this live in logseq; move them to the web notes.
    Code adapted from a comment under
    https://leetcode.cn/problems/number-of-recent-calls/solutions/1472043/by-ac_oier-evqe/
    """

    def __init__(self, n: int):
        """Create an empty tree covering positions 1..n."""
        self.size = n
        self.tree = defaultdict(int)

    @staticmethod
    def _lowbit(index: int) -> int:
        """Return the value of the lowest set bit of ``index``."""
        # TODO: also move the notes on this to the web notes.
        return index & -index

    def add(self, index: int, delta: int) -> None:
        """
        Add ``delta`` to the value stored at position ``index``.

        Positions above ``size`` are ignored, as before.

        :raises ValueError: if ``index`` is smaller than 1. Such an index
            used to make this loop run forever, because the lowbit step
            either is 0 or leads to 0.
        """
        if index < 1:
            raise ValueError(f"index must be >= 1, got {index}")
        while index <= self.size:
            self.tree[index] += delta
            index += self._lowbit(index)

    def query(self, index: int) -> int:
        """Return the sum of positions 1..index (index is clamped to size)."""
        index = min(index, self.size)
        res = 0
        while index > 0:
            # .get() keeps reads from inserting zero nodes into the defaultdict.
            res += self.tree.get(index, 0)
            index -= self._lowbit(index)
        return res

    def sumRange(self, left: int, right: int) -> int:
        """Return the sum of positions left..right, both inclusive."""
        return self.query(right) - self.query(left - 1)
57
+
58
+
59
class BITUsageDemo:
    """
    Example use of ``BIT``: count how many added values lie in a range.
    """

    def __init__(self, size: int = 10):
        """
        :param size: largest value that can be recorded; the default of 10
            is the capacity that used to be hard-coded.
        """
        self.bit = BIT(size)

    def add(self, x: int):
        """Record one occurrence of the value ``x``."""
        self.bit.add(x, 1)

    def query(self, x, y):
        """Return how many recorded values lie in ``x..y``, both inclusive."""
        return self.bit.sumRange(x, y)
72
+
73
+
74
class Trie:
    """
    Prefix tree over the lowercase letters ``a``-``z``.

    Every node owns 26 child slots (one per letter) and an ``isEnd`` flag
    marking that an inserted word ends at that node.
    """

    def __init__(self):
        self.children = [None] * 26
        self.isEnd = False

    @staticmethod
    def _slot(ch):
        """Return the child slot (0-25) of ``ch``, or None if it is not a-z."""
        offset = ord(ch) - ord("a")
        # An explicit range check: a negative offset would otherwise wrap
        # around and silently address another letter's slot.
        return offset if 0 <= offset < 26 else None

    def insert(self, word: str) -> None:
        """
        Insert ``word`` into the trie.

        :raises ValueError: if the word contains a character outside a-z.
            The word is validated first, so a rejected word leaves the trie
            unchanged.
        """
        slots = [self._slot(ch) for ch in word]
        if None in slots:
            raise ValueError(f"only characters a-z are supported, got {word!r}")
        node = self
        for slot in slots:
            if node.children[slot] is None:
                node.children[slot] = Trie()
            node = node.children[slot]
        node.isEnd = True

    def search_prefix(self, prefix: str):
        """
        Walk the trie along ``prefix``.

        :return: the node reached, or None when the path does not exist.
            A prefix with a character outside a-z can never have been
            inserted, so it yields None as well.
        """
        node = self
        for ch in prefix:
            slot = self._slot(ch)
            if slot is None or node.children[slot] is None:
                return None
            node = node.children[slot]
        return node

    def search(self, word: str) -> bool:
        """Return True if exactly ``word`` was inserted."""
        node = self.search_prefix(word)
        return node is not None and node.isEnd

    def starts_with(self, prefix: str) -> bool:
        """Return True if some inserted word starts with ``prefix``."""
        return self.search_prefix(prefix) is not None
104
+
105
+
106
if __name__ == "__main__":
    # Demo: record the values 1 and 2, then count the values inside [1, 2].
    bit_usage_demo = BITUsageDemo()
    for value in (1, 2):
        bit_usage_demo.add(value)
    res = bit_usage_demo.query(1, 2)
    print(res)
    # Prefix tree usage:
    #   obj = Trie()
    #   obj.insert(word)
    #   param_2 = obj.search(word)
    #   param_3 = obj.starts_with(prefix)
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
class UnionFind:
    """
    Disjoint-set (union-find) structure over the elements ``0..num-1``.

    ``parent[i]`` is the parent of element ``i``; a root is its own parent.
    """

    def __init__(self, num):
        """Start with ``num`` singleton sets."""
        self.parent = list(range(num))

    def union(self, index1=None, index2=None):
        """
        Merge the sets that contain ``index1`` and ``index2``.

        Both arguments are optional only so that the old argument-less call
        keeps working; it stays a no-op.
        """
        if index1 is None or index2 is None:
            return
        root1 = self.find(index1)
        root2 = self.find(index2)
        if root1 != root2:
            self.parent[root2] = root1

    def find(self, index):
        """
        Return the root (representative) of the set containing ``index``.

        Every node visited on the way is re-linked directly to the root
        (path compression).
        """
        root = index
        while self.parent[root] != root:
            root = self.parent[root]
        # Second pass: point each node on the walked path at the root.
        while self.parent[index] != root:
            next_index = self.parent[index]
            self.parent[index] = root
            index = next_index
        return root