re-common 10.0.39__py3-none-any.whl → 10.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +235 -220
  145. re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
  146. re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
  147. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
  148. re_common/v2/baselibrary/decorators/utils.py +59 -59
  149. re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
  150. re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
  151. re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
  152. re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
  153. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  154. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
  155. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  156. re_common/v2/baselibrary/tools/concurrency.py +35 -35
  157. re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
  158. re_common/v2/baselibrary/tools/data_processer/data_processer.py +497 -508
  159. re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
  160. re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
  161. re_common/v2/baselibrary/tools/dict_tools.py +44 -44
  162. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  163. re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
  164. re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
  165. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  166. re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
  167. re_common/v2/baselibrary/tools/list_tools.py +69 -69
  168. re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
  169. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  170. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  171. re_common/v2/baselibrary/tools/tree_processor/__init__.py +0 -0
  172. re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
  173. re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
  174. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  175. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  176. re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
  177. re_common/v2/baselibrary/utils/author_smi.py +361 -361
  178. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  179. re_common/v2/baselibrary/utils/basedict.py +37 -37
  180. re_common/v2/baselibrary/utils/basehdfs.py +163 -163
  181. re_common/v2/baselibrary/utils/basepika.py +180 -180
  182. re_common/v2/baselibrary/utils/basetime.py +94 -77
  183. re_common/v2/baselibrary/utils/db.py +174 -156
  184. re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
  185. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  186. re_common/v2/baselibrary/utils/mq.py +83 -83
  187. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  188. re_common/v2/baselibrary/utils/string_bool.py +187 -186
  189. re_common/v2/baselibrary/utils/string_clear.py +246 -246
  190. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  191. re_common/v2/baselibrary/utils/stringutils.py +312 -271
  192. re_common/vip/base_step_process.py +11 -11
  193. re_common/vip/baseencodeid.py +90 -90
  194. re_common/vip/changetaskname.py +28 -28
  195. re_common/vip/core_var.py +24 -24
  196. re_common/vip/mmh3Hash.py +89 -89
  197. re_common/vip/proxy/allproxys.py +127 -127
  198. re_common/vip/proxy/allproxys_thread.py +159 -159
  199. re_common/vip/proxy/cnki_proxy.py +153 -153
  200. re_common/vip/proxy/kuaidaili.py +87 -87
  201. re_common/vip/proxy/proxy_all.py +113 -113
  202. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  203. re_common/vip/proxy/wanfang_proxy.py +152 -152
  204. re_common/vip/proxy/wp_proxy_all.py +181 -181
  205. re_common/vip/read_rawid_to_txt.py +91 -91
  206. re_common/vip/title/__init__.py +5 -5
  207. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  208. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  209. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  210. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  211. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  212. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  213. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  214. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  215. re_common/vip/title/transform/__init__.py +10 -10
  216. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/LICENSE +201 -201
  217. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/METADATA +16 -16
  218. re_common-10.0.41.dist-info/RECORD +252 -0
  219. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/WHEEL +1 -1
  220. re_common-10.0.39.dist-info/RECORD +0 -248
  221. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/top_level.txt +0 -0
@@ -1,302 +1,302 @@
1
- import numpy as np
2
- import pandas as pd
3
- from pandas import DatetimeIndex, DataFrame
4
-
5
- """
6
- https://www.pypandas.cn/docs/getting_started/10min.html#%E6%9F%A5%E7%9C%8B%E6%95%B0%E6%8D%AE
7
- Series(一维数据);带标签的一维同构数组
8
- DataFrame(二维数据);带标签的,大小可变的,二维异构表格;index(行)或 columns(列)
9
- DataFrame 是 Series 的容器,Series 则是标量的容器
10
- NumPy 数组只有一种数据类型,DataFrame 每列的数据类型各不相同
11
- """
12
-
13
-
14
- class BasePandas(object):
15
-
16
- def __init__(self):
17
- pass
18
-
19
- def create_null_dataframe(self):
20
- """
21
- 创建空的dataframe
22
- :return: type:DataFrame
23
- """
24
- df = pd.DataFrame()
25
- return df
26
-
27
- def create_series_for_list(self, list):
28
- """
29
- 用值列表生成 Series 时,Pandas 默认自动生成整数索引
30
- pd.Series([1, 3, 5, np.nan, 6, 8])
31
- :return: type:Series
32
- """
33
- s = pd.Series(list)
34
- # print(type(s))
35
- return s
36
-
37
- def create_time_index(self, datastring, periods):
38
- """
39
- 创建行标
40
- 含日期时间索引与标签的 NumPy 的数组
41
- :param datastring: '20130101'
42
- :param periods: 6
43
- :return: type:DatetimeIndex
44
- """
45
- dates = pd.date_range(datastring, periods=periods)
46
- return dates
47
-
48
- def create_ndarray(self, index, columns):
49
- """
50
- 产生指定行列的随机数据
51
- :param index: 行 6
52
- :param columns: 列 4
53
- :return: type:ndarray
54
- """
55
- return np.random.randn(index, columns)
56
-
57
- def create_time_dataform(self, data, dates: DatetimeIndex, columns=list('ABCD')):
58
- return pd.DataFrame(data, index=dates, columns=columns)
59
-
60
- def dicts_to_dataform(self, dicts):
61
- """
62
- 字典转二维数据
63
- {'A': 1.,
64
- 'B': pd.Timestamp('20130102'),
65
- 'C': pd.Series(1, index=list(range(4)), dtype='float32'),
66
- 'D': np.array([3] * 4, dtype='int32'),
67
- 'E': pd.Categorical(["test", "train", "test", "train"]),
68
- 'F': 'foo'}
69
- :param dicts:
70
- :return:
71
- """
72
- df = pd.DataFrame(dicts)
73
- return df
74
-
75
- def dtypes(self, df: DataFrame):
76
- """
77
- DataFrame 的列的数据类型
78
- :return:
79
- """
80
- return df.dtypes
81
-
82
- def head(self, df, num=5):
83
- """
84
- 查看前几条数据(默认前五条)
85
- :param num:
86
- :return:
87
- """
88
- return df.head(num)
89
-
90
- def tail(self, df, num=5):
91
- """
92
- 查看后几条数据(默认后五条)
93
- :param num:
94
- :return:
95
- """
96
- return df.tail(num)
97
-
98
- def index(self, df):
99
- """
100
- 显示索引
101
- :param df:
102
- :return:
103
- """
104
- return df.index
105
-
106
- def columns(self, df):
107
- """
108
- 显示列名
109
- :param df:
110
- :return:
111
- """
112
- return df.columns
113
-
114
- def dataform_to_numpy(self, df):
115
- """
116
- 输出底层数据的 NumPy 对象。
117
- 注意,DataFrame 的列由多种数据类型组成时,该操作耗费系统资源较大,
118
- 输出不包含行索引和列标签
119
- :return:
120
- """
121
- return df.to_numpy()
122
-
123
- def describe(self, df):
124
- """
125
- 可以快速查看数据的统计摘要:
126
- :param df:
127
- :return:
128
- """
129
- return df.describe()
130
-
131
- def df_T(self, df):
132
- """
133
- 转置数据
134
- :param df:
135
- :return:
136
- """
137
- return df.T
138
-
139
- def sort_index(self, df):
140
- """
141
- 按轴排序
142
- :return:
143
- """
144
- return df.sort_index(axis=1, ascending=False)
145
-
146
- def sort_values(self, df):
147
- """
148
- 按值排序
149
- :param df:
150
- :return:
151
- """
152
- return df.sort_values(by='B')
153
-
154
- def get_series(self, df):
155
- """
156
- 获取单列数据 等于 df.A
157
- :param df:
158
- :return:
159
- """
160
- return df["A"]
161
-
162
- def get_spilt(self, df):
163
- """
164
- 切片行 或者 df['20130102':'20130104']
165
- :return:
166
- """
167
- return df[0:3]
168
-
169
- def get_loc(self, df, dates: DatetimeIndex):
170
- """
171
- 标签提取一行数据
172
- :return:
173
- """
174
- return df.loc[dates[0]]
175
-
176
- def get_many_loc(self, df):
177
- """
178
- 用标签选择多列数据
179
- :return:
180
- """
181
- return df.loc[:, ['A', 'B']]
182
-
183
- def get_many_loc_index(self, df):
184
- """
185
- 用标签切片,包含行与列结束点
186
- :param df:
187
- :return:
188
- """
189
- return df.loc['20130102':'20130104', ['A', 'B']]
190
-
191
- def get_onedata(self, df, dates):
192
- """
193
- 提取标量值
194
- 快速访问标量,与上述方法等效:df.at[dates[0], 'A']
195
- :return:
196
- """
197
- return df.loc[dates[0], 'A']
198
-
199
- def get_index(self, df):
200
- """
201
- 获取行 用整数位置选择
202
- :return:
203
- """
204
- return df.iloc[3]
205
-
206
- def get_qiepian(self, df):
207
- """
208
- 3:5 为行 0:2列
209
- 用整数列表按位置切片
210
- df.iloc[[1, 2, 4], [0, 2]]
211
- 显式整行切片
212
- df.iloc[1:3, :]
213
- 显式整列切片
214
- df.iloc[:, 1:3]
215
- 显式提取值
216
- df.iloc[1, 1]
217
- 快速访问标量,与上述方法等效:
218
- df.iat[1, 1]
219
- :param df:
220
- :return:
221
- """
222
- return df.iloc[3:5, 0:2]
223
-
224
- def select_data(self, df):
225
- """
226
- 用单列的值选择行数据
227
- 选择 DataFrame 里满足条件的值:
228
- df[df > 0]
229
- :param df:
230
- :return:
231
- """
232
- return df[df.A > 0]
233
-
234
- def copy(self, df):
235
- df2 = df.copy()
236
- return df2
237
-
238
- def add_col(self, df):
239
- """
240
- 添加列
241
- :return:
242
- """
243
- df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
244
- return df
245
-
246
- def isin(self, df):
247
- """
248
- 用 isin() 筛选 行
249
- :return:
250
- """
251
- return df[df['E'].isin(['two', 'four'])]
252
-
253
- def set_value(self, df):
254
- """
255
- 按标签赋值
256
- df.at[dates[0], 'A'] = 0
257
- 按位置赋值:
258
- df.iat[0, 1] = 0
259
- 按 NumPy 数组赋值:
260
- df.loc[:, 'D'] = np.array([5] * len(df))
261
- 用 where 条件赋值:
262
- df2 = df.copy()
263
- df2[df2 > 0] = -df2
264
- Pandas 主要用 np.nan 表示缺失数据
265
- :param df:
266
- :return:
267
- """
268
- s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
269
- df['F'] = s1
270
-
271
- def reindex(self, df, dates):
272
- """
273
- 重建索引(reindex)可以更改、添加、删除指定轴的索引,并返回数据副本,即不更改原数据。
274
- :param df:
275
- :return:
276
- """
277
- df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
278
- df1.loc[dates[0]:dates[1], 'E'] = 1
279
-
280
- def dropna(self, df):
281
- """
282
- 删除所有含缺失值的行:
283
- :param df:
284
- :return:
285
- """
286
- return df.dropna(how='any')
287
-
288
- def fillna(self, df):
289
- """
290
- 填充缺失值
291
- :param df:
292
- :return:
293
- """
294
- return df.fillna(value=5)
295
-
296
- def isna(self,df):
297
- """
298
- 提取 nan 值的布尔掩码
299
- :param df:
300
- :return:
301
- """
302
- return pd.isna(df)
1
+ import numpy as np
2
+ import pandas as pd
3
+ from pandas import DatetimeIndex, DataFrame
4
+
5
+ """
6
+ https://www.pypandas.cn/docs/getting_started/10min.html#%E6%9F%A5%E7%9C%8B%E6%95%B0%E6%8D%AE
7
+ Series(一维数据);带标签的一维同构数组
8
+ DataFrame(二维数据);带标签的,大小可变的,二维异构表格;index(行)或 columns(列)
9
+ DataFrame 是 Series 的容器,Series 则是标量的容器
10
+ NumPy 数组只有一种数据类型,DataFrame 每列的数据类型各不相同
11
+ """
12
+
13
+
14
+ class BasePandas(object):
15
+
16
+ def __init__(self):
17
+ pass
18
+
19
+ def create_null_dataframe(self):
20
+ """
21
+ 创建空的dataframe
22
+ :return: type:DataFrame
23
+ """
24
+ df = pd.DataFrame()
25
+ return df
26
+
27
+ def create_series_for_list(self, list):
28
+ """
29
+ 用值列表生成 Series 时,Pandas 默认自动生成整数索引
30
+ pd.Series([1, 3, 5, np.nan, 6, 8])
31
+ :return: type:Series
32
+ """
33
+ s = pd.Series(list)
34
+ # print(type(s))
35
+ return s
36
+
37
+ def create_time_index(self, datastring, periods):
38
+ """
39
+ 创建行标
40
+ 含日期时间索引与标签的 NumPy 的数组
41
+ :param datastring: '20130101'
42
+ :param periods: 6
43
+ :return: type:DatetimeIndex
44
+ """
45
+ dates = pd.date_range(datastring, periods=periods)
46
+ return dates
47
+
48
+ def create_ndarray(self, index, columns):
49
+ """
50
+ 产生指定行列的随机数据
51
+ :param index: 行 6
52
+ :param columns: 列 4
53
+ :return: type:ndarray
54
+ """
55
+ return np.random.randn(index, columns)
56
+
57
+ def create_time_dataform(self, data, dates: DatetimeIndex, columns=list('ABCD')):
58
+ return pd.DataFrame(data, index=dates, columns=columns)
59
+
60
+ def dicts_to_dataform(self, dicts):
61
+ """
62
+ 字典转二维数据
63
+ {'A': 1.,
64
+ 'B': pd.Timestamp('20130102'),
65
+ 'C': pd.Series(1, index=list(range(4)), dtype='float32'),
66
+ 'D': np.array([3] * 4, dtype='int32'),
67
+ 'E': pd.Categorical(["test", "train", "test", "train"]),
68
+ 'F': 'foo'}
69
+ :param dicts:
70
+ :return:
71
+ """
72
+ df = pd.DataFrame(dicts)
73
+ return df
74
+
75
+ def dtypes(self, df: DataFrame):
76
+ """
77
+ DataFrame 的列的数据类型
78
+ :return:
79
+ """
80
+ return df.dtypes
81
+
82
+ def head(self, df, num=5):
83
+ """
84
+ 查看前几条数据(默认前五条)
85
+ :param num:
86
+ :return:
87
+ """
88
+ return df.head(num)
89
+
90
+ def tail(self, df, num=5):
91
+ """
92
+ 查看后几条数据(默认后五条)
93
+ :param num:
94
+ :return:
95
+ """
96
+ return df.tail(num)
97
+
98
+ def index(self, df):
99
+ """
100
+ 显示索引
101
+ :param df:
102
+ :return:
103
+ """
104
+ return df.index
105
+
106
+ def columns(self, df):
107
+ """
108
+ 显示列名
109
+ :param df:
110
+ :return:
111
+ """
112
+ return df.columns
113
+
114
+ def dataform_to_numpy(self, df):
115
+ """
116
+ 输出底层数据的 NumPy 对象。
117
+ 注意,DataFrame 的列由多种数据类型组成时,该操作耗费系统资源较大,
118
+ 输出不包含行索引和列标签
119
+ :return:
120
+ """
121
+ return df.to_numpy()
122
+
123
+ def describe(self, df):
124
+ """
125
+ 可以快速查看数据的统计摘要:
126
+ :param df:
127
+ :return:
128
+ """
129
+ return df.describe()
130
+
131
+ def df_T(self, df):
132
+ """
133
+ 转置数据
134
+ :param df:
135
+ :return:
136
+ """
137
+ return df.T
138
+
139
+ def sort_index(self, df):
140
+ """
141
+ 按轴排序
142
+ :return:
143
+ """
144
+ return df.sort_index(axis=1, ascending=False)
145
+
146
+ def sort_values(self, df):
147
+ """
148
+ 按值排序
149
+ :param df:
150
+ :return:
151
+ """
152
+ return df.sort_values(by='B')
153
+
154
+ def get_series(self, df):
155
+ """
156
+ 获取单列数据 等于 df.A
157
+ :param df:
158
+ :return:
159
+ """
160
+ return df["A"]
161
+
162
+ def get_spilt(self, df):
163
+ """
164
+ 切片行 或者 df['20130102':'20130104']
165
+ :return:
166
+ """
167
+ return df[0:3]
168
+
169
+ def get_loc(self, df, dates: DatetimeIndex):
170
+ """
171
+ 标签提取一行数据
172
+ :return:
173
+ """
174
+ return df.loc[dates[0]]
175
+
176
+ def get_many_loc(self, df):
177
+ """
178
+ 用标签选择多列数据
179
+ :return:
180
+ """
181
+ return df.loc[:, ['A', 'B']]
182
+
183
+ def get_many_loc_index(self, df):
184
+ """
185
+ 用标签切片,包含行与列结束点
186
+ :param df:
187
+ :return:
188
+ """
189
+ return df.loc['20130102':'20130104', ['A', 'B']]
190
+
191
+ def get_onedata(self, df, dates):
192
+ """
193
+ 提取标量值
194
+ 快速访问标量,与上述方法等效:df.at[dates[0], 'A']
195
+ :return:
196
+ """
197
+ return df.loc[dates[0], 'A']
198
+
199
+ def get_index(self, df):
200
+ """
201
+ 获取行 用整数位置选择
202
+ :return:
203
+ """
204
+ return df.iloc[3]
205
+
206
+ def get_qiepian(self, df):
207
+ """
208
+ 3:5 为行 0:2列
209
+ 用整数列表按位置切片
210
+ df.iloc[[1, 2, 4], [0, 2]]
211
+ 显式整行切片
212
+ df.iloc[1:3, :]
213
+ 显式整列切片
214
+ df.iloc[:, 1:3]
215
+ 显式提取值
216
+ df.iloc[1, 1]
217
+ 快速访问标量,与上述方法等效:
218
+ df.iat[1, 1]
219
+ :param df:
220
+ :return:
221
+ """
222
+ return df.iloc[3:5, 0:2]
223
+
224
+ def select_data(self, df):
225
+ """
226
+ 用单列的值选择行数据
227
+ 选择 DataFrame 里满足条件的值:
228
+ df[df > 0]
229
+ :param df:
230
+ :return:
231
+ """
232
+ return df[df.A > 0]
233
+
234
+ def copy(self, df):
235
+ df2 = df.copy()
236
+ return df2
237
+
238
+ def add_col(self, df):
239
+ """
240
+ 添加列
241
+ :return:
242
+ """
243
+ df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
244
+ return df
245
+
246
+ def isin(self, df):
247
+ """
248
+ 用 isin() 筛选 行
249
+ :return:
250
+ """
251
+ return df[df['E'].isin(['two', 'four'])]
252
+
253
+ def set_value(self, df):
254
+ """
255
+ 按标签赋值
256
+ df.at[dates[0], 'A'] = 0
257
+ 按位置赋值:
258
+ df.iat[0, 1] = 0
259
+ 按 NumPy 数组赋值:
260
+ df.loc[:, 'D'] = np.array([5] * len(df))
261
+ 用 where 条件赋值:
262
+ df2 = df.copy()
263
+ df2[df2 > 0] = -df2
264
+ Pandas 主要用 np.nan 表示缺失数据
265
+ :param df:
266
+ :return:
267
+ """
268
+ s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
269
+ df['F'] = s1
270
+
271
+ def reindex(self, df, dates):
272
+ """
273
+ 重建索引(reindex)可以更改、添加、删除指定轴的索引,并返回数据副本,即不更改原数据。
274
+ :param df:
275
+ :return:
276
+ """
277
+ df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
278
+ df1.loc[dates[0]:dates[1], 'E'] = 1
279
+
280
+ def dropna(self, df):
281
+ """
282
+ 删除所有含缺失值的行:
283
+ :param df:
284
+ :return:
285
+ """
286
+ return df.dropna(how='any')
287
+
288
+ def fillna(self, df):
289
+ """
290
+ 填充缺失值
291
+ :param df:
292
+ :return:
293
+ """
294
+ return df.fillna(value=5)
295
+
296
+ def isna(self,df):
297
+ """
298
+ 提取 nan 值的布尔掩码
299
+ :param df:
300
+ :return:
301
+ """
302
+ return pd.isna(df)
@@ -1,11 +1,11 @@
1
- from peewee import SqliteDatabase
2
-
3
-
4
- class BasePeewee(object):
5
-
6
- def __init__(self):
7
- self.db = None
8
-
9
- def connent_sqlite(self, path):
10
- self.db = SqliteDatabase(path)
11
- return self.db
1
+ from peewee import SqliteDatabase
2
+
3
+
4
+ class BasePeewee(object):
5
+
6
+ def __init__(self):
7
+ self.db = None
8
+
9
+ def connent_sqlite(self, path):
10
+ self.db = SqliteDatabase(path)
11
+ return self.db