re-common 10.0.39__py3-none-any.whl → 10.0.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +235 -220
- re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
- re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
- re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
- re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
- re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/concurrency.py +35 -35
- re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +497 -508
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
- re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
- re_common/v2/baselibrary/tools/dict_tools.py +44 -44
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
- re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
- re_common/v2/baselibrary/tools/list_tools.py +69 -69
- re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/tree_processor/__init__.py +0 -0
- re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
- re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
- re_common/v2/baselibrary/utils/author_smi.py +361 -361
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +163 -163
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +94 -77
- re_common/v2/baselibrary/utils/db.py +174 -156
- re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +187 -186
- re_common/v2/baselibrary/utils/string_clear.py +246 -246
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +312 -271
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/LICENSE +201 -201
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/METADATA +16 -16
- re_common-10.0.41.dist-info/RECORD +252 -0
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/WHEEL +1 -1
- re_common-10.0.39.dist-info/RECORD +0 -248
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/top_level.txt +0 -0
|
@@ -1,302 +1,302 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
from pandas import DatetimeIndex, DataFrame
|
|
4
|
-
|
|
5
|
-
"""
|
|
6
|
-
https://www.pypandas.cn/docs/getting_started/10min.html#%E6%9F%A5%E7%9C%8B%E6%95%B0%E6%8D%AE
|
|
7
|
-
Series(一维数据);带标签的一维同构数组
|
|
8
|
-
DataFrame(二维数据);带标签的,大小可变的,二维异构表格;index(行)或 columns(列)
|
|
9
|
-
DataFrame 是 Series 的容器,Series 则是标量的容器
|
|
10
|
-
NumPy 数组只有一种数据类型,DataFrame 每列的数据类型各不相同
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class BasePandas(object):
|
|
15
|
-
|
|
16
|
-
def __init__(self):
|
|
17
|
-
pass
|
|
18
|
-
|
|
19
|
-
def create_null_dataframe(self):
|
|
20
|
-
"""
|
|
21
|
-
创建空的dataframe
|
|
22
|
-
:return: type:DataFrame
|
|
23
|
-
"""
|
|
24
|
-
df = pd.DataFrame()
|
|
25
|
-
return df
|
|
26
|
-
|
|
27
|
-
def create_series_for_list(self, list):
|
|
28
|
-
"""
|
|
29
|
-
用值列表生成 Series 时,Pandas 默认自动生成整数索引
|
|
30
|
-
pd.Series([1, 3, 5, np.nan, 6, 8])
|
|
31
|
-
:return: type:Series
|
|
32
|
-
"""
|
|
33
|
-
s = pd.Series(list)
|
|
34
|
-
# print(type(s))
|
|
35
|
-
return s
|
|
36
|
-
|
|
37
|
-
def create_time_index(self, datastring, periods):
|
|
38
|
-
"""
|
|
39
|
-
创建行标
|
|
40
|
-
含日期时间索引与标签的 NumPy 的数组
|
|
41
|
-
:param datastring: '20130101'
|
|
42
|
-
:param periods: 6
|
|
43
|
-
:return: type:DatetimeIndex
|
|
44
|
-
"""
|
|
45
|
-
dates = pd.date_range(datastring, periods=periods)
|
|
46
|
-
return dates
|
|
47
|
-
|
|
48
|
-
def create_ndarray(self, index, columns):
|
|
49
|
-
"""
|
|
50
|
-
产生指定行列的随机数据
|
|
51
|
-
:param index: 行 6
|
|
52
|
-
:param columns: 列 4
|
|
53
|
-
:return: type:ndarray
|
|
54
|
-
"""
|
|
55
|
-
return np.random.randn(index, columns)
|
|
56
|
-
|
|
57
|
-
def create_time_dataform(self, data, dates: DatetimeIndex, columns=list('ABCD')):
|
|
58
|
-
return pd.DataFrame(data, index=dates, columns=columns)
|
|
59
|
-
|
|
60
|
-
def dicts_to_dataform(self, dicts):
|
|
61
|
-
"""
|
|
62
|
-
字典转二维数据
|
|
63
|
-
{'A': 1.,
|
|
64
|
-
'B': pd.Timestamp('20130102'),
|
|
65
|
-
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
|
|
66
|
-
'D': np.array([3] * 4, dtype='int32'),
|
|
67
|
-
'E': pd.Categorical(["test", "train", "test", "train"]),
|
|
68
|
-
'F': 'foo'}
|
|
69
|
-
:param dicts:
|
|
70
|
-
:return:
|
|
71
|
-
"""
|
|
72
|
-
df = pd.DataFrame(dicts)
|
|
73
|
-
return df
|
|
74
|
-
|
|
75
|
-
def dtypes(self, df: DataFrame):
|
|
76
|
-
"""
|
|
77
|
-
DataFrame 的列的数据类型
|
|
78
|
-
:return:
|
|
79
|
-
"""
|
|
80
|
-
return df.dtypes
|
|
81
|
-
|
|
82
|
-
def head(self, df, num=5):
|
|
83
|
-
"""
|
|
84
|
-
查看前几条数据(默认前五条)
|
|
85
|
-
:param num:
|
|
86
|
-
:return:
|
|
87
|
-
"""
|
|
88
|
-
return df.head(num)
|
|
89
|
-
|
|
90
|
-
def tail(self, df, num=5):
|
|
91
|
-
"""
|
|
92
|
-
查看后几条数据(默认后五条)
|
|
93
|
-
:param num:
|
|
94
|
-
:return:
|
|
95
|
-
"""
|
|
96
|
-
return df.tail(num)
|
|
97
|
-
|
|
98
|
-
def index(self, df):
|
|
99
|
-
"""
|
|
100
|
-
显示索引
|
|
101
|
-
:param df:
|
|
102
|
-
:return:
|
|
103
|
-
"""
|
|
104
|
-
return df.index
|
|
105
|
-
|
|
106
|
-
def columns(self, df):
|
|
107
|
-
"""
|
|
108
|
-
显示列名
|
|
109
|
-
:param df:
|
|
110
|
-
:return:
|
|
111
|
-
"""
|
|
112
|
-
return df.columns
|
|
113
|
-
|
|
114
|
-
def dataform_to_numpy(self, df):
|
|
115
|
-
"""
|
|
116
|
-
输出底层数据的 NumPy 对象。
|
|
117
|
-
注意,DataFrame 的列由多种数据类型组成时,该操作耗费系统资源较大,
|
|
118
|
-
输出不包含行索引和列标签
|
|
119
|
-
:return:
|
|
120
|
-
"""
|
|
121
|
-
return df.to_numpy()
|
|
122
|
-
|
|
123
|
-
def describe(self, df):
|
|
124
|
-
"""
|
|
125
|
-
可以快速查看数据的统计摘要:
|
|
126
|
-
:param df:
|
|
127
|
-
:return:
|
|
128
|
-
"""
|
|
129
|
-
return df.describe()
|
|
130
|
-
|
|
131
|
-
def df_T(self, df):
|
|
132
|
-
"""
|
|
133
|
-
转置数据
|
|
134
|
-
:param df:
|
|
135
|
-
:return:
|
|
136
|
-
"""
|
|
137
|
-
return df.T
|
|
138
|
-
|
|
139
|
-
def sort_index(self, df):
|
|
140
|
-
"""
|
|
141
|
-
按轴排序
|
|
142
|
-
:return:
|
|
143
|
-
"""
|
|
144
|
-
return df.sort_index(axis=1, ascending=False)
|
|
145
|
-
|
|
146
|
-
def sort_values(self, df):
|
|
147
|
-
"""
|
|
148
|
-
按值排序
|
|
149
|
-
:param df:
|
|
150
|
-
:return:
|
|
151
|
-
"""
|
|
152
|
-
return df.sort_values(by='B')
|
|
153
|
-
|
|
154
|
-
def get_series(self, df):
|
|
155
|
-
"""
|
|
156
|
-
获取单列数据 等于 df.A
|
|
157
|
-
:param df:
|
|
158
|
-
:return:
|
|
159
|
-
"""
|
|
160
|
-
return df["A"]
|
|
161
|
-
|
|
162
|
-
def get_spilt(self, df):
|
|
163
|
-
"""
|
|
164
|
-
切片行 或者 df['20130102':'20130104']
|
|
165
|
-
:return:
|
|
166
|
-
"""
|
|
167
|
-
return df[0:3]
|
|
168
|
-
|
|
169
|
-
def get_loc(self, df, dates: DatetimeIndex):
|
|
170
|
-
"""
|
|
171
|
-
标签提取一行数据
|
|
172
|
-
:return:
|
|
173
|
-
"""
|
|
174
|
-
return df.loc[dates[0]]
|
|
175
|
-
|
|
176
|
-
def get_many_loc(self, df):
|
|
177
|
-
"""
|
|
178
|
-
用标签选择多列数据
|
|
179
|
-
:return:
|
|
180
|
-
"""
|
|
181
|
-
return df.loc[:, ['A', 'B']]
|
|
182
|
-
|
|
183
|
-
def get_many_loc_index(self, df):
|
|
184
|
-
"""
|
|
185
|
-
用标签切片,包含行与列结束点
|
|
186
|
-
:param df:
|
|
187
|
-
:return:
|
|
188
|
-
"""
|
|
189
|
-
return df.loc['20130102':'20130104', ['A', 'B']]
|
|
190
|
-
|
|
191
|
-
def get_onedata(self, df, dates):
|
|
192
|
-
"""
|
|
193
|
-
提取标量值
|
|
194
|
-
快速访问标量,与上述方法等效:df.at[dates[0], 'A']
|
|
195
|
-
:return:
|
|
196
|
-
"""
|
|
197
|
-
return df.loc[dates[0], 'A']
|
|
198
|
-
|
|
199
|
-
def get_index(self, df):
|
|
200
|
-
"""
|
|
201
|
-
获取行 用整数位置选择
|
|
202
|
-
:return:
|
|
203
|
-
"""
|
|
204
|
-
return df.iloc[3]
|
|
205
|
-
|
|
206
|
-
def get_qiepian(self, df):
|
|
207
|
-
"""
|
|
208
|
-
3:5 为行 0:2列
|
|
209
|
-
用整数列表按位置切片
|
|
210
|
-
df.iloc[[1, 2, 4], [0, 2]]
|
|
211
|
-
显式整行切片
|
|
212
|
-
df.iloc[1:3, :]
|
|
213
|
-
显式整列切片
|
|
214
|
-
df.iloc[:, 1:3]
|
|
215
|
-
显式提取值
|
|
216
|
-
df.iloc[1, 1]
|
|
217
|
-
快速访问标量,与上述方法等效:
|
|
218
|
-
df.iat[1, 1]
|
|
219
|
-
:param df:
|
|
220
|
-
:return:
|
|
221
|
-
"""
|
|
222
|
-
return df.iloc[3:5, 0:2]
|
|
223
|
-
|
|
224
|
-
def select_data(self, df):
|
|
225
|
-
"""
|
|
226
|
-
用单列的值选择行数据
|
|
227
|
-
选择 DataFrame 里满足条件的值:
|
|
228
|
-
df[df > 0]
|
|
229
|
-
:param df:
|
|
230
|
-
:return:
|
|
231
|
-
"""
|
|
232
|
-
return df[df.A > 0]
|
|
233
|
-
|
|
234
|
-
def copy(self, df):
|
|
235
|
-
df2 = df.copy()
|
|
236
|
-
return df2
|
|
237
|
-
|
|
238
|
-
def add_col(self, df):
|
|
239
|
-
"""
|
|
240
|
-
添加列
|
|
241
|
-
:return:
|
|
242
|
-
"""
|
|
243
|
-
df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
|
|
244
|
-
return df
|
|
245
|
-
|
|
246
|
-
def isin(self, df):
|
|
247
|
-
"""
|
|
248
|
-
用 isin() 筛选 行
|
|
249
|
-
:return:
|
|
250
|
-
"""
|
|
251
|
-
return df[df['E'].isin(['two', 'four'])]
|
|
252
|
-
|
|
253
|
-
def set_value(self, df):
|
|
254
|
-
"""
|
|
255
|
-
按标签赋值
|
|
256
|
-
df.at[dates[0], 'A'] = 0
|
|
257
|
-
按位置赋值:
|
|
258
|
-
df.iat[0, 1] = 0
|
|
259
|
-
按 NumPy 数组赋值:
|
|
260
|
-
df.loc[:, 'D'] = np.array([5] * len(df))
|
|
261
|
-
用 where 条件赋值:
|
|
262
|
-
df2 = df.copy()
|
|
263
|
-
df2[df2 > 0] = -df2
|
|
264
|
-
Pandas 主要用 np.nan 表示缺失数据
|
|
265
|
-
:param df:
|
|
266
|
-
:return:
|
|
267
|
-
"""
|
|
268
|
-
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
|
|
269
|
-
df['F'] = s1
|
|
270
|
-
|
|
271
|
-
def reindex(self, df, dates):
|
|
272
|
-
"""
|
|
273
|
-
重建索引(reindex)可以更改、添加、删除指定轴的索引,并返回数据副本,即不更改原数据。
|
|
274
|
-
:param df:
|
|
275
|
-
:return:
|
|
276
|
-
"""
|
|
277
|
-
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
|
|
278
|
-
df1.loc[dates[0]:dates[1], 'E'] = 1
|
|
279
|
-
|
|
280
|
-
def dropna(self, df):
|
|
281
|
-
"""
|
|
282
|
-
删除所有含缺失值的行:
|
|
283
|
-
:param df:
|
|
284
|
-
:return:
|
|
285
|
-
"""
|
|
286
|
-
return df.dropna(how='any')
|
|
287
|
-
|
|
288
|
-
def fillna(self, df):
|
|
289
|
-
"""
|
|
290
|
-
填充缺失值
|
|
291
|
-
:param df:
|
|
292
|
-
:return:
|
|
293
|
-
"""
|
|
294
|
-
return df.fillna(value=5)
|
|
295
|
-
|
|
296
|
-
def isna(self,df):
|
|
297
|
-
"""
|
|
298
|
-
提取 nan 值的布尔掩码
|
|
299
|
-
:param df:
|
|
300
|
-
:return:
|
|
301
|
-
"""
|
|
302
|
-
return pd.isna(df)
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from pandas import DatetimeIndex, DataFrame
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
https://www.pypandas.cn/docs/getting_started/10min.html#%E6%9F%A5%E7%9C%8B%E6%95%B0%E6%8D%AE
|
|
7
|
+
Series(一维数据);带标签的一维同构数组
|
|
8
|
+
DataFrame(二维数据);带标签的,大小可变的,二维异构表格;index(行)或 columns(列)
|
|
9
|
+
DataFrame 是 Series 的容器,Series 则是标量的容器
|
|
10
|
+
NumPy 数组只有一种数据类型,DataFrame 每列的数据类型各不相同
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BasePandas(object):
|
|
15
|
+
|
|
16
|
+
def __init__(self):
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
def create_null_dataframe(self):
|
|
20
|
+
"""
|
|
21
|
+
创建空的dataframe
|
|
22
|
+
:return: type:DataFrame
|
|
23
|
+
"""
|
|
24
|
+
df = pd.DataFrame()
|
|
25
|
+
return df
|
|
26
|
+
|
|
27
|
+
def create_series_for_list(self, list):
|
|
28
|
+
"""
|
|
29
|
+
用值列表生成 Series 时,Pandas 默认自动生成整数索引
|
|
30
|
+
pd.Series([1, 3, 5, np.nan, 6, 8])
|
|
31
|
+
:return: type:Series
|
|
32
|
+
"""
|
|
33
|
+
s = pd.Series(list)
|
|
34
|
+
# print(type(s))
|
|
35
|
+
return s
|
|
36
|
+
|
|
37
|
+
def create_time_index(self, datastring, periods):
|
|
38
|
+
"""
|
|
39
|
+
创建行标
|
|
40
|
+
含日期时间索引与标签的 NumPy 的数组
|
|
41
|
+
:param datastring: '20130101'
|
|
42
|
+
:param periods: 6
|
|
43
|
+
:return: type:DatetimeIndex
|
|
44
|
+
"""
|
|
45
|
+
dates = pd.date_range(datastring, periods=periods)
|
|
46
|
+
return dates
|
|
47
|
+
|
|
48
|
+
def create_ndarray(self, index, columns):
|
|
49
|
+
"""
|
|
50
|
+
产生指定行列的随机数据
|
|
51
|
+
:param index: 行 6
|
|
52
|
+
:param columns: 列 4
|
|
53
|
+
:return: type:ndarray
|
|
54
|
+
"""
|
|
55
|
+
return np.random.randn(index, columns)
|
|
56
|
+
|
|
57
|
+
def create_time_dataform(self, data, dates: DatetimeIndex, columns=list('ABCD')):
|
|
58
|
+
return pd.DataFrame(data, index=dates, columns=columns)
|
|
59
|
+
|
|
60
|
+
def dicts_to_dataform(self, dicts):
|
|
61
|
+
"""
|
|
62
|
+
字典转二维数据
|
|
63
|
+
{'A': 1.,
|
|
64
|
+
'B': pd.Timestamp('20130102'),
|
|
65
|
+
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
|
|
66
|
+
'D': np.array([3] * 4, dtype='int32'),
|
|
67
|
+
'E': pd.Categorical(["test", "train", "test", "train"]),
|
|
68
|
+
'F': 'foo'}
|
|
69
|
+
:param dicts:
|
|
70
|
+
:return:
|
|
71
|
+
"""
|
|
72
|
+
df = pd.DataFrame(dicts)
|
|
73
|
+
return df
|
|
74
|
+
|
|
75
|
+
def dtypes(self, df: DataFrame):
|
|
76
|
+
"""
|
|
77
|
+
DataFrame 的列的数据类型
|
|
78
|
+
:return:
|
|
79
|
+
"""
|
|
80
|
+
return df.dtypes
|
|
81
|
+
|
|
82
|
+
def head(self, df, num=5):
|
|
83
|
+
"""
|
|
84
|
+
查看前几条数据(默认前五条)
|
|
85
|
+
:param num:
|
|
86
|
+
:return:
|
|
87
|
+
"""
|
|
88
|
+
return df.head(num)
|
|
89
|
+
|
|
90
|
+
def tail(self, df, num=5):
|
|
91
|
+
"""
|
|
92
|
+
查看后几条数据(默认后五条)
|
|
93
|
+
:param num:
|
|
94
|
+
:return:
|
|
95
|
+
"""
|
|
96
|
+
return df.tail(num)
|
|
97
|
+
|
|
98
|
+
def index(self, df):
|
|
99
|
+
"""
|
|
100
|
+
显示索引
|
|
101
|
+
:param df:
|
|
102
|
+
:return:
|
|
103
|
+
"""
|
|
104
|
+
return df.index
|
|
105
|
+
|
|
106
|
+
def columns(self, df):
|
|
107
|
+
"""
|
|
108
|
+
显示列名
|
|
109
|
+
:param df:
|
|
110
|
+
:return:
|
|
111
|
+
"""
|
|
112
|
+
return df.columns
|
|
113
|
+
|
|
114
|
+
def dataform_to_numpy(self, df):
|
|
115
|
+
"""
|
|
116
|
+
输出底层数据的 NumPy 对象。
|
|
117
|
+
注意,DataFrame 的列由多种数据类型组成时,该操作耗费系统资源较大,
|
|
118
|
+
输出不包含行索引和列标签
|
|
119
|
+
:return:
|
|
120
|
+
"""
|
|
121
|
+
return df.to_numpy()
|
|
122
|
+
|
|
123
|
+
def describe(self, df):
|
|
124
|
+
"""
|
|
125
|
+
可以快速查看数据的统计摘要:
|
|
126
|
+
:param df:
|
|
127
|
+
:return:
|
|
128
|
+
"""
|
|
129
|
+
return df.describe()
|
|
130
|
+
|
|
131
|
+
def df_T(self, df):
|
|
132
|
+
"""
|
|
133
|
+
转置数据
|
|
134
|
+
:param df:
|
|
135
|
+
:return:
|
|
136
|
+
"""
|
|
137
|
+
return df.T
|
|
138
|
+
|
|
139
|
+
def sort_index(self, df):
|
|
140
|
+
"""
|
|
141
|
+
按轴排序
|
|
142
|
+
:return:
|
|
143
|
+
"""
|
|
144
|
+
return df.sort_index(axis=1, ascending=False)
|
|
145
|
+
|
|
146
|
+
def sort_values(self, df):
|
|
147
|
+
"""
|
|
148
|
+
按值排序
|
|
149
|
+
:param df:
|
|
150
|
+
:return:
|
|
151
|
+
"""
|
|
152
|
+
return df.sort_values(by='B')
|
|
153
|
+
|
|
154
|
+
def get_series(self, df):
|
|
155
|
+
"""
|
|
156
|
+
获取单列数据 等于 df.A
|
|
157
|
+
:param df:
|
|
158
|
+
:return:
|
|
159
|
+
"""
|
|
160
|
+
return df["A"]
|
|
161
|
+
|
|
162
|
+
def get_spilt(self, df):
|
|
163
|
+
"""
|
|
164
|
+
切片行 或者 df['20130102':'20130104']
|
|
165
|
+
:return:
|
|
166
|
+
"""
|
|
167
|
+
return df[0:3]
|
|
168
|
+
|
|
169
|
+
def get_loc(self, df, dates: DatetimeIndex):
|
|
170
|
+
"""
|
|
171
|
+
标签提取一行数据
|
|
172
|
+
:return:
|
|
173
|
+
"""
|
|
174
|
+
return df.loc[dates[0]]
|
|
175
|
+
|
|
176
|
+
def get_many_loc(self, df):
|
|
177
|
+
"""
|
|
178
|
+
用标签选择多列数据
|
|
179
|
+
:return:
|
|
180
|
+
"""
|
|
181
|
+
return df.loc[:, ['A', 'B']]
|
|
182
|
+
|
|
183
|
+
def get_many_loc_index(self, df):
|
|
184
|
+
"""
|
|
185
|
+
用标签切片,包含行与列结束点
|
|
186
|
+
:param df:
|
|
187
|
+
:return:
|
|
188
|
+
"""
|
|
189
|
+
return df.loc['20130102':'20130104', ['A', 'B']]
|
|
190
|
+
|
|
191
|
+
def get_onedata(self, df, dates):
|
|
192
|
+
"""
|
|
193
|
+
提取标量值
|
|
194
|
+
快速访问标量,与上述方法等效:df.at[dates[0], 'A']
|
|
195
|
+
:return:
|
|
196
|
+
"""
|
|
197
|
+
return df.loc[dates[0], 'A']
|
|
198
|
+
|
|
199
|
+
def get_index(self, df):
|
|
200
|
+
"""
|
|
201
|
+
获取行 用整数位置选择
|
|
202
|
+
:return:
|
|
203
|
+
"""
|
|
204
|
+
return df.iloc[3]
|
|
205
|
+
|
|
206
|
+
def get_qiepian(self, df):
|
|
207
|
+
"""
|
|
208
|
+
3:5 为行 0:2列
|
|
209
|
+
用整数列表按位置切片
|
|
210
|
+
df.iloc[[1, 2, 4], [0, 2]]
|
|
211
|
+
显式整行切片
|
|
212
|
+
df.iloc[1:3, :]
|
|
213
|
+
显式整列切片
|
|
214
|
+
df.iloc[:, 1:3]
|
|
215
|
+
显式提取值
|
|
216
|
+
df.iloc[1, 1]
|
|
217
|
+
快速访问标量,与上述方法等效:
|
|
218
|
+
df.iat[1, 1]
|
|
219
|
+
:param df:
|
|
220
|
+
:return:
|
|
221
|
+
"""
|
|
222
|
+
return df.iloc[3:5, 0:2]
|
|
223
|
+
|
|
224
|
+
def select_data(self, df):
|
|
225
|
+
"""
|
|
226
|
+
用单列的值选择行数据
|
|
227
|
+
选择 DataFrame 里满足条件的值:
|
|
228
|
+
df[df > 0]
|
|
229
|
+
:param df:
|
|
230
|
+
:return:
|
|
231
|
+
"""
|
|
232
|
+
return df[df.A > 0]
|
|
233
|
+
|
|
234
|
+
def copy(self, df):
|
|
235
|
+
df2 = df.copy()
|
|
236
|
+
return df2
|
|
237
|
+
|
|
238
|
+
def add_col(self, df):
|
|
239
|
+
"""
|
|
240
|
+
添加列
|
|
241
|
+
:return:
|
|
242
|
+
"""
|
|
243
|
+
df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
|
|
244
|
+
return df
|
|
245
|
+
|
|
246
|
+
def isin(self, df):
|
|
247
|
+
"""
|
|
248
|
+
用 isin() 筛选 行
|
|
249
|
+
:return:
|
|
250
|
+
"""
|
|
251
|
+
return df[df['E'].isin(['two', 'four'])]
|
|
252
|
+
|
|
253
|
+
def set_value(self, df):
|
|
254
|
+
"""
|
|
255
|
+
按标签赋值
|
|
256
|
+
df.at[dates[0], 'A'] = 0
|
|
257
|
+
按位置赋值:
|
|
258
|
+
df.iat[0, 1] = 0
|
|
259
|
+
按 NumPy 数组赋值:
|
|
260
|
+
df.loc[:, 'D'] = np.array([5] * len(df))
|
|
261
|
+
用 where 条件赋值:
|
|
262
|
+
df2 = df.copy()
|
|
263
|
+
df2[df2 > 0] = -df2
|
|
264
|
+
Pandas 主要用 np.nan 表示缺失数据
|
|
265
|
+
:param df:
|
|
266
|
+
:return:
|
|
267
|
+
"""
|
|
268
|
+
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
|
|
269
|
+
df['F'] = s1
|
|
270
|
+
|
|
271
|
+
def reindex(self, df, dates):
|
|
272
|
+
"""
|
|
273
|
+
重建索引(reindex)可以更改、添加、删除指定轴的索引,并返回数据副本,即不更改原数据。
|
|
274
|
+
:param df:
|
|
275
|
+
:return:
|
|
276
|
+
"""
|
|
277
|
+
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
|
|
278
|
+
df1.loc[dates[0]:dates[1], 'E'] = 1
|
|
279
|
+
|
|
280
|
+
def dropna(self, df):
|
|
281
|
+
"""
|
|
282
|
+
删除所有含缺失值的行:
|
|
283
|
+
:param df:
|
|
284
|
+
:return:
|
|
285
|
+
"""
|
|
286
|
+
return df.dropna(how='any')
|
|
287
|
+
|
|
288
|
+
def fillna(self, df):
|
|
289
|
+
"""
|
|
290
|
+
填充缺失值
|
|
291
|
+
:param df:
|
|
292
|
+
:return:
|
|
293
|
+
"""
|
|
294
|
+
return df.fillna(value=5)
|
|
295
|
+
|
|
296
|
+
def isna(self,df):
|
|
297
|
+
"""
|
|
298
|
+
提取 nan 值的布尔掩码
|
|
299
|
+
:param df:
|
|
300
|
+
:return:
|
|
301
|
+
"""
|
|
302
|
+
return pd.isna(df)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from peewee import SqliteDatabase
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class BasePeewee(object):
|
|
5
|
-
|
|
6
|
-
def __init__(self):
|
|
7
|
-
self.db = None
|
|
8
|
-
|
|
9
|
-
def connent_sqlite(self, path):
|
|
10
|
-
self.db = SqliteDatabase(path)
|
|
11
|
-
return self.db
|
|
1
|
+
from peewee import SqliteDatabase
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BasePeewee(object):
|
|
5
|
+
|
|
6
|
+
def __init__(self):
|
|
7
|
+
self.db = None
|
|
8
|
+
|
|
9
|
+
def connent_sqlite(self, path):
|
|
10
|
+
self.db = SqliteDatabase(path)
|
|
11
|
+
return self.db
|