Functions-d 1.0.2__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,7 +27,6 @@ import numpy as np
27
27
  from pypinyin import lazy_pinyin
28
28
  from selenium import webdriver
29
29
  from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
30
- from selenium import webdriver
31
30
  from selenium.webdriver.edge.service import Service
32
31
  from selenium.webdriver.edge.options import Options
33
32
  from webdriver_manager.microsoft import EdgeChromiumDriverManager
@@ -35,18 +34,16 @@ import platform
35
34
  import requests
36
35
 
37
36
 
38
- class DataProcessingAndMessaging:
37
+ class DataProcessingAndMessaging:
39
38
  def __init__(self):
40
- # 获取调用者的堆栈信息
39
+ # -------------------------- 1. 主类日志初始化 --------------------------
40
+ # 获取调用者的堆栈信息(主类日志关联调用脚本)
41
41
  caller_frame = inspect.stack()[1]
42
- # 获取调用者的文件名
43
42
  caller_filename = caller_frame.filename
44
- # 获取主脚本的基本名称(不包含路径和后缀)
45
43
  log_file = os.path.splitext(os.path.basename(caller_filename))[0] + ".log"
46
- # 初始化日志记录
47
- logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
48
- self.logger = logging.getLogger()
49
44
 
45
+ # 初始化主类日志记录器
46
+ self.logger = logging.getLogger()
50
47
  self.logger.setLevel(logging.INFO)
51
48
  # 避免重复添加处理器
52
49
  if not self.logger.handlers:
@@ -60,9 +57,9 @@ class DataProcessingAndMessaging:
60
57
  self.logger.addHandler(file_handler)
61
58
  self.logger.addHandler(console_handler)
62
59
 
63
-
64
60
  self.logger.info("初始化 DataProcessingAndMessaging 类")
65
- # print("初始化 DataProcessingAndMessaging 类")
61
+
62
+ # -------------------------- 2. 主类核心参数初始化 --------------------------
66
63
  self.start_time = None
67
64
  self.current_script_name = None
68
65
  self.log_filename = None
@@ -70,10 +67,30 @@ class DataProcessingAndMessaging:
70
67
  self.current_path = None
71
68
  self.path = None
72
69
 
70
+ # 企业微信消息发送参数
73
71
  self.corpid = "wxd4e113eb4c0136b9"
74
72
  self.corpsecret = "PMfPOv2Qqq0iXZAdWHF7WdaW4kkWUZcwyGE4NZtve3k"
75
73
  self.agentid = "1000026"
76
74
 
75
+ # -------------------------- 3. 企业微信文档功能初始化(原WechatWorkDocs) --------------------------
76
+ # 企业微信文档参数(独立配置)
77
+ self.WECHAT_DOC_CORP_ID = "wxd4e113eb4c0136b9"
78
+ self.WECHAT_DOC_SECRET = "PMfPOv2Qqq0iXZAdWHF7WdaW4kkWUZcwyGE4NZtve3k"
79
+ self.WECHAT_DOC_SPACE_ID = None # 空间ID,根目录可留空
80
+ self.WECHAT_DOC_LOG_FILE = "docs_operation_log.txt" # 文档功能独立日志文件
81
+ self.wechat_doc_access_token = None
82
+
83
+ # 初始化文档功能的独立日志(确保日志文件存在)
84
+ if not os.path.exists(self.WECHAT_DOC_LOG_FILE):
85
+ with open(self.WECHAT_DOC_LOG_FILE, 'w', encoding='utf-8') as f:
86
+ f.write(f"文档操作日志 - 开始于 {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
87
+
88
+ # 预获取文档access_token(失败不中断主类初始化,后续操作自动重试)
89
+ try:
90
+ self.wechat_doc_access_token = self._wechat_doc_get_access_token()
91
+ except Exception as e:
92
+ self.logger.warning(f"企业微信文档access_token初始化失败(后续操作会自动重试):{str(e)}")
93
+
77
94
 
78
95
 
79
96
  def init_edge_driver(self, headless=True):
@@ -405,7 +422,6 @@ class DataProcessingAndMessaging:
405
422
  print(f"Hive原始错误详情:\n{full_error_details}")
406
423
  raise
407
424
 
408
-
409
425
  def run_sql_2(self, path=None, sql_name=None, channel=False, sql_content=None):
410
426
  """
411
427
  执行SQL(支持直接传入SQL内容或从文件读取)- 优化版Hive连接
@@ -839,6 +855,335 @@ class DataProcessingAndMessaging:
839
855
  self.logger.error(f"车辆分配处理出错: {str(e)}")
840
856
  raise
841
857
 
858
def _wechat_doc_get_access_token(self):
    """Fetch an access token for the WeCom document API (internal helper).

    Sends a GET request to the ``gettoken`` endpoint using
    ``self.WECHAT_DOC_CORP_ID`` and ``self.WECHAT_DOC_SECRET`` and logs the
    outcome through ``self._wechat_doc_log``.

    NOTE(review): a second definition with the same name appears later in
    this diff; if both live in the same class body, the later one replaces
    this one at class-creation time.

    :return: the ``access_token`` value of the JSON response
    :raises Exception: when the response ``errcode`` is not 0
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    # Let requests build (and URL-encode) the query string, and bound the
    # call with a timeout so a stalled connection cannot block forever.
    response = requests.get(
        "https://qyapi.weixin.qq.com/cgi-bin/gettoken",
        params={
            "corpid": self.WECHAT_DOC_CORP_ID,
            "corpsecret": self.WECHAT_DOC_SECRET,
        },
        timeout=30,
    )
    result = response.json()

    if result.get("errcode") != 0:
        error_msg = f"获取文档access_token失败: {result.get('errmsg')}"
        self._wechat_doc_log(error_msg)
        raise Exception(error_msg)

    self._wechat_doc_log("成功获取文档access_token")
    return result.get("access_token")
871
+
872
def _wechat_doc_log(self, message):
    """Append a timestamped line to the document-operation log file.

    The entry is written to ``self.WECHAT_DOC_LOG_FILE`` (UTF-8, append
    mode) and mirrored to the instance logger with a "[企业微信文档]" prefix.

    NOTE(review): a second definition with the same name appears later in
    this diff; if both live in the same class body, the later one replaces
    this one at class-creation time.

    :param message: text to record
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} - {message}\n"
    with open(self.WECHAT_DOC_LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(log_entry)
    # Mirror into the main logger. The visible initializer assigns
    # ``self.logger``; ``self.main_logger`` is never set there, so using it
    # would raise AttributeError.
    self.logger.info(f"[企业微信文档] {message}")
880
+
881
def _wechat_doc_refresh_token_if_needed(self):
    """Make sure a document access token is cached on the instance.

    A token is requested via ``self._wechat_doc_get_access_token()`` only
    when ``self.wechat_doc_access_token`` is empty; an existing token is
    kept as-is (this method does not check whether it has expired).
    """
    # Guard clause: nothing to do when a token is already cached.
    if self.wechat_doc_access_token:
        return
    self.wechat_doc_access_token = self._wechat_doc_get_access_token()
885
+
886
+
887
+ # -------------------------- 企业微信文档功能:核心辅助方法(原WechatWorkDocs内部方法) --------------------------
888
def _wechat_doc_get_access_token(self):
    """Fetch an access token for the WeCom document API.

    Sends a GET request to the ``gettoken`` endpoint using
    ``self.WECHAT_DOC_CORP_ID`` and ``self.WECHAT_DOC_SECRET``. Only
    failures are logged; a successful fetch is silent.

    :return: the ``access_token`` value of the JSON response
    :raises Exception: when the response ``errcode`` is not 0
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    # Let requests build (and URL-encode) the query string, and bound the
    # call with a timeout so a stalled connection cannot block forever.
    response = requests.get(
        "https://qyapi.weixin.qq.com/cgi-bin/gettoken",
        params={
            "corpid": self.WECHAT_DOC_CORP_ID,
            "corpsecret": self.WECHAT_DOC_SECRET,
        },
        timeout=30,
    )
    result = response.json()

    if result.get("errcode") != 0:
        error_msg = f"获取文档access_token失败: {result.get('errmsg')}"
        self._wechat_doc_log(error_msg)
        raise Exception(error_msg)

    return result.get("access_token")
901
+
902
def _wechat_doc_log(self, message):
    """Record a document-operation message in two places.

    The message is appended, prefixed with the current local time, to the
    file named by ``self.WECHAT_DOC_LOG_FILE`` and is also emitted at INFO
    level on ``self.logger``.

    :param message: text to record
    """
    now_text = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(self.WECHAT_DOC_LOG_FILE, 'a', encoding='utf-8') as log_file:
        log_file.write(f"{now_text} - {message}\n")
    # Mirror the entry into the main logger so both logs can be correlated.
    self.logger.info(f"[企业微信文档] {message}")
910
+
911
def _wechat_doc_refresh_token_if_needed(self):
    """Make sure a document access token is cached on the instance.

    A token is requested via ``self._wechat_doc_get_access_token()`` only
    when ``self.wechat_doc_access_token`` is empty; an existing token is
    kept as-is (this method does not check whether it has expired).
    """
    # Guard clause: nothing to do when a token is already cached.
    if self.wechat_doc_access_token:
        return
    self.wechat_doc_access_token = self._wechat_doc_get_access_token()
915
+
916
+ # -------------------------- 企业微信文档功能:对外方法(原WechatWorkDocs公开方法) --------------------------
917
def wx_create_table(self, sheet_name, admin_users=None):
    """Create a WeCom smart sheet.

    The sheet is created with ``self.WECHAT_DOC_SPACE_ID`` used as both the
    space ID and the parent folder ID.

    :param sheet_name: name of the new sheet
    :param admin_users: optional list of administrator user IDs
    :return: dict with the new document's ``docid`` and ``url``
    :raises Exception: when the API response ``errcode`` is not 0
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    self._wechat_doc_refresh_token_if_needed()

    url = f"https://qyapi.weixin.qq.com/cgi-bin/wedoc/create_doc?access_token={self.wechat_doc_access_token}"
    data = {
        "spaceid": self.WECHAT_DOC_SPACE_ID,
        "fatherid": self.WECHAT_DOC_SPACE_ID,  # create directly under the space root
        "doc_type": 10,  # 10 = smart sheet
        "doc_name": sheet_name,
    }

    if admin_users:
        data["admin_users"] = admin_users

    # Bound the call so a stalled connection cannot block the caller forever.
    response = requests.post(url, data=json.dumps(data), timeout=30)
    result = response.json()

    if result.get("errcode") != 0:
        error_msg = f"创建智能表格失败: {result.get('errmsg')}"
        self._wechat_doc_log(error_msg)
        raise Exception(error_msg)

    # Record the successful operation in the document log.
    docid = result.get("docid")
    self._wechat_doc_log(f"创建智能表格成功:名称={sheet_name},docid={docid}")
    return {
        "docid": docid,
        "url": result.get("url"),
    }
952
+
953
def wx_get_sheets(self, docid):
    """List the sub-sheets of a WeCom document.

    Each sub-sheet found is written to the document log and printed to the
    console.

    :param docid: document ID
    :return: the ``sheet_list`` array of the API response (empty list when absent)
    :raises Exception: when the API response ``errcode`` is not 0
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    self._wechat_doc_log(f"开始查询文档[{docid}]的子表信息")
    self._wechat_doc_refresh_token_if_needed()

    url = f"https://qyapi.weixin.qq.com/cgi-bin/wedoc/smartsheet/get_sheet?access_token={self.wechat_doc_access_token}"
    data = {
        "docid": docid,
        "need_all_type_sheet": True,
    }

    # Bound the call so a stalled connection cannot block the caller forever.
    response = requests.post(url, data=json.dumps(data), timeout=30)
    result = response.json()

    if result.get("errcode") != 0:
        error_msg = f"查询子表失败: {result.get('errmsg')}"
        self._wechat_doc_log(error_msg)
        raise Exception(error_msg)

    sheet_list = result.get("sheet_list", [])
    self._wechat_doc_log(f"查询文档[{docid}]子表完成,共找到{len(sheet_list)}个子表")
    for sheet in sheet_list:
        # Use .get() so a sheet entry lacking one of these keys cannot abort
        # an otherwise successful query while it is merely being logged.
        sheet_info = (f"子表信息 - ID: {sheet.get('sheet_id')}, 标题: {sheet.get('title')}, "
                      f"类型: {sheet.get('type')}, 可见性: {'可见' if sheet.get('is_visible') else '不可见'}")
        self._wechat_doc_log(sheet_info)
        print(f"[企业微信文档] {sheet_info}")  # echo to the console as well

    return sheet_list
986
+
987
def wx_run_excel(self, docid, sheet_id):
    """Read all records of a WeCom smart sheet into a pandas DataFrame.

    Despite its name this method writes no Excel file; it returns the data
    frame. Columns whose title contains "日期" or "时间" (which includes the
    system columns 创建时间 / 更新时间) are converted to Beijing-time strings
    formatted as ``YYYY-MM-DD HH:MM:SS``.

    :param docid: document ID
    :param sheet_id: sub-sheet ID
    :return: DataFrame with one row per record, excluding rows whose 更新时间
        is the string '0'; ``None`` when the sheet has no records
    :raises Exception: when the API response ``errcode`` is not 0, or when
        the paging cursor stops advancing while more data is reported
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    self._wechat_doc_log(f"开始读取文档[{docid}]子表[{sheet_id}]的记录到dataframe")
    self._wechat_doc_refresh_token_if_needed()

    # ---------------- 1. Page through the sheet's records ----------------
    url = f"https://qyapi.weixin.qq.com/cgi-bin/wedoc/smartsheet/get_records?access_token={self.wechat_doc_access_token}"
    data = {
        "docid": docid,
        "sheet_id": sheet_id,
        "key_type": "CELL_VALUE_KEY_TYPE_FIELD_TITLE",
        "limit": 1000,
        "offset": 0,
    }

    all_records = []
    has_more = True
    while has_more:
        # Bound each call so a stalled connection cannot block forever.
        response = requests.post(url, data=json.dumps(data), timeout=30)
        result = response.json()
        if result.get("errcode") != 0:
            error_msg = f"查询记录失败: {result.get('errmsg')}"
            self._wechat_doc_log(error_msg)
            raise Exception(error_msg)
        all_records.extend(result.get("records", []))
        has_more = result.get("has_more", False)
        next_offset = result.get("next", 0)
        # If more data is reported but the cursor did not move forward, the
        # same page would be requested forever; fail loudly instead.
        if has_more and (not isinstance(next_offset, int) or next_offset <= data["offset"]):
            error_msg = f"查询记录失败: 分页游标未前进(next={next_offset})"
            self._wechat_doc_log(error_msg)
            raise Exception(error_msg)
        data["offset"] = next_offset

    if not all_records:
        msg = f"文档[{docid}]子表[{sheet_id}]没有找到记录,无需导出"
        self._wechat_doc_log(msg)
        print(f"[企业微信文档] {msg}")
        return

    # ---------------- 2. Time-conversion helpers ----------------
    beijing_tz = datetime.timezone(datetime.timedelta(hours=8))

    def _timestamp_to_beijing(timestamp):
        """Convert a second- or millisecond-level epoch value to a Beijing-time string."""
        try:
            ts = int(timestamp)
            # A 10-digit value is treated as seconds; anything else as milliseconds.
            if len(str(ts)) == 10:
                ts *= 1000
            # Timezone-aware conversion; replaces the deprecated
            # utcfromtimestamp() + 8h arithmetic with the same result.
            beijing_dt = datetime.datetime.fromtimestamp(ts / 1000, tz=beijing_tz)
            return beijing_dt.strftime("%Y-%m-%d %H:%M:%S")
        except (ValueError, OverflowError, OSError) as e:
            # OSError covers platform-level rejection of out-of-range values.
            self._wechat_doc_log(f"时间戳转换失败(值:{timestamp}),错误:{str(e)}")
            return str(timestamp)

    def _extract_and_convert_ts(raw_text):
        """Convert the first 10-13 digit run in the text; return the text unchanged if none."""
        ts_match = re.search(r"\d{10,13}", raw_text)
        if ts_match:
            return _timestamp_to_beijing(ts_match.group())
        # No timestamp-like digits (e.g. already "2025-10-07"): keep as-is.
        return raw_text

    def _unified_time_convert(value):
        """Convert a raw cell value of any supported shape to a Beijing-time string.

        Returns "" for empty values and the stringified original when no
        conversion applies.
        """
        if value is None or str(value).strip() in ["", "None", "nan"]:
            return ""

        # Shape 1: {"type": "DateTimeFieldProperty", "value": <epoch>}
        if isinstance(value, dict) and value.get("type") == "DateTimeFieldProperty":
            ts = value.get("value")
            if isinstance(ts, (int, float)):
                return _timestamp_to_beijing(ts)
            self._wechat_doc_log(f"DateTimeFieldProperty时间戳非数字:{value}")
            return str(value)

        # Shape 2: a list whose first element is a dict; only that first
        # element is inspected.
        if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
            nested_dict = value[0]
            if nested_dict.get("type") == "DateTimeFieldProperty":
                ts = nested_dict.get("value")
                if isinstance(ts, (int, float)):
                    return _timestamp_to_beijing(ts)
                self._wechat_doc_log(f"嵌套DateTimeFieldProperty时间戳非数字:{value}")
                return str(value)
            if "text" in nested_dict:
                return _extract_and_convert_ts(nested_dict["text"])
            # "text" is known to be absent here, so fall back to "title" or
            # the stringified original value.
            return str(nested_dict.get("title", str(value)))

        # Shape 3: plain text / number.
        return _extract_and_convert_ts(str(value))

    # ---------------- 3. Build the rows ----------------
    # Collect custom field names from every record (first-seen order) rather
    # than only the first one, so a date column that happens to be absent
    # from the first record is still recognised.
    custom_fields = list(dict.fromkeys(
        field_name
        for record in all_records
        for field_name in (record.get("values") or {})
    ))
    all_fields = ["记录ID", "创建时间", "更新时间", "最后编辑者"] + custom_fields
    date_related_columns = [col for col in all_fields if any(kw in str(col) for kw in ["日期", "时间"])]
    date_column_set = set(date_related_columns)
    self._wechat_doc_log(f"识别到需转换的日期相关列:{date_related_columns}")

    rows = []
    for record in all_records:
        row = {
            "记录ID": record.get("record_id"),
            "创建时间": _unified_time_convert(record.get("create_time")),
            "更新时间": _unified_time_convert(record.get("update_time")),
            "最后编辑者": record.get("updater_name"),
        }

        # ``or {}`` also tolerates an explicit null for "values".
        values = record.get("values") or {}
        for field_name, field_value in values.items():
            if field_name in date_column_set:
                row[field_name] = _unified_time_convert(field_value)
            elif isinstance(field_value, list) and len(field_value) > 0:
                first_item = field_value[0]
                if isinstance(first_item, dict):
                    # Only the first element's text (or title) is kept.
                    row[field_name] = first_item.get("text", first_item.get("title", str(first_item)))
                else:
                    row[field_name] = str(field_value)
            else:
                row[field_name] = str(field_value) if field_value is not None else ""

        rows.append(row)

    # ---------------- 4. Assemble the DataFrame ----------------
    df = pd.DataFrame(rows)
    # Drop rows whose update time is the literal string '0'.
    df = df[df['更新时间'] != '0']

    # The reported count is the number of records read, before the filter above.
    msg = f"成功读取{len(rows)}条记录到dataframe({len(date_related_columns)}个日期相关列已转为北京时间)"
    self._wechat_doc_log(msg)
    print(f"[企业微信文档] {msg}")

    return df
1163
+
1164
def wx_delete_table(self, docid):
    """Delete a WeCom document.

    :param docid: document ID
    :return: ``True`` when the API reports success
    :raises Exception: when the API response ``errcode`` is not 0
    :raises requests.exceptions.RequestException: on network failure or timeout
    """
    self._wechat_doc_log(f"开始删除文档:docid={docid}")
    self._wechat_doc_refresh_token_if_needed()

    url = f"https://qyapi.weixin.qq.com/cgi-bin/wedoc/del_doc?access_token={self.wechat_doc_access_token}"
    data = {"docid": docid}

    # Bound the call so a stalled connection cannot block the caller forever.
    response = requests.post(url, data=json.dumps(data), timeout=30)
    result = response.json()

    if result.get("errcode") != 0:
        error_msg = f"删除文档失败: {result.get('errmsg')}"
        self._wechat_doc_log(error_msg)
        raise Exception(error_msg)

    self._wechat_doc_log(f"文档删除成功:docid={docid}")
    return True
1186
+
842
1187
  # 表格导出使用示例
843
1188
  # df1 = pd.DataFrame(df)
844
1189
  # df2 = pd.DataFrame(df)
@@ -871,4 +1216,3 @@ class DataProcessingAndMessaging:
871
1216
  # path = ux.path
872
1217
  # current_script_name = ux.current_script_name
873
1218
 
874
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Functions_d
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: 包含数据处理、Hive交互、企业微信消息发送、Excel操作等功能的工具类库
5
5
  Author: DongYang
6
6
  Author-email: 649898871@qq.com
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Functions_d
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: 包含数据处理、Hive交互、企业微信消息发送、Excel操作等功能的工具类库
5
5
  Author: DongYang
6
6
  Author-email: 649898871@qq.com
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = Functions_d
3
- version = 1.0.2
3
+ version = 1.0.4
4
4
  author = DongYang
5
5
  author_email = 649898871@qq.com
6
6
  description = 包含数据处理、Hive交互、企业微信消息发送、Excel操作等功能的工具类库
File without changes
File without changes
File without changes