PyPI - mdbq - Versions diffs - 2.2.2__tar.gz → 2.2.4__tar.gz - Mend

mdbq 2.2.2.tar.gz → 2.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

{mdbq-2.2.2 → mdbq-2.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.2.2
+Version: 2.2.4
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.2.2 → mdbq-2.2.4}/mdbq/aggregation/query_data.py RENAMED Viewed

@@ -1,4 +1,6 @@
 # -*- coding: UTF-8 –*-
+import re
 from mdbq.mongo import mongo
 from mdbq.mysql import mysql
 from mdbq.mysql import s_query
@@ -14,8 +16,6 @@ import getpass
 import json
 import os
-from sqlalchemy.event import remove
 """
 程序用于下载数据库(调用 s_query.py 下载并清洗), 并对数据进行聚合清洗, 不会更新数据库信息;
@@ -862,7 +862,47 @@ class GroupBy:
                        }
                 )
             df.insert(loc=1, column='推广渠道', value='万相台无界版')  # df中插入新列
+            # 1. 匹配 L后面接 2 个或以上数字，不区分大小写，示例：L345
+            # 2. 其余情况，L 后面接多个数字的都会被第一条 if 命中，不区分大小写
+            df['消费力层级'] = df.apply(
+                lambda x:
+                ''.join(re.findall(r'(l\d+)', x['人群名字'].upper(), re.IGNORECASE)) if re.findall(r'(l\d{2,})', x['人群名字'], re.IGNORECASE)
+                else 'L5' if re.findall(r'(l\d*5)', x['人群名字'], re.IGNORECASE)
+                else 'L4' if re.findall(r'(l\d*4)', x['人群名字'], re.IGNORECASE)
+                else 'L3' if re.findall(r'(l\d*3)', x['人群名字'], re.IGNORECASE)
+                else 'L2' if re.findall(r'(l\d*2)', x['人群名字'], re.IGNORECASE)
+                else 'L1' if re.findall(r'(l\d*1)', x['人群名字'], re.IGNORECASE)
+                else '', axis=1)
+            # 1. 匹配连续的 4 个数字且后面不能接数字或"元"或汉字，筛掉的人群示例：月均消费6000元｜受众20240729175213｜xxx2024真皮公文包
+            # 2. 匹配 2数字_2数字且前面不能是数字，合法匹配：人群_30_50_促； 非法示例：L345_3040 避免识别出 35～20 岁用户的情况
+            # pattern = r'(\d{4})(?!\d|[\u4e00-\u9fa5])'  # 匹配 4 个数字，后面不能接数字或汉字
+            # pattern = r'(?<![\d\u4e00-\u9fa5])(\d{4})' # 匹配前面不是数字或汉字的 4 个连续数字
+            # 匹配 4 个数字，前面和后面都不能是数字或汉字
+            pattern1 = r'(?<![\d\u4e00-\u9fa5])(\d{4})(?!\d|[\u4e00-\u9fa5])'
+            # 匹配指定字符，前面不能是数字或 l 或 L 开头
+            pattern2 = r'(?<![\dlL])(\d{2}_\d{2})'
+            df['用户年龄'] = df.apply(
+                lambda x:
+                ''.join(re.findall(pattern1, x['人群名字'].upper())) if re.findall(pattern1, x['人群名字'])
+                # else ''.join(re.findall(r'[^\d|l|L](\d{2}_\d{2})', x['人群名字'].upper())) if re.findall(r'[^\d|l|L](\d{2}_\d{2})', x['人群名字'])
+                else ''.join(re.findall(pattern2, x['人群名字'].upper())) if re.findall(pattern2, x['人群名字'])
+                else ''.join(re.findall(r'(\d{2}-\d{2})岁', x['人群名字'].upper())) if re.findall(r'(\d{2}-\d{2})岁', x['人群名字'])
+                else '', axis=1)
+            df['用户年龄'] = df['用户年龄'].apply(
+                lambda x: f'{x[:2]}~{x[2:4]}' if str(x).isdigit()
+                else str(x).replace('_', '~') if '_' in x
+                else str(x).replace('-', '~') if '-' in x
+                else x
+            )
+            # 年龄层不能是 0 开头
+            df['用户年龄'] = df['用户年龄'].apply(
+                lambda x: '' if str(x).startswith('0') else x)
+            # df = df.head(1000)
+            # df.to_csv('/Users/xigua/Downloads/test.csv', index=False, header=True, encoding='utf-8_sig')
+            # breakpoint()
             return df
         elif '天猫_关键词报表' in table_name:
             df.rename(columns={
                 '场景名字': '营销场景',
@@ -1774,4 +1814,3 @@ if __name__ == '__main__':
     data_aggregation(service_databases=[{'company': 'mysql'}], months=1)  # 正常的聚合所有数据
     # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # 单独聚合某一个数据库，具体库进函数编辑
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # 立即启动对聚合数据的清理工作

{mdbq-2.2.2 → mdbq-2.2.4}/mdbq.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.2.2
+Version: 2.2.4
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.2.2 → mdbq-2.2.4}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 setup(name='mdbq',
-      version='2.2.2',
+      version='2.2.4',
       author='xigua, ',
       author_email="2587125111@qq.com",
       url='https://pypi.org/project/mdbsql',