PyPI - qrpa - Versions diffs - qrpa-1.0.89.tar.gz → qrpa-1.0.91.tar.gz - Mend

qrpa-1.0.89.tar.gz → qrpa-1.0.91.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qrpa might be problematic. Click here for more details.

Files changed (37)

{qrpa-1.0.89 → qrpa-1.0.91}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qrpa
-Version: 1.0.89
+Version: 1.0.91
 Summary: qsir's rpa library
 Author: QSir
 Author-email: QSir <1171725650@qq.com>

{qrpa-1.0.89 → qrpa-1.0.91}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "qrpa"
-version = "1.0.89"
+version = "1.0.91"
 description = "qsir's rpa library"
 authors = [{ name = "QSir", email = "1171725650@qq.com" }]
 readme = "README.md"

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/db_migrator.py RENAMED Viewed

@@ -580,7 +580,7 @@ def create_default_migrator(silent: bool = False) -> DatabaseMigrator:
     )
     remote_config = RemoteConfig(
-        ssh_host="git@e3",
+        ssh_host="git@ecslz",
         temp_dir="/tmp/db_migration",
         database=remote_db
     )

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/fun_excel.py RENAMED Viewed

@@ -14,6 +14,9 @@ import threading
 from playwright.sync_api import sync_playwright
 import psutil
+import os, sys
+from pathlib import Path
 from .fun_base import log, sanitize_filename, create_file_path, copy_file, add_https, send_exception
 excel_color_index = {
@@ -195,13 +198,13 @@ def merge_by_column_v2(sheet, column_name, other_columns):
     # 更安全的数据获取方式，确保获取完整的数据范围
     last_row = get_last_row(sheet, col_letter)
     data = sheet.range(f'{col_letter}1:{col_letter}{last_row}').value
     # 确保data是列表格式
     if not isinstance(data, list):
         data = [data]
     log(f'数据范围: {col_letter}1:{col_letter}{last_row}, 数据长度: {len(data)}')
     start_row = 2  # 从第2行开始，跳过表头
     merge_row_ranges = []  # 用来存储需要合并的行范围 (start_row, end_row)
@@ -211,13 +214,13 @@ def merge_by_column_v2(sheet, column_name, other_columns):
         col_name = find_column_by_data(sheet, 1, col)
         if col_name:
             all_columns.append(col_name)
     log(f'需要合并的列: {all_columns}')
     # 遍历数据行，从第3行开始比较（因为第1行是表头，第2行是第一个数据行）
     for row in range(3, len(data) + 1):
-        log(f'查找 {row}/{len(data)}, 当前值: {data[row-1] if row-1 < len(data) else "超出范围"}, 前一个值: {data[row-2] if row-2 < len(data) else "超出范围"}')
+        log(f'查找 {row}/{len(data)}, 当前值: {data[row - 1] if row - 1 < len(data) else "超出范围"}, 前一个值: {data[row - 2] if row - 2 < len(data) else "超出范围"}')
         # 检查值是否发生变化
         if row <= len(data) and data[row - 1] != data[row - 2]:
             # 值发生变化，处理前一组
@@ -233,7 +236,7 @@ def merge_by_column_v2(sheet, column_name, other_columns):
         merge_row_ranges.append((start_row, end_row))
     log(f'行合并范围: {merge_row_ranges}')
     # 对每个行范围，在所有指定列中执行合并
     for start_row, end_row in merge_row_ranges:
         if start_row < end_row:  # 只有当开始行小于结束行时才合并（多行）
@@ -756,9 +759,33 @@ def download_images_concurrently(image_urls, platform='shein', img_save_dir=None
         results = list(executor.map(lambda url: download_img_v2(url, platform, img_save_path=img_save_dir), image_urls))
     return results
+def get_chromium_executable():
+    """
+    返回 Chromium 可执行文件路径，兼容 PyInstaller 打包后的 exe
+    """
+    # PyInstaller 临时目录
+    if getattr(sys, 'frozen', False):
+        base_path = Path(sys._MEIPASS)
+    else:
+        base_path = Path(__file__).parent
+    # 尝试查找打包后的浏览器文件
+    possible_path = base_path / "playwright" / "driver" / "package" / "chromium_headless_shell-1169" / "chrome-win" / "headless_shell.exe"
+    if possible_path.exists():
+        return str(possible_path)
+    # fallback: 系统 Playwright 安装目录
+    local_appdata = Path(os.environ.get("LOCALAPPDATA", ""))
+    fallback_path = local_appdata / "ms-playwright" / "chromium" / "chrome-win" / "chrome.exe"
+    if fallback_path.exists():
+        return str(fallback_path)
+    raise FileNotFoundError("Chromium 可执行文件未找到，请先执行 'playwright install' 下载浏览器。")
 def download_img_by_chrome(image_url, save_name):
+    chromium_path = get_chromium_executable()
     with sync_playwright() as p:
-        browser = p.chromium.launch(headless=True)  # 运行时可以看到浏览器
+        browser = p.chromium.launch(headless=True, executable_path=chromium_path)  # 运行时可以看到浏览器
         context = browser.new_context()
         page = context.new_page()
         # 直接通过Playwright下载图片

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/fun_web.py RENAMED Viewed

@@ -146,6 +146,54 @@ def full_screen_shot(web_page: Page, config):
     web_page.screenshot(path=full_screenshot_image_path, full_page=True)
     return full_screenshot_image_path
+def fetch_get(page: Page, url: str, headers: Optional[dict] = None, config: Optional[dict] = None) -> dict:
+    """
+    发送 HTTP GET 请求，支持自定义 headers 和配置。
+    :param page: Playwright 的 Page 对象
+    :param url: 请求地址
+    :param headers: 自定义 headers 字典
+    :param config: 请求配置字典，可包含 credentials, mode, referrer, referrerPolicy 等
+    :return: 服务器返回的 JSON 响应（dict）
+    """
+    if headers is not None and not isinstance(headers, dict):
+        raise ValueError("headers 参数必须是 dict 或 None")
+    if config is not None and not isinstance(config, dict):
+        raise ValueError("config 参数必须是 dict 或 None")
+    try:
+        page.wait_for_load_state('load')
+        response = page.evaluate("""
+            async ({ url, extraHeaders, config }) => {
+                try {
+                    const defaultHeaders = {
+                        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
+                    };
+                    const defaultConfig = {
+                        method: 'GET',
+                        credentials: 'include',
+                        mode: 'cors'
+                    };
+                    const headers = Object.assign({}, defaultHeaders, extraHeaders || {});
+                    const options = Object.assign({}, defaultConfig, config || {}, { headers: headers });
+                    const response = await fetch(url, options);
+                    if (!response.ok) {
+                        throw new Error(`HTTP ${response.status} - ${response.statusText}`);
+                    }
+                    return await response.json();
+                } catch (error) {
+                    return { "error": "fetch_failed", "message": error.message };
+                }
+            }
+        """, {"url": url, "extraHeaders": headers, "config": config})
+        return response
+    except Exception as e:
+        raise send_exception()
 def safe_goto(page, url, **kwargs):
     caller = inspect.stack()[1]
     log(f"[DEBUG] goto called from {caller.filename}:{caller.lineno} url={url}")

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/mysql_module/shein_product_model.py RENAMED Viewed

@@ -64,6 +64,9 @@ class SheinProductSkc(Base):
     created_at = Column(DateTime, default=datetime.now, comment='创建时间')
     updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now, comment='更新时间')
+    # 用户备注字段（供后续web界面使用）
+    user_notes = Column(Text, nullable=True, comment='用户备注')
     # 定义索引
     __table_args__ = (
         Index('ix_skc_id', 'skc_id'),
@@ -108,6 +111,7 @@ class SheinProductSku(Base):
     price = Column(DECIMAL(10, 2), nullable=True, comment='价格')
     erp_cost_price = Column(DECIMAL(10, 2), nullable=True, comment='ERP成本价')
     erp_supplier_name = Column(String(100), nullable=True, comment='ERP默认供货商')
+    erp_stock = Column(Integer, nullable=True, comment='ERP库存')
     # 时间戳
     created_at = Column(DateTime, default=datetime.now, comment='创建时间')
@@ -218,10 +222,10 @@ class SheinProductManager:
         session = self.Session()
         try:
             for data in data_list:
                 # 处理SKC数据
                 skc_record = self._upsert_skc_data(session, data)
                 # 处理SKU数据
                 for sku_data in data.get('skuList', []):
                     sku_data['local_skc_id'] = skc_record.id
@@ -319,6 +323,7 @@ class SheinProductManager:
             existing_sku.price = sku_data.get('price')
             existing_sku.erp_cost_price = sku_data.get('erp_cost_price')
             existing_sku.erp_supplier_name = sku_data.get('erp_supplier_name')
+            existing_sku.erp_stock = sku_data.get('erp_stock')
             existing_sku.updated_at = datetime.now()
         else:
             # 插入新记录
@@ -473,4 +478,7 @@ def example_usage2():
 if __name__ == "__main__":
     pass
-    # example_usage()
+    database_url = "mysql+pymysql://root:123wyk@localhost:3306/lz"
+    manager = SheinProductManager(database_url)
+    manager.create_tables()
+    # example_usage()

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/shein_excel.py RENAMED Viewed

@@ -2341,7 +2341,7 @@ class SheinExcel:
         self.dealFormula(sheet)  # 有空再封装优化
         colorize_by_field(sheet, 'SPU')
         autofit_column(sheet, ['商品信息', '店铺名称', 'SKC点击率/SKC转化率', '自主参与活动'])
-        column_to_left(sheet, ['店铺名称', 'SKC点击率/SKC转化率', '自主参与活动'])
+        column_to_left(sheet, ['店铺名称', 'SKC点击率/SKC转化率', '自主参与活动','近7天SKU销量/SKC销量/SKC曝光'])
         specify_column_width(sheet, ['商品标题'], 150 / 6)
         add_borders(sheet)
         InsertImageV2(sheet, ['SKC图片', 'SKU图片'], 'shein', 120, None, None, True)

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/shein_lib.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from .fun_file import read_dict_from_file, write_dict_to_file, read_dict_from_file_ex, write_dict_to_file_ex
 from .fun_base import log, send_exception, md5_string, get_safe_value, NetWorkIdleTimeout
-from .fun_web import fetch, full_screen_shot, safe_goto
+from .fun_web import fetch, fetch_get, full_screen_shot, safe_goto
 from .time_utils import TimeUtils
 from .wxwork import WxWorkBot
@@ -36,7 +36,7 @@ class SheinLib:
             try:
                 current_url = web_page.url
                 log(f"尝试获取页面信息 - URL: {current_url}", self.store_username, self.store_name)
                 # 检查是否在认证页面，如果是则直接跳转到目标页面
                 if '/auth/SSLS' in current_url:
                     log("检测到SSLS认证页面，直接跳转到首页", self.store_username, self.store_name)
@@ -44,17 +44,17 @@ class SheinLib:
                     web_page.wait_for_timeout(3000)
                     current_url = web_page.url
                     log(f"跳转后URL: {current_url}", self.store_username, self.store_name)
                 # 等待导航完成
                 web_page.wait_for_load_state("domcontentloaded", timeout=6000)
                 final_url = web_page.url
                 final_title = web_page.title()
                 log(f"页面稳定 - URL: {final_url}, 标题: {final_title}", self.store_username, self.store_name)
                 break
             except Exception as e:
-                log(f"第{attempt+1}次等待页面稳定失败: {e}", self.store_username, self.store_name)
+                log(f"第{attempt + 1}次等待页面稳定失败: {e}", self.store_username, self.store_name)
                 if "crashed" in str(e) or "Target" in str(e):
                     log("页面稳定检查时崩溃，直接继续", self.store_username, self.store_name)
                     break
@@ -66,7 +66,7 @@ class SheinLib:
                     log("页面稳定等待最终失败，继续执行", self.store_username, self.store_name)
                     break
                 web_page.wait_for_timeout(2000)
         web_page.wait_for_timeout(2000)
         # 定义最大重试次数
@@ -83,19 +83,19 @@ class SheinLib:
                 retries += 1
                 while not web_page.locator('//div[contains(text(),"商家后台")]').nth(1).is_visible():
                     try:
                         current_url = web_page.url
                         current_title = web_page.title()
                         log(f"循环检查 - URL: {current_url}, 标题: {current_title}", self.store_username, self.store_name)
                         # 如果在认证页面且出现问题，直接跳转
                         if '/auth/SSLS' in current_url:
                             log("在主循环中检测到SSLS认证页面，跳转到首页", self.store_username, self.store_name)
                             web_page.goto('https://sso.geiwohuo.com/#/home', wait_until='domcontentloaded', timeout=15000)
                             web_page.wait_for_timeout(3000)
                             continue
                     except Exception as status_error:
                         log(f"获取页面状态失败: {status_error}", self.store_username, self.store_name)
                         if "crashed" in str(status_error):
@@ -200,13 +200,13 @@ class SheinLib:
             except Exception as e:
                 log(f"错误发生: {e}, 重试中...({self.store_username}, {self.store_name})")
                 log(traceback.format_exc())
                 # 收集崩溃时的详细信息
                 try:
                     crash_url = web_page.url
                     crash_title = web_page.title()
                     log(f"崩溃时页面信息 - URL: {crash_url}, 标题: {crash_title}", self.store_username, self.store_name)
                     # 尝试截图保存崩溃现场
                     try:
                         screenshot_path = f"crash_screenshot_{self.store_username}_{int(time.time())}.png"
@@ -214,19 +214,19 @@ class SheinLib:
                         log(f"已保存崩溃截图: {screenshot_path}", self.store_username, self.store_name)
                     except:
                         log("无法截取崩溃时的页面截图", self.store_username, self.store_name)
                 except:
                     log("无法获取崩溃时的页面信息", self.store_username, self.store_name)
                 # 检查特定类型的错误
                 if any(keyword in str(e).lower() for keyword in ['memory', 'out of memory', 'oom']):
                     log("检测到内存相关崩溃", self.store_username, self.store_name)
                 if "destroyed" in str(e) or "navigation" in str(e):
                     log("检测到导航中断，等待页面稳定后重试", self.store_username, self.store_name)
                     web_page.wait_for_timeout(5000)
                     continue
                 if 'crashed' in str(e) or 'Target' in str(e):
                     log("检测到页面或目标崩溃，直接退出当前循环", self.store_username, self.store_name)
                     raise e
@@ -241,6 +241,44 @@ class SheinLib:
         # web_page.wait_for_load_state("networkidle")
         web_page.wait_for_timeout(3000)
+    # 获取用户信息
+    def get_user(self, uuid=None):
+        log(f'获取用户信息:{self.store_username} {self.store_name}')
+        # 生成 uuid 参数，如果没有提供则使用时间戳
+        if uuid is None:
+            import time
+            uuid = str(int(time.time() * 1000))
+        url = f"https://sso.geiwohuo.com/sso-prefix/auth/getUser?uuid={uuid}"
+        # 设置请求头，根据 Chrome 请求
+        headers = {
+            "gmpsso-language": "CN",
+            "origin-url"     : "https://sso.geiwohuo.com/#/home/",
+            "x-sso-scene"    : "gmpsso"
+        }
+        # 特定于此请求的配置
+        fetch_config = {
+            "credentials"   : "omit",
+            "referrer"      : "https://sso.geiwohuo.com/",
+            "referrerPolicy": "strict-origin-when-cross-origin"
+        }
+        response_text = fetch_get(self.web_page, url, headers, fetch_config)
+        error_code = response_text.get('code')
+        if str(error_code) != '0':
+            raise send_exception(json.dumps(response_text, ensure_ascii=False))
+        info = response_text.get('info', {})
+        log(info)
+        cache_file = f'{self.config.auto_dir}/shein_user.json'
+        info['store_username'] = self.store_username
+        info['store_name'] = self.store_name
+        write_dict_to_file_ex(cache_file, {self.store_username: info}, [self.store_username])
+        return info
     # 获取质检报告pdf地址
     def get_qc_report_url(self, deliverCode, purchaseCode):
         log(f'获取质检报告:{deliverCode} {purchaseCode}')
@@ -849,6 +887,8 @@ class SheinLib:
                 cost_price = self.bridge.get_sku_cost(sku_item['supplierSku'], self.config.erp_source)
                 sku_item['erp_cost_price'] = cost_price if isinstance(cost_price, (int, float)) else None
                 sku_item['erp_supplier_name'] = self.bridge.get_sku_supplier(sku_item['supplierSku'], self.config.erp_source)
+                stock = self.bridge.get_sku_stock(sku_item['supplierSku'], self.config.erp_source)
+                sku_item['erp_stock'] = stock if isinstance(stock, (int, float)) else None
         cache_file = f'{self.config.auto_dir}/shein/product/skc_list_{self.store_username}.json'
         write_dict_to_file_ex(cache_file, {self.store_username: skc_list}, [self.store_username])
@@ -2276,6 +2316,7 @@ class SheinLib:
             skc = str(spu_info['skc'])
             # if not shein_db.exists_sales_1_days_ago(skc):
             #     log(f'未查到昨天销量: {skc}')
+            self.get_skc_week_actual_sales(skc)
             self.get_skc_sales(skc, date_60_days_ago, date_1_days_ago)
             skcCode = spu_info['supplierCode']
             product_name = DictSpuInfo[spu]['product_name_en']
@@ -2396,7 +2437,7 @@ class SheinLib:
                 # SKC趋势数据
                 sku_item.append(skc_trend.get('saleCnt', 0))  # SKC近7天销量
                 sku_item.append(skc_trend.get('epsUvIdx', 0))  # SKC近7天曝光人数
-                sku_item.append(skc_trend.get('goodsUvIdx', 0))  # SKC近7天商详访客
+                sku_item.append(skc_trend.get('goodsUv', 0))  # SKC近7天商详访客
                 sku_item.append(skc_trend.get('epsGdsCtrIdx', 0))  # SKC近7天点击率
                 sku_item.append(skc_trend.get('payUvIdx', 0))  # SKC近7天支付人数
                 sku_item.append(skc_trend.get('gdsPayCtrIdx', 0))  # SKC近7天支付率

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/shein_ziniao.py RENAMED Viewed

@@ -5,7 +5,7 @@
 import os
 import platform
 import shutil
-import time
+import time, datetime
 import traceback
 import uuid
 import json
@@ -74,7 +74,7 @@ class ZiniaoClient:
         """启动客户端"""
         try:
             if self.is_windows:
-                cmd = [self.client_path, '--run_type=web_driver', '--ipc_type=http', '--port=' + str(self.socket_port)]
+                cmd = [self.client_path, '--run_type=web_driver', '--show_sidb=true', '--ipc_type=http', '--port=' + str(self.socket_port)]
             elif self.is_mac:
                 cmd = ['open', '-a', self.client_path, '--args', '--run_type=web_driver', '--ipc_type=http',
                        '--port=' + str(self.socket_port)]
@@ -272,6 +272,73 @@ class ZiniaoTaskManager:
         self.browser = browser
         self.config = config
+    def daily_cleanup_superbrowser(self, browser_id):
+        """
+        每天删除一次SuperBrowser缓存文件夹
+        Args:
+            browser_id (str): 浏览器ID，如 '26986387919128'
+        """
+        # 获取本地AppData路径
+        local_appdata = os.getenv('LOCALAPPDATA')
+        if not local_appdata:
+            log("错误: 无法获取LOCALAPPDATA路径")
+            return False
+        # 构建路径
+        cache_path = os.path.join(local_appdata, 'SuperBrowser')
+        target_folder = os.path.join(cache_path, f'User Data\\Chromium_{browser_id}')
+        flag_file = os.path.join(cache_path, f'User Data\\cleanup_flag_{browser_id}.txt')
+        # 检查目标文件夹是否存在
+        if not os.path.exists(target_folder):
+            log(f"目标文件夹不存在: {target_folder}")
+            return False
+        # 获取当前日期
+        today = datetime.date.today()
+        today_str = today.strftime('%Y-%m-%d')
+        # 检查标志文件
+        need_cleanup = True
+        if os.path.exists(flag_file):
+            try:
+                # 读取标志文件中的日期
+                with open(flag_file, 'r', encoding='utf-8') as f:
+                    last_cleanup_date = f.read().strip()
+                # 如果是今天已经清理过，则跳过
+                if last_cleanup_date == today_str:
+                    log(f"今天({today_str})已经清理过，跳过删除操作")
+                    need_cleanup = False
+            except Exception as e:
+                log(f"读取标志文件时出错: {e}")
+                # 如果读取出错，继续执行清理
+        if need_cleanup:
+            try:
+                # 删除目标文件夹
+                log(f"正在删除文件夹: {target_folder}")
+                shutil.rmtree(target_folder)
+                log("删除成功!")
+                # 创建/更新标志文件
+                os.makedirs(os.path.dirname(flag_file), exist_ok=True)
+                with open(flag_file, 'w', encoding='utf-8') as f:
+                    f.write(today_str)
+                log(f"已创建标志文件: {flag_file}")
+                return True
+            except Exception as e:
+                log(f"删除文件夹时出错: {e}")
+                return False
+        return True
     def run_single_store_task(self, browser_info: Dict[str, Any],
             run_func: Callable, task_key: str,
             just_store_username: Optional[List[str]] = None,
@@ -282,6 +349,10 @@ class ZiniaoTaskManager:
         store_name = browser_info.get("browserName")
         store_username = browser_info.get("store_username")
+        # 删除浏览器缓存,一天一删
+        browser_id = browser_info.get("browserId")
+        self.daily_cleanup_superbrowser(browser_id)
         retry_count = 0
         while True:
             try:

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa/time_utils.py RENAMED Viewed

@@ -260,20 +260,31 @@ class TimeUtils:
     @staticmethod
     def get_dates_from_first_of_month_to_yesterday() -> List[str]:
-        """获取从本月第一天到昨天的日期列表"""
+        """获取从本月第一天到昨天的日期列表
+        如果今天是本月第一天，则返回上个月的日期列表
+        """
         today = datetime.today()
-        first_day_of_month = today.replace(day=1)
         yesterday = today - timedelta(days=1)
-        date_list = []
-        current_date = first_day_of_month
+        # 如果今天是本月第一天，取上个月
+        if today.day == 1:
+            # 找到上个月最后一天
+            last_month_last_day = today - timedelta(days=1)
+            # 上个月第一天
+            first_day_of_last_month = last_month_last_day.replace(day=1)
+            start_date = first_day_of_last_month
+            end_date = last_month_last_day
+        else:
+            start_date = today.replace(day=1)
+            end_date = yesterday
-        while current_date <= yesterday:
+        date_list = []
+        current_date = start_date
+        while current_date <= end_date:
             date_list.append(current_date.strftime('%Y-%m-%d'))
             current_date += timedelta(days=1)
         return date_list
     # ==================== 月份相关 ====================
     @staticmethod

{qrpa-1.0.89 → qrpa-1.0.91}/qrpa.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qrpa
-Version: 1.0.89
+Version: 1.0.91
 Summary: qsir's rpa library
 Author: QSir
 Author-email: QSir <1171725650@qq.com>