npm - @tapdb/tapdb-data-analysis - Versions diffs - 0.1.28 → 0.1.29 - Mend

@tapdb/tapdb-data-analysis 0.1.28 → 0.1.29

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/tapdb-data-analysis/SKILL.md +4 -3
package/tapdb-data-analysis/scripts/tapdb_query.py +40 -25

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@tapdb/tapdb-data-analysis",
-  "version": "0.1.28",
+  "version": "0.1.29",
   "description": "TapDB 游戏数据分析 AI Agent Skill - 查询和分析 TapDB 中的游戏运营数据（活跃/留存/付费/来源/LTV 等）",
   "keywords": [
     "tapdb",

package/tapdb-data-analysis/SKILL.md CHANGED

@@ -11,7 +11,7 @@ description: >
 # TapDB 数据分析
-> Skill 版本：v0.1.28
+> Skill 版本：v0.1.29
 通过 Python 脚本调用 TapDB 运营数据查询接口，获取游戏指标数据并分析。
@@ -172,7 +172,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
 ## 数据截断规则
-脚本**默认自动截断**，`_truncation` 字段含总行数、省略行数和关键列 min/max/avg。
+脚本**默认自动截断**，`_truncation` 字段包含总行数与省略行数。TapDB API 会在结果末尾附带**汇总行**（分组字段为 `null`），脚本截断时会保留该汇总行。
 | 场景 | 阈值 | 方式 |
 |------|------|------|
@@ -182,7 +182,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
 - 不加 `--all-retention` 通常仅返回 `DR1-DR30 + DR60/90/120/150/180`；加上后会额外补齐 `DR31-DR59`（及对应 `_newDevice/_rate` 列）。
-- 分析时利用 `_truncation.summary` 统计量，不要仅基于可见行下结论
+- 汇总数据以 API 返回的汇总行为准（分组字段为 `null`），不要在本地再计算汇总
 - 多次查询：每次先提取关键数值再下一个查询，不累积原始数据
 - 版本分布：一次性查询，不按天拆分（除非用户要求"按日趋势"）
 - 需完整数据加 `--no-truncate`
@@ -192,6 +192,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
 - **货币转换**：默认将金额转为人民币（CNY）。通过 `--exchange-to-currency` 可切换目标货币（如 USD/JPY/EUR），传 `none` 禁用转换返回原始金额。影响 income/source/user_value/life_cycle/ad_monet 等含金额字段的接口
 - `filters` 即使无条件也必须传 `[]`，否则 500
 - `group` 必传。retention/source 不传 `-g` 时自动用 `activation_time`，其他默认 `time`
+- `life_cycle` 在 `-g activation_os` 时仅支持 `--quota payment_cvs_rate`（其他 quota 会 500）
 - filters 格式: `{"col_name":"...", "data_type":"string|number|bool|date", "calculate_symbol":"include|un_include", "ftv":[...]}`
 - 各接口维度不同，不支持的维度返回 500。**先 `describe` 确认**

package/tapdb-data-analysis/scripts/tapdb_query.py CHANGED

@@ -83,21 +83,7 @@ _HEAD = 15
 _TAIL = 15
-def _numeric_summary(rows, limit=6):
-    """Compute min/max/avg for first N numeric columns."""
-    if not rows:
-        return {}
-    keys = [k for k, v in rows[0].items() if isinstance(v, (int, float))][:limit]
-    out = {}
-    for k in keys:
-        vals = [r[k] for r in rows if isinstance(r.get(k), (int, float))]
-        if vals:
-            out[k] = {"min": min(vals), "max": max(vals),
-                       "avg": round(sum(vals) / len(vals), 2)}
-    return out
-def _slim_rows(rows, cmd_type):
+def _slim_rows(rows, cmd_type, group_alias=None):
     """Truncate row count; time-series keeps head+tail, others keep head."""
     if not rows:
         return rows, None
@@ -110,7 +96,24 @@ def _slim_rows(rows, cmd_type):
         rows = rows[:cap] + [f"... 省略 {omit} 条 ..."]
         return rows, {"total_rows": total, "omitted": omit}
-    has_time = bool(_TIME_FIELDS & set(rows[0].keys()))
+    total = len(rows)
+    # TapDB API often appends a summary row at the end (group field is null).
+    # Keep that row (API-provided) when truncating.
+    summary_row = None
+    group_key = group_alias
+    if group_key and isinstance(rows[-1], dict) and group_key in rows[-1] and rows[-1].get(group_key) is None:
+        summary_row = rows[-1]
+        rows = rows[:-1]
+    else:
+        # Best-effort fallback when caller didn't provide group_alias.
+        for time_key in _TIME_FIELDS:
+            if time_key in rows[-1] and rows[-1].get(time_key) is None:
+                summary_row = rows[-1]
+                rows = rows[:-1]
+                break
+    has_time = bool(rows and (_TIME_FIELDS & set(rows[0].keys())))
     if cmd_type == "whale_user":
         cap = _MAX_WHALE_ROWS
     elif has_time:
@@ -118,17 +121,19 @@ def _slim_rows(rows, cmd_type):
     else:
         cap = _MAX_GROUP_ROWS
     if len(rows) <= cap:
+        if summary_row is not None:
+            return rows + [summary_row], None
         return rows, None
-    total = len(rows)
-    summary = _numeric_summary(rows)
     if has_time:
-        omit = total - _HEAD - _TAIL
+        omit = len(rows) - _HEAD - _TAIL
         rows = rows[:_HEAD] + [{"_": f"... 省略 {omit} 行 ..."}] + rows[-_TAIL:]
     else:
-        omit = total - cap
+        omit = len(rows) - cap
         rows = rows[:cap] + [{"_": f"... 省略 {omit} 行 ..."}]
-    return rows, {"total_rows": total, "omitted": omit, "summary": summary}
+    if summary_row is not None:
+        rows.append(summary_row)
+    return rows, {"total_rows": total, "omitted": omit}
 def _list_of_lists_to_dicts(lol):
@@ -177,7 +182,7 @@ def _rebuild(resp, path, rows, info):
     return result
-def truncate_response(resp, cmd_type=None):
+def truncate_response(resp, cmd_type=None, group_alias=None):
     """Truncate API response to save context window tokens."""
     if not resp or (isinstance(resp, dict) and resp.get("error")):
         return resp
@@ -186,7 +191,7 @@ def truncate_response(resp, cmd_type=None):
         return resp
     info = {}
-    rows, row_info = _slim_rows(rows, cmd_type)
+    rows, row_info = _slim_rows(rows, cmd_type, group_alias=group_alias)
     if row_info:
         info.update(row_info)
     if not info:
@@ -466,7 +471,7 @@ ENDPOINT_CAPS = {
             "lang_system": "过滤值会自动翻译，可直接用展示名",
         },
         "group_notes": {
-            "activation_os": "分组仅支持 time/activation_time/activation_os 三个维度",
+            "activation_os": "仅当 quota=payment_cvs_rate 时支持；（建议改用 time/activation_time）",
         },
         "unsupported_note": "分组仅支持 time/activation_time/activation_os；过滤不支持 activation_app_version/first_server/current_server/utmsrc/login_type/payment_source",
     },
@@ -572,7 +577,10 @@ def do_query(args, endpoint_path, extra=None, cmd_type=None):
     url = f"{base_url}/mcp/op/{endpoint_path}"
     result = http_request("POST", url, {"MCP-KEY": key}, body)
     if not getattr(args, "no_truncate", False):
-        result = truncate_response(result, cmd_type or endpoint_path)
+        group_alias = None
+        if isinstance(body.get("group"), dict):
+            group_alias = body["group"].get("col_alias")
+        result = truncate_response(result, cmd_type or endpoint_path, group_alias=group_alias)
     output(result)
@@ -633,6 +641,13 @@ def cmd_whale_user(args):
 def cmd_life_cycle(args):
+    if getattr(args, "group_by", None) == "activation_os" and getattr(args, "quota", None) != "payment_cvs_rate":
+        output({
+            "error": True,
+            "message": "life_cycle 接口在 -g activation_os 时仅支持 --quota payment_cvs_rate（其他 quota 会 500）",
+            "hint": "请改用 --quota payment_cvs_rate 或改用 -g time / -g activation_time",
+        })
+        return
     do_query(args, "life_cycle", {"quota": args.quota})