@tapdb/tapdb-data-analysis 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -11,7 +11,7 @@ description: >
|
|
|
11
11
|
|
|
12
12
|
# TapDB 数据分析
|
|
13
13
|
|
|
14
|
-
> Skill 版本:v0.1.27
|
|
14
|
+
> Skill 版本:v0.1.29
|
|
15
15
|
|
|
16
16
|
通过 Python 脚本调用 TapDB 运营数据查询接口,获取游戏指标数据并分析。
|
|
17
17
|
|
|
@@ -64,10 +64,11 @@ npm view @tapdb/tapdb-data-analysis version --registry https://registry.npmjs.or
|
|
|
64
64
|
|
|
65
65
|
**触发**:"分析趋势/有没有异常/为什么下降/波动大"
|
|
66
66
|
|
|
67
|
-
1.
|
|
68
|
-
2. 按 `analysis_guide.md`
|
|
69
|
-
3.
|
|
70
|
-
4.
|
|
67
|
+
1. 先查 60 天**汇总趋势**(当前 30 天 + 上一周期 30 天,优先周粒度):DAU→`active -g time --quota dau --group-unit week`,收入→`income -g time --group-unit week`,留存→`retention -g activation_time --group-unit week`,新增→`source -g activation_time --group-unit week`
|
|
68
|
+
2. 按 `analysis_guide.md` 中的异常检测方法判断,先排查节假日效应(周粒度无法定位时,再切到按日粒度)
|
|
69
|
+
3. 需要定位异常日期/用户要求按日 → 对异常区间切到按日(`--group-unit day`),并缩小时间窗定位异常日期
|
|
70
|
+
4. 需要解释原因 → 做维度下钻:`-g <维度> --limit 10`(一次只查一个维度;默认 Top10)
|
|
71
|
+
5. 输出执行摘要式报告
|
|
71
72
|
|
|
72
73
|
### C: 版本/卡池/活动分析
|
|
73
74
|
|
|
@@ -116,6 +117,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py describe active # 查看接口
|
|
|
116
117
|
| `--language` | 语言(国家分组时) | `--language cn` |
|
|
117
118
|
| `--group-dim` | 分组维度 cy/scon | `--group-dim cy` |
|
|
118
119
|
| `--de-water` | 去水 | |
|
|
120
|
+
| `--limit` | 结果数量上限(默认5000) | `--limit 10` |
|
|
119
121
|
| `--no-truncate` | 不截断输出 | |
|
|
120
122
|
| `-r` | 区域 cn/sg | `-r sg` |
|
|
121
123
|
|
|
@@ -157,9 +159,20 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
|
|
|
157
159
|
| `life_cycle` | 生命周期 | `--quota payment_amount\|payment_cvs_rate\|payment_cvs\|acc_payment` | `activation_time` |
|
|
158
160
|
| `ad_monet` | 广告变现 | 通用参数 | 可能返回 404(未开通或路径不同) |
|
|
159
161
|
|
|
162
|
+
## 数据量控制策略(先小后大,必须遵守)
|
|
163
|
+
|
|
164
|
+
目标:用**最省 token** 的查询顺序先定位问题,再逐步下钻;避免一上来拉按日/全量/多维明细。
|
|
165
|
+
|
|
166
|
+
- 第一次查询:只返回**汇总 + Top10**
|
|
167
|
+
- 汇总:优先用更粗时间粒度(`--group-unit week/month`)或更窄时间窗,而不是直接按日拉满大范围
|
|
168
|
+
- Top10:需要维度分布时,加 `--limit 10`(如 `-g activation_channel --limit 10`)
|
|
169
|
+
- 维度分组:一次只下钻一个维度,只看 **Top10**(`--limit 10`);需要更多再迭代
|
|
170
|
+
- 按日明细:只在需要定位**异常日期**/用户明确要求**按日趋势**时使用;先用周/月趋势锁定区间,再切到 `day` 并缩小时间窗
|
|
171
|
+
- 需要完整明细:只在必须时才用 `--no-truncate`,并同时缩小时间范围或调低 `--limit`,避免上下文爆炸
|
|
172
|
+
|
|
160
173
|
## 数据截断规则
|
|
161
174
|
|
|
162
|
-
脚本**默认自动截断**,`_truncation`
|
|
175
|
+
脚本**默认自动截断**,`_truncation` 字段包含总行数与省略行数。TapDB API 会在结果末尾附带**汇总行**(分组字段为 `null`),脚本截断时会保留该汇总行。
|
|
163
176
|
|
|
164
177
|
| 场景 | 阈值 | 方式 |
|
|
165
178
|
|------|------|------|
|
|
@@ -169,7 +182,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
|
|
|
169
182
|
|
|
170
183
|
- 不加 `--all-retention` 通常仅返回 `DR1-DR30 + DR60/90/120/150/180`;加上后会额外补齐 `DR31-DR59`(及对应 `_newDevice/_rate` 列)。
|
|
171
184
|
|
|
172
|
-
-
|
|
185
|
+
- 汇总数据以 API 返回的汇总行为准(分组字段为 `null`),不要在本地再计算汇总
|
|
173
186
|
- 多次查询:每次先提取关键数值再下一个查询,不累积原始数据
|
|
174
187
|
- 版本分布:一次性查询,不按天拆分(除非用户要求"按日趋势")
|
|
175
188
|
- 需完整数据加 `--no-truncate`
|
|
@@ -179,6 +192,7 @@ python3 <SKILL_DIR>/scripts/tapdb_query.py raw /op/active '{"project_id":2588,"s
|
|
|
179
192
|
- **货币转换**:默认将金额转为人民币(CNY)。通过 `--exchange-to-currency` 可切换目标货币(如 USD/JPY/EUR),传 `none` 禁用转换返回原始金额。影响 income/source/user_value/life_cycle/ad_monet 等含金额字段的接口
|
|
180
193
|
- `filters` 即使无条件也必须传 `[]`,否则 500
|
|
181
194
|
- `group` 必传。retention/source 不传 `-g` 时自动用 `activation_time`,其他默认 `time`
|
|
195
|
+
- `life_cycle` 在 `-g activation_os` 时仅支持 `--quota payment_cvs_rate`(其他 quota 会 500)
|
|
182
196
|
- filters 格式: `{"col_name":"...", "data_type":"string|number|bool|date", "calculate_symbol":"include|un_include", "ftv":[...]}`
|
|
183
197
|
- 各接口维度不同,不支持的维度返回 500。**先 `describe` 确认**
|
|
184
198
|
|
|
@@ -83,21 +83,7 @@ _HEAD = 15
|
|
|
83
83
|
_TAIL = 15
|
|
84
84
|
|
|
85
85
|
|
|
86
|
-
def
|
|
87
|
-
"""Compute min/max/avg for first N numeric columns."""
|
|
88
|
-
if not rows:
|
|
89
|
-
return {}
|
|
90
|
-
keys = [k for k, v in rows[0].items() if isinstance(v, (int, float))][:limit]
|
|
91
|
-
out = {}
|
|
92
|
-
for k in keys:
|
|
93
|
-
vals = [r[k] for r in rows if isinstance(r.get(k), (int, float))]
|
|
94
|
-
if vals:
|
|
95
|
-
out[k] = {"min": min(vals), "max": max(vals),
|
|
96
|
-
"avg": round(sum(vals) / len(vals), 2)}
|
|
97
|
-
return out
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def _slim_rows(rows, cmd_type):
|
|
86
|
+
def _slim_rows(rows, cmd_type, group_alias=None):
|
|
101
87
|
"""Truncate row count; time-series keeps head+tail, others keep head."""
|
|
102
88
|
if not rows:
|
|
103
89
|
return rows, None
|
|
@@ -110,7 +96,24 @@ def _slim_rows(rows, cmd_type):
|
|
|
110
96
|
rows = rows[:cap] + [f"... 省略 {omit} 条 ..."]
|
|
111
97
|
return rows, {"total_rows": total, "omitted": omit}
|
|
112
98
|
|
|
113
|
-
|
|
99
|
+
total = len(rows)
|
|
100
|
+
|
|
101
|
+
# TapDB API often appends a summary row at the end (group field is null).
|
|
102
|
+
# Keep that row (API-provided) when truncating.
|
|
103
|
+
summary_row = None
|
|
104
|
+
group_key = group_alias
|
|
105
|
+
if group_key and isinstance(rows[-1], dict) and group_key in rows[-1] and rows[-1].get(group_key) is None:
|
|
106
|
+
summary_row = rows[-1]
|
|
107
|
+
rows = rows[:-1]
|
|
108
|
+
else:
|
|
109
|
+
# Best-effort fallback when caller didn't provide group_alias.
|
|
110
|
+
for time_key in _TIME_FIELDS:
|
|
111
|
+
if time_key in rows[-1] and rows[-1].get(time_key) is None:
|
|
112
|
+
summary_row = rows[-1]
|
|
113
|
+
rows = rows[:-1]
|
|
114
|
+
break
|
|
115
|
+
|
|
116
|
+
has_time = bool(rows and (_TIME_FIELDS & set(rows[0].keys())))
|
|
114
117
|
if cmd_type == "whale_user":
|
|
115
118
|
cap = _MAX_WHALE_ROWS
|
|
116
119
|
elif has_time:
|
|
@@ -118,17 +121,19 @@ def _slim_rows(rows, cmd_type):
|
|
|
118
121
|
else:
|
|
119
122
|
cap = _MAX_GROUP_ROWS
|
|
120
123
|
if len(rows) <= cap:
|
|
124
|
+
if summary_row is not None:
|
|
125
|
+
return rows + [summary_row], None
|
|
121
126
|
return rows, None
|
|
122
127
|
|
|
123
|
-
total = len(rows)
|
|
124
|
-
summary = _numeric_summary(rows)
|
|
125
128
|
if has_time:
|
|
126
|
-
omit =
|
|
129
|
+
omit = len(rows) - _HEAD - _TAIL
|
|
127
130
|
rows = rows[:_HEAD] + [{"_": f"... 省略 {omit} 行 ..."}] + rows[-_TAIL:]
|
|
128
131
|
else:
|
|
129
|
-
omit =
|
|
132
|
+
omit = len(rows) - cap
|
|
130
133
|
rows = rows[:cap] + [{"_": f"... 省略 {omit} 行 ..."}]
|
|
131
|
-
|
|
134
|
+
if summary_row is not None:
|
|
135
|
+
rows.append(summary_row)
|
|
136
|
+
return rows, {"total_rows": total, "omitted": omit}
|
|
132
137
|
|
|
133
138
|
|
|
134
139
|
def _list_of_lists_to_dicts(lol):
|
|
@@ -177,7 +182,7 @@ def _rebuild(resp, path, rows, info):
|
|
|
177
182
|
return result
|
|
178
183
|
|
|
179
184
|
|
|
180
|
-
def truncate_response(resp, cmd_type=None):
|
|
185
|
+
def truncate_response(resp, cmd_type=None, group_alias=None):
|
|
181
186
|
"""Truncate API response to save context window tokens."""
|
|
182
187
|
if not resp or (isinstance(resp, dict) and resp.get("error")):
|
|
183
188
|
return resp
|
|
@@ -186,7 +191,7 @@ def truncate_response(resp, cmd_type=None):
|
|
|
186
191
|
return resp
|
|
187
192
|
|
|
188
193
|
info = {}
|
|
189
|
-
rows, row_info = _slim_rows(rows, cmd_type)
|
|
194
|
+
rows, row_info = _slim_rows(rows, cmd_type, group_alias=group_alias)
|
|
190
195
|
if row_info:
|
|
191
196
|
info.update(row_info)
|
|
192
197
|
if not info:
|
|
@@ -466,7 +471,7 @@ ENDPOINT_CAPS = {
|
|
|
466
471
|
"lang_system": "过滤值会自动翻译,可直接用展示名",
|
|
467
472
|
},
|
|
468
473
|
"group_notes": {
|
|
469
|
-
"activation_os": "
|
|
474
|
+
"activation_os": "仅当 quota=payment_cvs_rate 时支持;(建议改用 time/activation_time)",
|
|
470
475
|
},
|
|
471
476
|
"unsupported_note": "分组仅支持 time/activation_time/activation_os;过滤不支持 activation_app_version/first_server/current_server/utmsrc/login_type/payment_source",
|
|
472
477
|
},
|
|
@@ -572,7 +577,10 @@ def do_query(args, endpoint_path, extra=None, cmd_type=None):
|
|
|
572
577
|
url = f"{base_url}/mcp/op/{endpoint_path}"
|
|
573
578
|
result = http_request("POST", url, {"MCP-KEY": key}, body)
|
|
574
579
|
if not getattr(args, "no_truncate", False):
|
|
575
|
-
|
|
580
|
+
group_alias = None
|
|
581
|
+
if isinstance(body.get("group"), dict):
|
|
582
|
+
group_alias = body["group"].get("col_alias")
|
|
583
|
+
result = truncate_response(result, cmd_type or endpoint_path, group_alias=group_alias)
|
|
576
584
|
output(result)
|
|
577
585
|
|
|
578
586
|
|
|
@@ -633,6 +641,13 @@ def cmd_whale_user(args):
|
|
|
633
641
|
|
|
634
642
|
|
|
635
643
|
def cmd_life_cycle(args):
    """Handle the ``life_cycle`` command.

    If the parsed arguments group by ``activation_os`` with any quota other
    than ``payment_cvs_rate``, an error payload is emitted through ``output``
    and no query is issued. In every other case the request is delegated to
    ``do_query`` with the selected quota.
    """
    grouping = getattr(args, "group_by", None)
    chosen_quota = getattr(args, "quota", None)

    # Happy path first: anything except the rejected combination goes
    # straight to the query helper.
    if grouping != "activation_os" or chosen_quota == "payment_cvs_rate":
        do_query(args, "life_cycle", {"quota": args.quota})
        return

    # Rejected combination: report it locally instead of sending the request.
    rejection = {
        "error": True,
        "message": "life_cycle 接口在 -g activation_os 时仅支持 --quota payment_cvs_rate(其他 quota 会 500)",
        "hint": "请改用 --quota payment_cvs_rate 或改用 -g time / -g activation_time",
    }
    output(rejection)
|
|
637
652
|
|
|
638
653
|
|