npm - @shadowforge0/aquifer-memory - Versions diffs - 1.5.9 → 1.6.0 - Mend

@shadowforge0/aquifer-memory 1.5.9 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/.env.example +23 -0
package/README.md +96 -73
package/README_CN.md +659 -0
package/README_TW.md +680 -0
package/aquifer.config.example.json +34 -0
package/consumers/claude-code.js +11 -11
package/consumers/cli.js +374 -39
package/consumers/codex-handoff.js +152 -0
package/consumers/codex.js +1549 -0
package/consumers/default/daily-entries.js +23 -4
package/consumers/default/index.js +2 -2
package/consumers/default/prompts/summary.js +6 -6
package/consumers/mcp.js +131 -7
package/consumers/openclaw-ext/index.js +0 -1
package/consumers/openclaw-plugin.js +44 -4
package/consumers/shared/config.js +28 -0
package/consumers/shared/factory.js +2 -0
package/consumers/shared/ingest.js +1 -1
package/consumers/shared/normalize.js +14 -3
package/consumers/shared/recall-format.js +53 -0
package/consumers/shared/summary-parser.js +151 -0
package/core/aquifer.js +384 -18
package/core/finalization-review.js +319 -0
package/core/insights.js +210 -58
package/core/mcp-manifest.js +69 -2
package/core/memory-bootstrap.js +188 -0
package/core/memory-consolidation.js +1236 -0
package/core/memory-promotion.js +544 -0
package/core/memory-recall.js +247 -0
package/core/memory-records.js +581 -0
package/core/memory-safety-gate.js +224 -0
package/core/session-finalization.js +350 -0
package/core/storage.js +456 -2
package/docs/getting-started.md +99 -0
package/docs/postprocess-contract.md +2 -2
package/docs/setup.md +51 -2
package/package.json +31 -9
package/pipeline/normalize/adapters/codex.js +106 -0
package/pipeline/normalize/detect.js +3 -2
package/schema/001-base.sql +3 -0
package/schema/007-v1-foundation.sql +273 -0
package/schema/008-session-finalizations.sql +50 -0
package/schema/009-v1-assertion-plane.sql +193 -0
package/schema/010-v1-finalization-review.sql +160 -0
package/schema/011-v1-compaction-claim.sql +46 -0
package/schema/012-v1-compaction-lease.sql +39 -0
package/schema/013-v1-compaction-lineage.sql +193 -0
package/scripts/backfill-canonical-key.js +250 -0
package/scripts/codex-recovery.js +532 -0
package/consumers/miranda/context-inject.js +0 -119
package/consumers/miranda/daily-entries.js +0 -224
package/consumers/miranda/index.js +0 -364
package/consumers/miranda/instance.js +0 -55
package/consumers/miranda/llm.js +0 -99
package/consumers/miranda/profile.json +0 -145
package/consumers/miranda/prompts/summary.js +0 -303
package/consumers/miranda/recall-format.js +0 -76
package/consumers/miranda/render-daily-md.js +0 -186
package/consumers/miranda/workspace-files.js +0 -91
package/scripts/drop-entity-state-history.sql +0 -17
package/scripts/drop-insights.sql +0 -12
package/scripts/install-openclaw.sh +0 -59
package/scripts/queries.json +0 -45
package/scripts/retro-recall-bench.js +0 -409
package/scripts/sample-bench-queries.sql +0 -75

package/scripts/retro-recall-bench.js DELETED Viewed

@@ -1,409 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-/**
- * Retro recall bench — runs the same query set across 6 pipelines and
- * reports nDCG@5 / MRR / latency / empty-rate. Designed for the post-1.3.0
- * Phase 0 audit; see ~/.claude/develop-runs/20260419-142432-aquifer-memory-routes/spec.md.
- *
- * Pipelines:
- *   fts-simple       storage.searchSessions(ftsConfig='simple')
- *   fts-zhcfg        storage.searchSessions(ftsConfig='zhcfg')   [skip if zhcfg missing]
- *   summary-vector   storage.searchSummaryEmbeddings
- *   turn-only        storage.searchTurnEmbeddings
- *   hybrid           aquifer.recall(mode='hybrid', rerank disabled)
- *   hybrid-rerank    aquifer.recall(mode='hybrid', rerank forced)
- *
- * Usage:
- *   node scripts/retro-recall-bench.js \
- *     --query-set queries.json \
- *     [--judgements judgements.json] \
- *     [--output report.json] \
- *     [--markdown summary.md] \
- *     [--pipelines fts-simple,fts-zhcfg,summary-vector,turn-only,hybrid,hybrid-rerank] \
- *     [--limit 5] [--warmup 1] [--schema miranda] [--tenant-id default]
- *
- * env:
- *   DATABASE_URL          required
- *   AQUIFER_SCHEMA        default 'miranda'
- *   EMBED_PROVIDER + key  required for vector pipelines
- *   AQUIFER_LLM_PROVIDER  unused here (no enrich)
- *   OPENROUTER_API_KEY    required for hybrid-rerank pipeline
- */
-const fs = require('fs');
-const { Pool } = require('pg');
-const aquiferIndex = require('..');
-const storage = require('../core/storage');
-const { createEmbedder } = require('..');
/** Every pipeline the bench knows how to run, in default execution order. */
const ALL_PIPELINES = [
  'fts-simple',
  'fts-zhcfg',
  'summary-vector',
  'turn-only',
  'hybrid',
  'hybrid-rerank',
];

/**
 * Parse the bench CLI arguments into an options object.
 *
 * Unknown tokens are ignored. `-h` / `--help` sets `help: true`.
 *
 * @param {string[]} argv - Command-line tokens after the script name.
 * @returns {object} Options: querySet, judgements, output, markdown (paths or
 *   null), pipelines (string[]), limit, warmup, rerankTopK (integers),
 *   schema, tenantId (strings), and optionally help.
 * @throws {Error} When a value-taking flag is the last token (no value), or a
 *   numeric flag is given something that does not parse to an integer in range.
 */
function parseArgs(argv) {
  const args = {
    querySet: null,
    judgements: null,
    output: null,
    markdown: null,
    // Copy, so mutating the parsed options can never alter the shared default list.
    pipelines: [...ALL_PIPELINES],
    limit: 5,
    warmup: 1,
    schema: process.env.AQUIFER_SCHEMA || 'miranda',
    tenantId: process.env.AQUIFER_TENANT_ID || 'default',
    rerankTopK: 20,
  };

  const asText = (raw) => raw;
  const asList = (raw) => raw.split(',').map((s) => s.trim()).filter(Boolean);
  // Reject NaN / out-of-range numbers here instead of letting them reach the queries.
  const asInt = (min) => (raw, flag) => {
    const parsed = Number.parseInt(raw, 10);
    if (!Number.isInteger(parsed) || parsed < min) {
      throw new Error(`Invalid value for ${flag}: expected an integer >= ${min}, got "${raw}"`);
    }
    return parsed;
  };

  // flag -> [option key, converter]
  const valueFlags = new Map([
    ['--query-set', ['querySet', asText]],
    ['--judgements', ['judgements', asText]],
    ['--output', ['output', asText]],
    ['--markdown', ['markdown', asText]],
    ['--pipelines', ['pipelines', asList]],
    ['--limit', ['limit', asInt(1)]],
    ['--warmup', ['warmup', asInt(0)]],
    ['--schema', ['schema', asText]],
    ['--tenant-id', ['tenantId', asText]],
    ['--rerank-topk', ['rerankTopK', asInt(1)]],
  ]);

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === '-h' || flag === '--help') {
      args.help = true;
      continue;
    }
    const spec = valueFlags.get(flag);
    if (!spec) continue;

    const raw = argv[i + 1];
    if (raw === undefined) {
      throw new Error(`Missing value for ${flag}`);
    }
    const [key, convert] = spec;
    args[key] = convert(raw, flag);
    i++; // the value token has been consumed
  }
  return args;
}
/**
 * Print the usage banner, i.e. the header comment at the top of this script.
 *
 * The banner runs from the first line of the file through the first line that
 * closes a block comment, so it tracks the header as it grows or shrinks
 * instead of relying on a fixed line count. If no closing marker is found the
 * first 36 lines are printed.
 */
function printHelp() {
  const FALLBACK_LINE_COUNT = 36;
  const lines = fs.readFileSync(__filename, 'utf8').split('\n');
  const closingIndex = lines.findIndex((line) => line.trimEnd().endsWith('*/'));
  const end = closingIndex >= 0 ? closingIndex + 1 : FALLBACK_LINE_COUNT;
  console.log(lines.slice(0, end).join('\n'));
}
/**
 * Find out which of the text-search configurations the FTS pipelines ask for
 * ('simple' and 'zhcfg') exist in pg_ts_config.
 *
 * @param {{ query: Function }} pool - Object whose query(sql) resolves to `{ rows }`.
 * @returns {Promise<Set<string>>} The cfgname values that were found.
 */
async function detectFtsConfigsAvailable(pool) {
  const { rows } = await pool.query(`SELECT cfgname FROM pg_ts_config WHERE cfgname IN ('simple','zhcfg')`);
  const found = new Set();
  for (const { cfgname } of rows) {
    found.add(cfgname);
  }
  return found;
}
/**
 * Run `fn`, measure how long it took, and capture (rather than propagate) any
 * failure.
 *
 * @param {Function} fn - Operation to time; its result is awaited.
 * @returns {Promise<{latencyMs: number, result: *, error: *}>} Elapsed
 *   milliseconds, the awaited result (undefined on failure) and the thrown
 *   value (null on success).
 */
async function withLatency(fn) {
  const startedAt = process.hrtime.bigint();
  const outcome = { result: undefined, error: null };
  try {
    outcome.result = await fn();
  } catch (caught) {
    outcome.error = caught;
  }
  const elapsedNs = process.hrtime.bigint() - startedAt;
  return {
    latencyMs: Number(elapsedNs) / 1e6,
    result: outcome.result,
    error: outcome.error,
  };
}
/**
 * Convert raw storage rows into the bench's common hit shape.
 *
 * Both snake_case and camelCase column names are accepted for the session and
 * agent identifiers.
 *
 * @param {object[]} rows - Rows returned by a storage search.
 * @param {string} scoreKey - Name of the row property to report as `score`.
 * @returns {object[]} One hit per row with a 1-based `rank`; absent values are null.
 */
function normalizeHits(rows, scoreKey) {
  const toHit = (row, index) => {
    const sessionId = row.session_id || row.sessionId;
    const sessionRowId = row.id || row.session_row_id || row.sessionRowId || null;
    const agentId = row.agent_id || row.agentId || null;
    return {
      rank: index + 1,
      sessionId,
      sessionRowId,
      agentId,
      score: row[scoreKey] ?? null,
      summaryDistance: row.distance ?? null,
      turnDistance: row.turn_distance ?? null,
    };
  };
  return rows.map(toHit);
}
/**
 * Convert results of aquifer.recall() into the bench's common hit shape.
 *
 * The optional `_debug` object supplies hybridScore (reported in the
 * `summaryDistance` slot), rerankApplied and rerankReason.
 *
 * @param {object[]} rows - Recall results.
 * @returns {object[]} One hit per row with a 1-based `rank`.
 */
function normalizeAquiferHits(rows) {
  return rows.map((row, index) => {
    const debug = row._debug;
    return {
      rank: index + 1,
      sessionId: row.sessionId,
      sessionRowId: null,
      agentId: row.agentId,
      score: row.score ?? null,
      summaryDistance: debug?.hybridScore ?? null,
      turnDistance: null,
      rerankApplied: debug?.rerankApplied ?? false,
      rerankReason: debug?.rerankReason ?? null,
    };
  });
}
/**
 * Run one named pipeline for one query.
 *
 * @param {string} name - Pipeline name ('fts-simple', 'fts-zhcfg',
 *   'summary-vector', 'turn-only', 'hybrid' or 'hybrid-rerank').
 * @param {object} ctx - Shared run context: pool, schema, tenantId, limit,
 *   queryVec, aquifer and ftsAvailable (a Set of tsconfig names).
 * @param {object} query - Query record; `text` and `agentId` are read.
 * @returns {Promise<object>} Either `{ skipped: true, reason }` when the
 *   pipeline cannot run, or the withLatency() envelope whose result is the
 *   normalized hit list.
 */
async function runPipeline(name, ctx, query) {
  const { pool, schema, tenantId, limit, queryVec, aquifer, ftsAvailable } = ctx;
  const skip = (reason) => ({ skipped: true, reason });

  // Full-text search with the given tsconfig.
  const timedFts = (ftsConfig) => withLatency(async () => {
    const rows = await storage.searchSessions(pool, query.text, {
      schema, tenantId, agentId: query.agentId, limit, ftsConfig,
    });
    return normalizeHits(rows, 'fts_rank');
  });

  // Embedding search through the named storage method.
  const timedVector = (method, scoreKey) => withLatency(async () => {
    const { rows } = await storage[method](pool, {
      schema, tenantId, queryVec, agentId: query.agentId, limit,
    });
    return normalizeHits(rows, scoreKey);
  });

  // Hybrid recall through the aquifer facade, with rerank switched on or off.
  const timedRecall = (rerank) => withLatency(async () => {
    const rows = await aquifer.recall(query.text, {
      agentId: query.agentId, limit, mode: 'hybrid', rerank,
    });
    return normalizeAquiferHits(rows);
  });

  if (name === 'fts-simple') {
    if (!ftsAvailable.has('simple')) return skip('simple tsconfig missing');
    return timedFts('simple');
  }
  if (name === 'fts-zhcfg') {
    if (!ftsAvailable.has('zhcfg')) return skip('zhcfg tsconfig missing — install zhparser');
    return timedFts('zhcfg');
  }
  if (name === 'summary-vector') {
    if (!queryVec) return skip('no embed provider');
    return timedVector('searchSummaryEmbeddings', 'distance');
  }
  if (name === 'turn-only') {
    if (!queryVec) return skip('no embed provider');
    return timedVector('searchTurnEmbeddings', 'turn_distance');
  }
  if (name === 'hybrid') return timedRecall(false);
  if (name === 'hybrid-rerank') return timedRecall(true);

  return skip(`unknown pipeline ${name}`);
}
/**
 * Discounted cumulative gain of a ranked list of relevance grades.
 *
 * Each position i (0-based) contributes (2^rel - 1) / log2(i + 2).
 *
 * @param {number[]} rels - Relevance grades in rank order.
 * @returns {number} The DCG; 0 for an empty list.
 */
function dcg(rels) {
  let total = 0;
  rels.forEach((rel, position) => {
    const gain = Math.pow(2, rel) - 1;
    total += gain / Math.log2(position + 2);
  });
  return total;
}
/**
 * Normalized DCG over the first k positions.
 *
 * The ideal ordering is built from the relevance grades of the supplied list
 * itself, sorted in descending order.
 *
 * @param {number[]} judgedHits - Relevance grades in the order returned.
 * @param {number} k - Cut-off rank.
 * @returns {number|null} DCG@k divided by ideal DCG@k, or null when the
 *   ideal DCG is 0 (nothing relevant in the list).
 */
function nDcgAtK(judgedHits, k) {
  const bestOrder = judgedHits.slice().sort((x, y) => y - x);
  const idealScore = dcg(bestOrder.slice(0, k));
  if (idealScore === 0) {
    return null;
  }
  const actualScore = dcg(judgedHits.slice(0, k));
  return actualScore / idealScore;
}
/**
 * Reciprocal rank of the first relevant hit.
 *
 * @param {number[]} judgedHits - Relevance grades in rank order.
 * @returns {number} 1 / (1-based position of the first grade > 0), or 0 when
 *   no grade is positive.
 */
function reciprocalRank(judgedHits) {
  for (let position = 0; position < judgedHits.length; position++) {
    if (judgedHits[position] > 0) {
      return 1 / (position + 1);
    }
  }
  return 0;
}
/**
 * Aggregate per-pipeline quality and latency metrics over a set of runs.
 *
 * Runs flagged `skipped` or carrying an `error` are left out entirely. A hit
 * with no matching judgement counts as relevance 0.
 *
 * @param {object[]} runs - Run records (queryId, pipeline, latencyMs, hits, ...).
 * @param {object[]} judgements - Records of { queryId, sessionId, relevance }.
 * @param {number} k - Cut-off passed to nDcgAtK (reported under the `nDCG5` key).
 * @returns {object[]} One entry per pipeline: pipeline, count, nDCG5, MRR,
 *   latencyMs { mean, p50, p95 }, emptyResultRate, judgeableRate.
 */
function computeMetrics(runs, judgements, k) {
  const relevanceOf = new Map();
  for (const { queryId, sessionId, relevance } of judgements) {
    relevanceOf.set(`${queryId}::${sessionId}`, relevance);
  }

  const buckets = {};
  for (const run of runs) {
    if (run.skipped || run.error) continue;

    let bucket = buckets[run.pipeline];
    if (!bucket) {
      bucket = { judged: [], latency: [], empty: 0, total: 0, judgeable: 0 };
      buckets[run.pipeline] = bucket;
    }

    bucket.total += 1;
    bucket.latency.push(run.latencyMs);

    const hits = run.hits || [];
    if (hits.length === 0) bucket.empty += 1;

    const rels = hits.map((hit) => relevanceOf.get(`${run.queryId}::${hit.sessionId}`) ?? 0);
    if (rels.some((rel) => rel > 0)) bucket.judgeable += 1;
    bucket.judged.push(rels);
  }

  // Arithmetic mean, or null when there is nothing to average.
  const mean = (values) => (values.length ? values.reduce((a, b) => a + b, 0) / values.length : null);

  return Object.entries(buckets).map(([pipeline, bucket]) => {
    // Queries whose hit list has no relevant entry yield null and are excluded from nDCG.
    const ndcgs = bucket.judged.map((rels) => nDcgAtK(rels, k)).filter((value) => value !== null);
    const mrrs = bucket.judged.map((rels) => reciprocalRank(rels));

    const sorted = [...bucket.latency].sort((a, b) => a - b);
    const percentile = (frac) => {
      if (sorted.length === 0) return null;
      const index = Math.min(sorted.length - 1, Math.floor(frac * sorted.length));
      return sorted[index];
    };

    return {
      pipeline,
      count: bucket.total,
      nDCG5: mean(ndcgs),
      MRR: mean(mrrs),
      latencyMs: {
        mean: mean(sorted),
        p50: percentile(0.5),
        p95: percentile(0.95),
      },
      emptyResultRate: bucket.total ? bucket.empty / bucket.total : 0,
      judgeableRate: bucket.total ? bucket.judgeable / bucket.total : 0,
    };
  });
}
/**
 * Render the bench report as a Markdown summary.
 *
 * Sections: run metadata, the overall metrics table, optional ZH+Mixed and EN
 * subset tables (emitted when the corresponding metrics entry is truthy), and
 * a list of skipped pipelines when there are any.
 *
 * @param {object} report - Report object with meta, queries, metrics and skipped.
 * @param {number} k - Cut-off shown in the section headings.
 * @returns {string} Markdown text ending with a newline.
 */
function renderMarkdown(report, k) {
  const out = [
    `# Aquifer Retro Recall Bench`,
    '',
    `- Generated: ${report.meta.generatedAt}`,
    `- Schema: \`${report.meta.schema}\` / Tenant: \`${report.meta.tenantId}\``,
    `- Queries: ${report.queries.length} (warmup ${report.meta.warmup} excluded from metrics)`,
    `- Pipelines: ${report.meta.pipelines.join(', ')}`,
    '',
    `## Overall (top ${k})`,
    '| Pipeline | nDCG@5 | MRR | Mean ms | p50 | p95 | Empty% | Judgeable% | N |',
    '|---|---|---|---|---|---|---|---|---|',
  ];

  for (const m of report.metrics.overall) {
    out.push(`| ${m.pipeline} | ${fmt(m.nDCG5)} | ${fmt(m.MRR)} | ${fmtMs(m.latencyMs.mean)} | ${fmtMs(m.latencyMs.p50)} | ${fmtMs(m.latencyMs.p95)} | ${pct(m.emptyResultRate)} | ${pct(m.judgeableRate)} | ${m.count} |`);
  }

  // The language-subset tables share one compact layout.
  const appendSubset = (heading, rows) => {
    if (!rows) return;
    out.push('');
    out.push(heading);
    out.push('| Pipeline | nDCG@5 | MRR | Empty% | N |');
    out.push('|---|---|---|---|---|');
    for (const m of rows) {
      out.push(`| ${m.pipeline} | ${fmt(m.nDCG5)} | ${fmt(m.MRR)} | ${pct(m.emptyResultRate)} | ${m.count} |`);
    }
  };
  appendSubset(`## ZH+Mixed subset (top ${k})`, report.metrics.zhMixed);
  appendSubset(`## EN subset (top ${k})`, report.metrics.en);

  if (report.skipped.length > 0) {
    out.push('');
    out.push('## Skipped pipelines');
    report.skipped.forEach((s) => {
      out.push(`- \`${s.pipeline}\`: ${s.reason}`);
    });
  }

  return out.join('\n') + '\n';
}
/** Format a metric with three decimals; null/undefined render as an em dash. */
function fmt(v) {
  if (v == null) return '—';
  return v.toFixed(3);
}
/** Format a millisecond value with one decimal; null/undefined render as an em dash. */
function fmtMs(v) {
  if (v == null) return '—';
  return v.toFixed(1);
}
/** Format a 0..1 fraction as a whole-number percentage; null/undefined render as an em dash. */
function pct(v) {
  if (v == null) return '—';
  const percent = (v * 100).toFixed(0);
  return `${percent}%`;
}
/**
 * CLI entry point: load the query set (and optional judgements), run every
 * requested pipeline for every query, then write a JSON report and a
 * Markdown summary.
 *
 * Exits with status 0 after printing help, and with status 2 when no
 * --query-set is given, the query set is empty, or no database URL is set.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help || !args.querySet) {
    printHelp();
    process.exit(args.help ? 0 : 2);
  }

  const querySet = JSON.parse(fs.readFileSync(args.querySet, 'utf8'));
  const queries = querySet.queries || [];
  if (queries.length === 0) {
    console.error('Empty query set');
    process.exit(2);
  }
  const judgements = args.judgements
    ? (JSON.parse(fs.readFileSync(args.judgements, 'utf8')).judgements || [])
    : [];

  const dbUrl = process.env.DATABASE_URL || process.env.AQUIFER_DB_URL;
  if (!dbUrl) {
    console.error('DATABASE_URL is required');
    process.exit(2);
  }

  const pool = new Pool({ connectionString: dbUrl });
  const runs = [];
  const skipped = new Set();
  let ftsAvailable;
  let aquifer = null;

  try {
    ftsAvailable = await detectFtsConfigsAvailable(pool);

    const embedFn = (() => {
      try {
        const e = createEmbedder({});  // autodetect via EMBED_PROVIDER
        return (texts) => e.embedBatch(texts);
      } catch (err) {
        console.warn(`[bench] embed unavailable: ${err.message} — vector pipelines will skip`);
        return null;
      }
    })();

    aquifer = aquiferIndex.createAquifer({
      db: pool,
      schema: args.schema,
      tenantId: args.tenantId,
      embed: embedFn ? { fn: embedFn } : undefined,
      rerank: process.env.OPENROUTER_API_KEY
        ? { provider: 'openrouter', openrouterApiKey: process.env.OPENROUTER_API_KEY, topK: args.rerankTopK, autoTrigger: { enabled: false } }
        : null,
    });

    const ctx = {
      pool, schema: args.schema, tenantId: args.tenantId,
      limit: args.limit, queryVec: null, aquifer, ftsAvailable,
    };

    // Warmup: just run hybrid once to prime the pool.
    for (let w = 0; w < args.warmup; w++) {
      try { await aquifer.recall(queries[0].text, { limit: args.limit }); } catch { /* ignore */ }
    }

    for (const q of queries) {
      // One embedding per query, shared by every vector pipeline.
      let queryVec = null;
      if (embedFn) {
        try { queryVec = (await embedFn([q.text]))[0]; }
        catch (err) { console.warn(`[bench] embed failed for "${q.id}": ${err.message}`); }
      }
      ctx.queryVec = queryVec;

      for (const pipeline of args.pipelines) {
        const r = await runPipeline(pipeline, ctx, q);
        if (r.skipped) {
          // Serialized so the Set collapses repeats of the same pipeline/reason pair.
          skipped.add(JSON.stringify({ pipeline, reason: r.reason }));
          continue;
        }
        runs.push({
          queryId: q.id,
          pipeline,
          latencyMs: r.latencyMs,
          empty: !r.result || r.result.length === 0,
          error: r.error ? { code: r.error.code || 'ERR', message: r.error.message } : null,
          hits: r.result || [],
        });
      }
    }
  } finally {
    // Best-effort shutdown, also on failure. Wrapping close() in a promise
    // chain tolerates a missing, synchronous or throwing close().
    if (aquifer) {
      await Promise.resolve().then(() => aquifer.close?.()).catch(() => {});
    }
    await pool.end().catch(() => {});
  }

  const zhMixedQueries = new Set(queries.filter(q => q.lang === 'zh' || q.lang === 'mixed').map(q => q.id));
  const enQueries = new Set(queries.filter(q => q.lang === 'en').map(q => q.id));
  const subset = (set) => runs.filter(r => set.has(r.queryId));

  const report = {
    meta: {
      generatedAt: new Date().toISOString(),
      schema: args.schema,
      tenantId: args.tenantId,
      limit: args.limit,
      warmup: args.warmup,
      pipelines: args.pipelines,
      ftsConfigsAvailable: [...ftsAvailable],
    },
    queries,
    judgements,
    runs,
    metrics: {
      overall: computeMetrics(runs, judgements, args.limit),
      zhMixed: zhMixedQueries.size > 0 ? computeMetrics(subset(zhMixedQueries), judgements, args.limit) : null,
      en: enQueries.size > 0 ? computeMetrics(subset(enQueries), judgements, args.limit) : null,
    },
    skipped: [...skipped].map(s => JSON.parse(s)),
  };

  const outPath = args.output || `bench-report-${Date.now()}.json`;
  fs.writeFileSync(outPath, JSON.stringify(report, null, 2));
  console.log(`Wrote ${outPath}`);

  const md = renderMarkdown(report, args.limit);
  // When --output has no .json suffix, a plain replace() would leave the path
  // unchanged and the Markdown would overwrite the JSON report; append .md instead.
  const derivedMdPath = /\.json$/.test(outPath) ? outPath.replace(/\.json$/, '.md') : `${outPath}.md`;
  const mdPath = args.markdown || derivedMdPath;
  fs.writeFileSync(mdPath, md);
  console.log(`Wrote ${mdPath}`);

  if (judgements.length === 0) {
    console.log('\nNo judgements provided — metrics are coverage/latency only.');
    console.log('Edit the JSON output to add judgements like:');
    console.log('  "judgements": [{"queryId":"q-001","sessionId":"sess_abc","relevance":3}, ...]');
    console.log('then re-run with --judgements <path>.');
  }
}
// Script entry: run the bench; any unhandled failure is logged and turns into exit status 1.
main().catch((failure) => {
  const detail = failure.stack || failure.message;
  console.error('[bench] fatal:', detail);
  process.exit(1);
});

package/scripts/sample-bench-queries.sql DELETED Viewed

@@ -1,75 +0,0 @@
-- Sample real user "first question" turns from sessions for retro recall bench.
-- Output: up to 25 rows balanced across agent (cc/main/life) × language (zh/en/mixed);
--   the per-bucket caps are the bucket_rn limits in the final WHERE clause
--   (main 4/3/2, life 3/2/2, cc 4/3/2 for mixed/zh/en).
--   Rows are picked with ORDER BY random(), so every run yields a different sample.
-- Usage:
--   psql $DATABASE_URL -f scripts/sample-bench-queries.sql --csv > queries.csv
--   then convert to JSON via:
--     node -e "const fs=require('fs');const lines=fs.readFileSync('queries.csv','utf8').trim().split('\n').slice(1);const out={version:1,queries:lines.map((l,i)=>{const[id,sid,ag,src,lang,qt]=l.split(','); return {id:'q-sql-'+(i+1),sessionRowId:Number(id),sessionId:sid,agentId:ag,source:src,sourceKind:'sql-sampled',lang,text:qt.replace(/^\"|\"$/g,'').replace(/\"\"/g,'\"')}})};fs.writeFileSync('queries.json',JSON.stringify(out,null,2))"
--   NOTE: that one-liner splits each CSV line on every comma and each record on
--   every newline, so query text containing a comma or a line break is truncated;
--   review queries.json (or use a real CSV parser) before benchmarking.
-- Override with `psql -v schema=aquifer -f sample-bench-queries.sql`

-- Default the schema variable to 'miranda' when it was not passed with -v.
\if :{?schema}
\else
  \set schema 'miranda'
\endif

WITH raw_turns AS (
  -- One row per message of every session of the three sampled agents.
  SELECT
    s.id AS session_row_id,
    s.session_id,
    s.agent_id,
    s.source,
    s.started_at,
    m.ordinality AS turn_ordinal,
    m.msg->>'role' AS role,
    m.msg->>'content' AS content
  FROM :"schema".sessions s
  CROSS JOIN LATERAL jsonb_array_elements(
    -- Use messages->'normalized' when that key is present, else messages itself.
    COALESCE(s.messages->'normalized', s.messages)
  ) WITH ORDINALITY AS m(msg, ordinality)
  WHERE s.agent_id IN ('main', 'life', 'cc')
    AND s.user_count > 0
),
question_turns AS (
  -- Short user turns that look like questions, tagged with a language bucket.
  SELECT *,
    CASE
      WHEN content ~ '[\u4e00-\u9fff]' AND content ~ '[A-Za-z]' THEN 'mixed'
      WHEN content ~ '[\u4e00-\u9fff]' THEN 'zh'
      WHEN content ~ '[A-Za-z]' THEN 'en'
      ELSE 'other'
    END AS lang_bucket,
    -- Numbered after the WHERE filter below, so 1 = first qualifying turn of the session.
    row_number() OVER (PARTITION BY session_row_id ORDER BY turn_ordinal) AS seq_in_session
  FROM raw_turns
  WHERE role = 'user'
    AND content IS NOT NULL
    AND length(content) BETWEEN 4 AND 200
    -- ~* (case-insensitive) so capitalised openers such as "How ..." / "Why ..."
    -- match even when the turn has no question mark.
    AND (content ~ '[?？]' OR content ~* '(嗎|呢|怎麼|如何|why|how|what|can you|could you)')
),
first_questions AS (
  SELECT *
  FROM question_turns
  WHERE seq_in_session = 1
    AND lang_bucket IN ('mixed', 'zh', 'en')
),
balanced AS (
  -- Random rank inside each (agent, language) bucket; the caps below pick from the top.
  SELECT *,
    row_number() OVER (PARTITION BY agent_id, lang_bucket ORDER BY random()) AS bucket_rn
  FROM first_questions
)
SELECT
  session_row_id,
  session_id,
  agent_id,
  source,
  lang_bucket AS lang,
  content AS query_text
FROM balanced
WHERE
  (agent_id = 'main' AND lang_bucket = 'mixed' AND bucket_rn <= 4) OR
  (agent_id = 'main' AND lang_bucket = 'zh'    AND bucket_rn <= 3) OR
  (agent_id = 'main' AND lang_bucket = 'en'    AND bucket_rn <= 2) OR
  (agent_id = 'life' AND lang_bucket = 'mixed' AND bucket_rn <= 3) OR
  (agent_id = 'life' AND lang_bucket = 'zh'    AND bucket_rn <= 2) OR
  (agent_id = 'life' AND lang_bucket = 'en'    AND bucket_rn <= 2) OR
  (agent_id = 'cc'   AND lang_bucket = 'mixed' AND bucket_rn <= 4) OR
  (agent_id = 'cc'   AND lang_bucket = 'zh'    AND bucket_rn <= 3) OR
  (agent_id = 'cc'   AND lang_bucket = 'en'    AND bucket_rn <= 2)
ORDER BY agent_id, lang;