bulltrackers-module 1.0.993 → 1.0.995
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v3/routes/popular-investors.js +4 -3
- package/functions/api-v3/routes/tutorial.js +74 -5
- package/functions/api-v3/services/WriteService.js +10 -0
- package/functions/core/utils/analytics_registry.js +116 -0
- package/functions/maintenance/analytics-load-firestore-to-bq/ANALYTICS_SYSTEM_DOCUMENTATION.md +49 -7
- package/functions/maintenance/analytics-load-firestore-to-bq/README.md +6 -0
- package/functions/maintenance/analytics-load-firestore-to-bq/index.js +55 -215
- package/functions/maintenance/analytics-load-firestore-to-bq/sourceConfig.js +268 -0
- package/package.json +1 -1
|
@@ -9,6 +9,7 @@ const { requireFirebaseAuth } = require('../middleware/auth');
|
|
|
9
9
|
const { requireVerifiedUser, sanitizeCid } = require('../middleware/identity');
|
|
10
10
|
|
|
11
11
|
const { EtoroApiService } = require('../services/EtoroApiService');
|
|
12
|
+
const { recordAnalyticsWrite } = require('../../core/utils/analytics_registry');
|
|
12
13
|
|
|
13
14
|
const router = express.Router();
|
|
14
15
|
|
|
@@ -137,7 +138,7 @@ router.get('/search', async (req, res, next) => {
|
|
|
137
138
|
query: validated.query,
|
|
138
139
|
resultCount: results.length,
|
|
139
140
|
createdAt: new Date()
|
|
140
|
-
}).catch(() => {});
|
|
141
|
+
}).then(() => recordAnalyticsWrite(db, 'analytics_events_search_query').catch(() => {})).catch(() => {});
|
|
141
142
|
}
|
|
142
143
|
|
|
143
144
|
res.json({ success: true, count: results.length, data: results });
|
|
@@ -295,7 +296,7 @@ router.post('/request-addition', async (req, res, next) => {
|
|
|
295
296
|
requestedAt: new Date(),
|
|
296
297
|
status: 'rejected_not_pi',
|
|
297
298
|
etoroCid: info.realCID || info.gcid || null
|
|
298
|
-
}).catch(() => {});
|
|
299
|
+
}).then(() => recordAnalyticsWrite(db, 'pi_addition_requests').catch(() => {})).catch(() => {});
|
|
299
300
|
}
|
|
300
301
|
|
|
301
302
|
return res.status(400).json({
|
|
@@ -355,7 +356,7 @@ router.post('/request-addition', async (req, res, next) => {
|
|
|
355
356
|
requestedAt: new Date(),
|
|
356
357
|
status: 'accepted',
|
|
357
358
|
isPi: true
|
|
358
|
-
}).catch(() => {});
|
|
359
|
+
}).then(() => recordAnalyticsWrite(db, 'pi_addition_requests').catch(() => {})).catch(() => {});
|
|
359
360
|
}
|
|
360
361
|
|
|
361
362
|
return res.json({
|
|
@@ -27,6 +27,59 @@ function getTutorialProgressRef(db, cid) {
|
|
|
27
27
|
.doc('progress');
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
/**
 * Derive tutorial task completions from data that already exists in Firestore
 * for a user, independent of what the tutorial progress doc has persisted.
 *
 * Every check is best-effort: a failed read is logged and treated as
 * "not completed", so one failing lookup never hides the others. The four
 * reads do not depend on each other, so they are issued concurrently.
 *
 * @param {object} db - Firestore instance
 * @param {string|number} cid - User id; stringified and used as the SignedInUsers doc id
 * @returns {Promise<Set<string>>} Task ids inferred as completed
 */
async function getDerivedCompletions(db, cid) {
  const userDocRef = db.collection('SignedInUsers').doc(String(cid));

  // [taskId, async predicate]. Array order is the insertion order of the
  // returned Set, regardless of which read finishes first.
  const checks = [
    ['account_create', async () => (await userDocRef.get()).exists],
    [
      'account_link_etoro',
      async () => {
        const verificationSnap = await userDocRef.collection('verification').doc('data').get();
        if (!verificationSnap.exists) return false;
        const vData = verificationSnap.data() || {};
        return Boolean(vData.etoroCID || vData.etoroUsername || vData.accountSetupComplete);
      },
    ],
    [
      'alerts_create_threshold',
      async () => !(await userDocRef.collection('alert_subscriptions').limit(1).get()).empty,
    ],
    [
      'alerts_receive_notification',
      async () => !(await userDocRef.collection('alerts').limit(1).get()).empty,
    ],
  ];

  const outcomes = await Promise.all(
    checks.map(async ([taskId, isDone]) => {
      try {
        return (await isDone()) ? taskId : null;
      } catch (e) {
        // Non-fatal: report it and carry on with the remaining checks.
        console.warn(`[tutorial] derived completion check failed for ${taskId}:`, e && e.message);
        return null;
      }
    })
  );

  return new Set(outcomes.filter((taskId) => taskId !== null));
}
|
|
82
|
+
|
|
30
83
|
async function getTutorialConfig(req) {
|
|
31
84
|
const config = (req.config && req.config.tutorial) || {};
|
|
32
85
|
return {
|
|
@@ -40,15 +93,31 @@ router.get('/progress', requireVerifiedUser, async (req, res, next) => {
|
|
|
40
93
|
const { db } = req.services;
|
|
41
94
|
const cid = req.targetUserId;
|
|
42
95
|
|
|
43
|
-
const
|
|
44
|
-
const
|
|
96
|
+
const ref = getTutorialProgressRef(db, cid);
|
|
97
|
+
const doc = await ref.get();
|
|
98
|
+
const data = doc.exists ? doc.data() || {} : {};
|
|
99
|
+
|
|
100
|
+
const persistedCompleted = new Set(
|
|
101
|
+
Array.isArray(data.completedTaskIds) ? data.completedTaskIds : []
|
|
102
|
+
);
|
|
103
|
+
const derivedCompleted = await getDerivedCompletions(db, cid);
|
|
104
|
+
const mergedCompleted = Array.from(
|
|
105
|
+
new Set([...persistedCompleted, ...derivedCompleted])
|
|
106
|
+
);
|
|
107
|
+
|
|
108
|
+
if (mergedCompleted.length !== persistedCompleted.size) {
|
|
109
|
+
await ref.set(
|
|
110
|
+
{
|
|
111
|
+
completedTaskIds: mergedCompleted,
|
|
112
|
+
},
|
|
113
|
+
{ merge: true }
|
|
114
|
+
);
|
|
115
|
+
}
|
|
45
116
|
|
|
46
117
|
res.json({
|
|
47
118
|
success: true,
|
|
48
119
|
data: {
|
|
49
|
-
completedTaskIds:
|
|
50
|
-
? data.completedTaskIds
|
|
51
|
-
: [],
|
|
120
|
+
completedTaskIds: mergedCompleted,
|
|
52
121
|
explicitlyDismissedTaskIds: Array.isArray(
|
|
53
122
|
data.explicitlyDismissedTaskIds
|
|
54
123
|
)
|
|
@@ -11,6 +11,7 @@ const {
|
|
|
11
11
|
} = require('../../shared/upstashRedis');
|
|
12
12
|
|
|
13
13
|
const { VALID_WATCHLIST_TAGS } = require('../constants');
|
|
14
|
+
const { recordAnalyticsWrite } = require('../../core/utils/analytics_registry');
|
|
14
15
|
|
|
15
16
|
const LIMITS = {
|
|
16
17
|
WATCHLIST_NAME_MAX: 200,
|
|
@@ -349,6 +350,7 @@ class WriteService {
|
|
|
349
350
|
|
|
350
351
|
await this._updateWatchlistMembership(validUserId, watchlistId, oldItems, firestoreData.items || []);
|
|
351
352
|
|
|
353
|
+
recordAnalyticsWrite(this.db, 'watchlists').catch(() => {});
|
|
352
354
|
firestoreSuccess = true;
|
|
353
355
|
} catch (error) {
|
|
354
356
|
console.error('[WriteService] Firestore write failed for watchlist:', error.message);
|
|
@@ -381,6 +383,7 @@ class WriteService {
|
|
|
381
383
|
// Delete from Firestore only. Hourly analytics-load job syncs watchlist state to BigQuery.
|
|
382
384
|
await docRef.delete();
|
|
383
385
|
|
|
386
|
+
recordAnalyticsWrite(this.db, 'watchlists').catch(() => {});
|
|
384
387
|
await this._updateWatchlistMembership(validUserId, validWatchlistId, oldItems, []);
|
|
385
388
|
}
|
|
386
389
|
|
|
@@ -438,6 +441,7 @@ class WriteService {
|
|
|
438
441
|
.doc(validUserId);
|
|
439
442
|
transaction.set(piReviewRef, review);
|
|
440
443
|
});
|
|
444
|
+
recordAnalyticsWrite(this.db, 'reviews').catch(() => {});
|
|
441
445
|
} catch (error) {
|
|
442
446
|
// Rethrow explicit errors, log others
|
|
443
447
|
if (error.message === 'Already reviewed') {
|
|
@@ -520,6 +524,7 @@ class WriteService {
|
|
|
520
524
|
viewedAt: now,
|
|
521
525
|
createdAt: now
|
|
522
526
|
});
|
|
527
|
+
recordAnalyticsWrite(this.db, 'analytics_events_pi_page_view').catch(() => {});
|
|
523
528
|
} catch (error) {
|
|
524
529
|
console.error('[WriteService] analytics_events pi_page_view write failed:', error.message);
|
|
525
530
|
}
|
|
@@ -544,6 +549,7 @@ class WriteService {
|
|
|
544
549
|
new_watchlist_id: this._validateId(newWatchlistId, 'New watchlist ID'),
|
|
545
550
|
createdAt: now
|
|
546
551
|
});
|
|
552
|
+
recordAnalyticsWrite(this.db, 'analytics_events_watchlist_copied').catch(() => {});
|
|
547
553
|
} catch (error) {
|
|
548
554
|
console.error('[WriteService] analytics_events watchlist_copied write failed:', error.message);
|
|
549
555
|
}
|
|
@@ -689,6 +695,7 @@ class WriteService {
|
|
|
689
695
|
...settings,
|
|
690
696
|
updatedAt: new Date()
|
|
691
697
|
}, { merge: true });
|
|
698
|
+
recordAnalyticsWrite(this.db, 'alert_subscriptions').catch(() => {});
|
|
692
699
|
}
|
|
693
700
|
|
|
694
701
|
/**
|
|
@@ -707,6 +714,7 @@ class WriteService {
|
|
|
707
714
|
.collection('alert_subscriptions')
|
|
708
715
|
.doc(validPiCid)
|
|
709
716
|
.delete();
|
|
717
|
+
recordAnalyticsWrite(this.db, 'alert_subscriptions').catch(() => {});
|
|
710
718
|
}
|
|
711
719
|
|
|
712
720
|
// =========================================================================
|
|
@@ -743,6 +751,8 @@ class WriteService {
|
|
|
743
751
|
|
|
744
752
|
await firestoreRef.set({ investors }, { merge: true });
|
|
745
753
|
|
|
754
|
+
recordAnalyticsWrite(this.db, 'pi_master_list').catch(() => {});
|
|
755
|
+
|
|
746
756
|
// Invalidate API cache for PI master list so new PI appears quickly.
|
|
747
757
|
if (this.redisEnabled) {
|
|
748
758
|
const cacheKey = `api:pi-master-list:${coll}:${docId}`;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Analytics load registry: time-bucketed Firestore docs for "what to sync".
|
|
3
|
+
* API calls recordAnalyticsWrite(db, sourceKey) after writing analytics data; the loader
|
|
4
|
+
* reads period docs to discover which sources had writes in the run window.
|
|
5
|
+
* Documents are named by time period (e.g. YYYY-MM-DD-HH for 2h buckets) to avoid 1MB limit;
|
|
6
|
+
* each doc has sources: { [sourceKey]: lastWrittenAt } and expire_at for TTL (30 days).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Read an integer setting from the environment.
 * Falls back to `fallback` when the variable is unset, not numeric, or outside [min, max].
 * @param {string} name - Environment variable name
 * @param {number} fallback - Value used when the variable is missing or invalid
 * @param {number} min - Smallest accepted value (inclusive)
 * @param {number} max - Largest accepted value (inclusive)
 * @returns {number} The parsed value or the fallback
 */
function readBoundedIntEnv(name, fallback, min, max) {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isInteger(parsed) && parsed >= min && parsed <= max ? parsed : fallback;
}

// Bucket length in hours, restricted to 1..24. A negative value would make the
// window walk in getPeriodDocIdsInWindow step backwards and never pass `end`;
// a value above 24 would make that walk step over whole days while doc IDs are
// still produced per day, so some period docs would never be read.
const BUCKET_HOURS = readBoundedIntEnv('REGISTRY_BUCKET_HOURS', 2, 1, 24);
// Days after period start at which a registry doc becomes eligible for TTL deletion.
const TTL_DAYS = readBoundedIntEnv('REGISTRY_TTL_DAYS', 30, 1, Infinity);
const REGISTRY_COLLECTION = process.env.FIRESTORE_ANALYTICS_REGISTRY_COLLECTION || 'analytics_load_registry';
|
|
12
|
+
|
|
13
|
+
/**
 * Build the registry document ID for the time bucket that contains `date`.
 * The UTC hour is rounded down to a multiple of BUCKET_HOURS, so with 2h buckets
 * both 00:15 and 01:59 UTC on 2025-03-13 map to "2025-03-13-00".
 * @param {Date} [date] - Reference time (defaults to the current time)
 * @returns {string} Period doc ID in the form YYYY-MM-DD-HH (UTC)
 */
function getPeriodDocId(date = new Date()) {
  const twoDigits = (value) => String(value).padStart(2, '0');
  const bucketStartHour = Math.floor(date.getUTCHours() / BUCKET_HOURS) * BUCKET_HOURS;

  return [
    date.getUTCFullYear(),
    twoDigits(date.getUTCMonth() + 1),
    twoDigits(date.getUTCDate()),
    twoDigits(bucketStartHour),
  ].join('-');
}
|
|
27
|
+
|
|
28
|
+
/**
 * Convert a period document ID back into the UTC instant at which that period starts.
 * Used to derive expire_at from the period rather than from the write time.
 * @param {string} periodDocId - ID in the form YYYY-MM-DD-HH, e.g. "2025-03-13-00"
 * @returns {Date} Start of that period (UTC)
 */
function getPeriodStartFromDocId(periodDocId) {
  const parts = periodDocId.split('-').map(Number);
  const year = parts[0];
  const monthIndex = parts[1] - 1; // Date.UTC months are zero-based
  const day = parts[2];
  const hour = parts[3];
  return new Date(Date.UTC(year, monthIndex, day, hour, 0, 0, 0));
}
|
|
37
|
+
|
|
38
|
+
/**
 * Record that a source was written to, so the analytics loader knows to sync it.
 *
 * Merges `sources[sourceKey] = now` into the registry doc for the current period
 * and sets `expire_at` to period start + TTL_DAYS. Never rejects: any failure is
 * logged and swallowed, so callers can fire-and-forget.
 *
 * The source is written as a nested map, not as a "sources.<key>" property name.
 * set() with { merge: true } takes keys literally (only update() expands dotted
 * paths), so a dotted key would create a top-level field that code reading
 * `data.sources` never sees.
 *
 * @param {object} db - Firestore instance
 * @param {string} sourceKey - e.g. 'watchlists', 'analytics_events_pi_page_view'
 * @returns {Promise<void>}
 */
async function recordAnalyticsWrite(db, sourceKey) {
  try {
    if (typeof sourceKey !== 'string' || sourceKey === '') {
      throw new TypeError('sourceKey must be a non-empty string');
    }

    const now = new Date();
    const periodDocId = getPeriodDocId(now);

    // Expiry is anchored to the period start so every write in a bucket sets the same value.
    const expireAt = new Date(getPeriodStartFromDocId(periodDocId));
    expireAt.setUTCDate(expireAt.getUTCDate() + TTL_DAYS);

    await db
      .collection(REGISTRY_COLLECTION)
      .doc(periodDocId)
      .set(
        {
          sources: { [sourceKey]: now },
          expire_at: expireAt
        },
        { merge: true }
      );
  } catch (error) {
    console.error('[analytics_registry] recordAnalyticsWrite failed:', error.message);
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * List the period doc IDs whose buckets overlap the time window [since, end].
 * The walk starts at the beginning of the bucket that contains `since` and
 * advances one bucket (BUCKET_HOURS) at a time until it passes `end`.
 * @param {Date} since - Start of window
 * @param {Date} [end] - End of window (defaults to the current time)
 * @returns {string[]} Unique period doc IDs, in chronological order of first appearance
 */
function getPeriodDocIdsInWindow(since, end = new Date()) {
  // Snap the window start back to the first hour of its bucket; setUTCHours
  // also clears minutes, seconds and milliseconds.
  const bucketStart = new Date(since);
  const alignedHour = Math.floor(bucketStart.getUTCHours() / BUCKET_HOURS) * BUCKET_HOURS;
  bucketStart.setUTCHours(alignedHour, 0, 0, 0);

  const uniqueIds = new Set();
  for (
    const t = bucketStart;
    t <= end;
    t.setUTCHours(t.getUTCHours() + BUCKET_HOURS)
  ) {
    uniqueIds.add(getPeriodDocId(t));
  }
  return Array.from(uniqueIds);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Read the registry docs for the given period IDs and return the union of
 * source keys that had writes. Missing docs are skipped.
 *
 * Two document shapes are accepted:
 *  - a nested map: `sources: { <sourceKey>: <timestamp> }`
 *  - flat fields literally named `sources.<sourceKey>`, which is what a writer
 *    produces when it passes a dotted key to set(..., { merge: true }), since
 *    set() does not expand dotted paths.
 *
 * @param {object} db - Firestore instance
 * @param {string[]} periodDocIds - Period document IDs to read
 * @returns {Promise<Set<string>>} Set of source keys
 */
async function getSourceKeysFromRegistry(db, periodDocIds) {
  const FLAT_PREFIX = 'sources.';
  const keys = new Set();
  const col = db.collection(REGISTRY_COLLECTION);

  await Promise.all(
    periodDocIds.map(async (id) => {
      const snap = await col.doc(id).get();
      if (!snap.exists) return;

      const data = snap.data() || {};

      if (data.sources && typeof data.sources === 'object') {
        Object.keys(data.sources).forEach((k) => keys.add(k));
      }

      // Flat "sources.<key>" fields; the bare prefix with no key is ignored.
      for (const field of Object.keys(data)) {
        if (field.startsWith(FLAT_PREFIX) && field.length > FLAT_PREFIX.length) {
          keys.add(field.slice(FLAT_PREFIX.length));
        }
      }
    })
  );

  return keys;
}
|
|
107
|
+
|
|
108
|
+
module.exports = {
|
|
109
|
+
recordAnalyticsWrite,
|
|
110
|
+
getPeriodDocId,
|
|
111
|
+
getPeriodDocIdsInWindow,
|
|
112
|
+
getSourceKeysFromRegistry,
|
|
113
|
+
REGISTRY_COLLECTION,
|
|
114
|
+
BUCKET_HOURS,
|
|
115
|
+
TTL_DAYS
|
|
116
|
+
};
|
package/functions/maintenance/analytics-load-firestore-to-bq/ANALYTICS_SYSTEM_DOCUMENTATION.md
CHANGED
|
@@ -82,11 +82,49 @@ Full schema definitions live in `functions/core/utils/bigquery_utils.js` (SCHEMA
|
|
|
82
82
|
| **Notification preferences** | `SignedInUsers/{cid}/settings/notifications` | Preference analytics | Firestore only; not in loader. |
|
|
83
83
|
| **User computation create/update** | `users/{userId}/computations/{id}` | Feature usage | Firestore only; not in loader. |
|
|
84
84
|
|
|
85
|
-
To add any of these: (1) ensure the action is written to Firestore (if not already), (2)
|
|
85
|
+
To add any of these: (1) ensure the action is written to Firestore (if not already), (2) call `recordAnalyticsWrite(db, sourceKey)` after the write, (3) add a source entry in `sourceConfig.js` and ensure the BQ table/schema exist in `bigquery_utils.js`, and (4) run the loader on the same schedule.
|
|
86
86
|
|
|
87
87
|
---
|
|
88
88
|
|
|
89
|
-
## 5.
|
|
89
|
+
## 5. Analytics load registry (what to sync)
|
|
90
|
+
|
|
91
|
+
The loader is **registry-driven**: the API records which analytics sources were written to in **time-bucketed** Firestore documents, and the loader reads those to decide which sources to sync for the run window. This avoids hardcoding paths in the loader and makes adding a new path a single config entry + API registry write.
|
|
92
|
+
|
|
93
|
+
### 5.1 Registry collection
|
|
94
|
+
|
|
95
|
+
- **Collection:** `analytics_load_registry` (override: `FIRESTORE_ANALYTICS_REGISTRY_COLLECTION`).
|
|
96
|
+
- **Document ID:** Time-period based so a **new document is created every few hours** (e.g. 2h), avoiding the 1MB-per-document limit. Format: `YYYY-MM-DD-HH` (e.g. `2025-03-13-00`, `2025-03-13-02`). Bucket length: **`REGISTRY_BUCKET_HOURS`** (default `2`).
|
|
97
|
+
- **Document fields:**
|
|
98
|
+
- `sources` – map of source key → Timestamp of last write in that period.
|
|
99
|
+
- **`expire_at`** – Timestamp set to **period start + 30 days** (configurable via **`REGISTRY_TTL_DAYS`**). Used by Firestore TTL to auto-delete old docs.
|
|
100
|
+
- **TTL policy:** A **Firestore TTL policy must be created** on the registry collection (Firestore console or IaC) that deletes documents when `expire_at` is in the past. Without it, documents will not auto-delete.
|
|
101
|
+
|
|
102
|
+
### 5.2 Source keys
|
|
103
|
+
|
|
104
|
+
Each analytics path is identified by a **source key**. The API calls `recordAnalyticsWrite(db, sourceKey)` after writing; the loader uses the same keys in `sourceConfig.js`.
|
|
105
|
+
|
|
106
|
+
| Source key | Firestore source | BQ table |
|
|
107
|
+
|------------|-------------------|----------|
|
|
108
|
+
| `watchlists` | Collection group `watchlists` | `watchlists` |
|
|
109
|
+
| `reviews` | Collection group `reviews` | `pi_reviews` |
|
|
110
|
+
| `analytics_events_pi_page_view` | `analytics_events` (eventType `pi_page_view`) | `pi_page_view_events` |
|
|
111
|
+
| `pi_master_list` | `system_state/popular_investor_master_list` | `pi_master_list` |
|
|
112
|
+
| `analytics_events_search_query` | `analytics_events` (eventType `search_query`) | `search_queries` |
|
|
113
|
+
| `analytics_events_watchlist_copied` | `analytics_events` (eventType `watchlist_copied`) | `watchlist_copied_events` |
|
|
114
|
+
| `alert_subscriptions` | Collection group `alert_subscriptions` | `alert_subscriptions` |
|
|
115
|
+
| `pi_addition_requests` | Root collection `pi_addition_requests` | `pi_addition_requests` |
|
|
116
|
+
|
|
117
|
+
### 5.3 How to add a new path
|
|
118
|
+
|
|
119
|
+
1. **API:** After writing to the new Firestore path, call `recordAnalyticsWrite(db, 'your_source_key')` (fire-and-forget). Helper: `core/utils/analytics_registry.js`.
|
|
120
|
+
2. **Loader config:** Add an entry in `maintenance/analytics-load-firestore-to-bq/sourceConfig.js`: `sourceKey`, `ensureTable`, `fetch`, `mapRows`, `bqTable`, `mergeKeys` (or append).
|
|
121
|
+
3. **BQ schema:** Ensure the table and schema exist in `core/utils/bigquery_utils.js` (SCHEMAS, ensure*Table if needed).
|
|
122
|
+
|
|
123
|
+
No change to the loader loop in `index.js` is required.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## 6. Firestore → BigQuery mapping (load job)
|
|
90
128
|
|
|
91
129
|
| Firestore source | How the loader reads it | BigQuery dataset.table | Load method |
|
|
92
130
|
|------------------|--------------------------|-------------------------|-------------|
|
|
@@ -98,11 +136,13 @@ To add any of these: (1) ensure the action is written to Firestore (if not alrea
|
|
|
98
136
|
| `SignedInUsers/{cid}/alert_subscriptions` | Collection group `alert_subscriptions` | `bt_core.alert_subscriptions` | insertRowsWithMerge(key: user_id, pi_cid) |
|
|
99
137
|
| `pi_addition_requests` | Root collection; optional filter `requestedAt >= since` | `bt_core.pi_addition_requests` | insertRowsWithMerge(key: request_id) |
|
|
100
138
|
|
|
139
|
+
Loader behaviour: **Incremental run** – loader reads registry period docs that overlap the run window (e.g. last 1–2h), collects the union of source keys that had writes, then runs the config-driven sync only for those sources. **Full sync** – runs all configured sources (no registry read). **Fallback** – if no registry docs exist for the window, loader runs all sources for the window so cold start still populates BQ.
|
|
140
|
+
|
|
101
141
|
Loader defaults: **incremental window** = last 2 hours (`since`); **full sync** = no time filter. Dataset for all above tables: **`bt_core`** (override via `BQ_BT_CORE_DATASET_ID` or `BIGQUERY_DATASET_ID`).
|
|
102
142
|
|
|
103
143
|
---
|
|
104
144
|
|
|
105
|
-
##
|
|
145
|
+
## 7. Access control: own-user only
|
|
106
146
|
|
|
107
147
|
**Computation access:** When the computation system or downstream services read from `bt_core` analytics tables for a given user/owner, they must only see **that user's own** behavioural data. Users may not query other users' watchlists, reviews, search_queries, pi_page_view_events, or alert_subscriptions.
|
|
108
148
|
|
|
@@ -112,17 +152,19 @@ Loader defaults: **incremental window** = last 2 hours (`since`); **full sync**
|
|
|
112
152
|
|
|
113
153
|
---
|
|
114
154
|
|
|
115
|
-
##
|
|
155
|
+
## 8. Related files
|
|
116
156
|
|
|
117
|
-
- **Loader:** `functions/maintenance/analytics-load-firestore-to-bq/index.js`
|
|
157
|
+
- **Loader:** `functions/maintenance/analytics-load-firestore-to-bq/index.js` (registry-driven; reads `sourceConfig.js`).
|
|
158
|
+
- **Source config:** `functions/maintenance/analytics-load-firestore-to-bq/sourceConfig.js` (source key → Firestore query, row mapper, BQ table).
|
|
159
|
+
- **Registry helper:** `functions/core/utils/analytics_registry.js` (recordAnalyticsWrite, getPeriodDocIdsInWindow, getSourceKeysFromRegistry; time-bucketed docs + TTL).
|
|
118
160
|
- **Firestore event schema (short):** `functions/maintenance/analytics-load-firestore-to-bq/ANALYTICS_EVENTS_SCHEMA.md`
|
|
119
161
|
- **BQ schemas and ensure*:** `functions/core/utils/bigquery_utils.js` (SCHEMAS.watchlists, pi_reviews, pi_page_view_events, search_queries, alert_subscriptions, pi_addition_requests; getBtCoreDatasetId; ensureWatchlistsTable, etc.)
|
|
120
|
-
- **API writes (Firestore
|
|
162
|
+
- **API writes (Firestore + registry):** `api-v3/services/WriteService.js`, `api-v3/routes/popular-investors.js` (track-view, search logging, pi_addition_requests; each calls recordAnalyticsWrite with the appropriate source key).
|
|
121
163
|
- **Run/schedule:** `maintenance/analytics-load-firestore-to-bq/README.md` (CLI, HTTP, env, schedule)
|
|
122
164
|
|
|
123
165
|
---
|
|
124
166
|
|
|
125
|
-
##
|
|
167
|
+
## 9. Summary tables
|
|
126
168
|
|
|
127
169
|
**Stored in Firestore and loaded to BigQuery (monitored):**
|
|
128
170
|
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Hourly job that reads user analytics from Firestore and bulk-loads into BigQuery (`bt_core`). Replaces API-time dual writes to reduce latency.
|
|
4
4
|
|
|
5
|
+
**Registry-driven:** The API records which sources were written to in time-bucketed Firestore docs (`analytics_load_registry`). The loader reads those docs to decide which sources to sync for the run window. Documents are named by period (e.g. `YYYY-MM-DD-HH` for 2h buckets) to avoid the 1MB limit; each doc has `expire_at` set to the period start + 30 days (configurable via `REGISTRY_TTL_DAYS`). **A Firestore TTL policy must be created** on the registry collection so documents auto-delete when `expire_at` is in the past.
|
|
6
|
+
|
|
5
7
|
## What it loads
|
|
6
8
|
|
|
7
9
|
- **Watchlists** – `SignedInUsers/{cid}/watchlists` → `bt_core.watchlists` (MERGE)
|
|
@@ -9,6 +11,7 @@ Hourly job that reads user analytics from Firestore and bulk-loads into BigQuery
|
|
|
9
11
|
- **PI page view events** – `analytics_events` (eventType `pi_page_view`) → `bt_core.pi_page_view_events` (append)
|
|
10
12
|
- **PI master list** – `system_state/popular_investor_master_list` → `bt_core.pi_master_list` (MERGE)
|
|
11
13
|
- **Search queries** – `analytics_events` (eventType `search_query`) → `bt_core.search_queries` (append)
|
|
14
|
+
- **Watchlist copied events** – `analytics_events` (eventType `watchlist_copied`) → `bt_core.watchlist_copied_events` (append)
|
|
12
15
|
- **Alert subscriptions** – collection group `alert_subscriptions` → `bt_core.alert_subscriptions` (MERGE)
|
|
13
16
|
- **PI addition requests** – `pi_addition_requests` → `bt_core.pi_addition_requests` (MERGE on request_id)
|
|
14
17
|
|
|
@@ -51,3 +54,6 @@ Then create a Cloud Scheduler job that POSTs to that URL hourly with no body (or
|
|
|
51
54
|
- `BQ_BT_CORE_DATASET_ID` or `BIGQUERY_DATASET_ID` – BigQuery dataset for `bt_core` (default `bt_core`)
|
|
52
55
|
- `FIRESTORE_PI_MASTER_LIST_COLLECTION` – default `system_state`
|
|
53
56
|
- `FIRESTORE_PI_MASTER_LIST_DOCUMENT` – default `popular_investor_master_list`
|
|
57
|
+
- **Registry:** `FIRESTORE_ANALYTICS_REGISTRY_COLLECTION` – default `analytics_load_registry`
|
|
58
|
+
- `REGISTRY_BUCKET_HOURS` – period length in hours for time-bucketed doc IDs (default `2`)
|
|
59
|
+
- `REGISTRY_TTL_DAYS` – days after period start before doc is eligible for TTL delete (default `30`). Create a **Firestore TTL policy** on the registry collection using the `expire_at` field so documents are auto-deleted.
|
|
@@ -7,6 +7,11 @@
|
|
|
7
7
|
* Trigger: HTTP (Cloud Scheduler) or onSchedule. Use window-based incremental load
|
|
8
8
|
* (e.g. last 2 hours) to avoid cursor state.
|
|
9
9
|
*
|
|
10
|
+
* Registry-driven: reads analytics_load_registry (time-bucketed docs) to discover which
|
|
11
|
+
* sources had writes in the run window, then runs config-driven sync for each. Full sync
|
|
12
|
+
* runs all configured sources. If no registry docs exist for the window, falls back to
|
|
13
|
+
* running all sources for the window.
|
|
14
|
+
*
|
|
10
15
|
* Usage:
|
|
11
16
|
* node index.js (run with default 2h window)
|
|
12
17
|
* node index.js --windowHours=1 (1 hour window)
|
|
@@ -16,14 +21,11 @@
|
|
|
16
21
|
const { Firestore } = require('@google-cloud/firestore');
|
|
17
22
|
const {
|
|
18
23
|
getBtCoreDatasetId,
|
|
19
|
-
ensureWatchlistsTable,
|
|
20
|
-
ensurePiReviewsTable,
|
|
21
|
-
ensurePiPageViewEventsTable,
|
|
22
|
-
ensureTableExists,
|
|
23
|
-
getSchema,
|
|
24
24
|
insertRows,
|
|
25
25
|
insertRowsWithMerge
|
|
26
26
|
} = require('../../core/utils/bigquery_utils');
|
|
27
|
+
const { getPeriodDocIdsInWindow, getSourceKeysFromRegistry } = require('../../core/utils/analytics_registry');
|
|
28
|
+
const { SOURCE_CONFIG_BY_KEY, ALL_SOURCE_KEYS } = require('./sourceConfig');
|
|
27
29
|
|
|
28
30
|
const db = new Firestore();
|
|
29
31
|
const DEFAULT_WINDOW_HOURS = 2;
|
|
@@ -32,7 +34,7 @@ const DEFAULT_WINDOW_HOURS = 2;
|
|
|
32
34
|
* @param {object} logger - { log(level, message) }
|
|
33
35
|
* @param {Date} since - Start of time window
|
|
34
36
|
* @param {boolean} fullSync - If true, ignore since and load all
|
|
35
|
-
* @returns {Promise<{ watchlists: number, pi_reviews: number, pi_page_view_events: number, pi_master_list: number }>}
|
|
37
|
+
* @returns {Promise<{ watchlists: number, pi_reviews: number, pi_page_view_events: number, pi_master_list: number, search_queries: number, watchlist_copied_events: number, alert_subscriptions: number, pi_addition_requests: number }>}
|
|
36
38
|
*/
|
|
37
39
|
async function runAnalyticsLoad(logger = console, since = null, fullSync = false) {
|
|
38
40
|
const log = (level, msg, ...args) => (typeof logger.log === 'function' ? logger.log(level, msg, ...args) : console.log(`[${level}] ${msg}`, ...args));
|
|
@@ -42,220 +44,58 @@ async function runAnalyticsLoad(logger = console, since = null, fullSync = false
|
|
|
42
44
|
const stats = { watchlists: 0, pi_reviews: 0, pi_page_view_events: 0, pi_master_list: 0, search_queries: 0, watchlist_copied_events: 0, alert_subscriptions: 0, pi_addition_requests: 0 };
|
|
43
45
|
|
|
44
46
|
try {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
user_id: userId,
|
|
59
|
-
watchlist_id: doc.id,
|
|
60
|
-
name: data.name ?? null,
|
|
61
|
-
type: data.type ?? null,
|
|
62
|
-
visibility: data.visibility ?? null,
|
|
63
|
-
items: typeof data.items === 'string' ? data.items : JSON.stringify(data.items || []),
|
|
64
|
-
item_config: data.itemConfig != null ? JSON.stringify(data.itemConfig) : null,
|
|
65
|
-
dynamic_config: data.dynamicConfig != null ? JSON.stringify(data.dynamicConfig) : null,
|
|
66
|
-
updated_at: updatedAt.toISOString().replace(/\.\d{3}Z$/, 'Z'),
|
|
67
|
-
deleted: false,
|
|
68
|
-
deleted_at: null
|
|
69
|
-
});
|
|
70
|
-
}
|
|
71
|
-
if (watchlistRows.length > 0) {
|
|
72
|
-
await insertRowsWithMerge(datasetId, 'watchlists', watchlistRows, ['user_id', 'watchlist_id'], logger);
|
|
73
|
-
stats.watchlists = watchlistRows.length;
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// --- Reviews (collection group; MERGE on user_cid + pi_cid) ---
|
|
78
|
-
await ensurePiReviewsTable(logger, datasetId);
|
|
79
|
-
const reviewsSnap = fullSync
|
|
80
|
-
? await db.collectionGroup('reviews').get()
|
|
81
|
-
: await db.collectionGroup('reviews').where('createdAt', '>=', sinceTime).get();
|
|
82
|
-
|
|
83
|
-
if (!reviewsSnap.empty) {
|
|
84
|
-
const reviewRows = [];
|
|
85
|
-
const seen = new Set();
|
|
86
|
-
for (const doc of reviewsSnap.docs) {
|
|
87
|
-
const data = doc.data();
|
|
88
|
-
const userCid = String(data.userCid ?? doc.ref.parent.parent.id);
|
|
89
|
-
const piCid = String(data.piCid ?? doc.id);
|
|
90
|
-
const key = `${userCid}_${piCid}`;
|
|
91
|
-
if (seen.has(key)) continue;
|
|
92
|
-
seen.add(key);
|
|
93
|
-
const createdAt = data.createdAt ? (data.createdAt.toDate ? data.createdAt.toDate() : data.createdAt) : new Date();
|
|
94
|
-
reviewRows.push({
|
|
95
|
-
user_cid: userCid,
|
|
96
|
-
pi_cid: piCid,
|
|
97
|
-
rating: data.rating ?? 0,
|
|
98
|
-
comment: data.comment ?? '',
|
|
99
|
-
is_anonymous: data.isAnonymous === true,
|
|
100
|
-
created_at: createdAt.toISOString().replace(/\.\d{3}Z$/, 'Z')
|
|
101
|
-
});
|
|
102
|
-
}
|
|
103
|
-
if (reviewRows.length > 0) {
|
|
104
|
-
await insertRowsWithMerge(datasetId, 'pi_reviews', reviewRows, ['user_cid', 'pi_cid'], logger);
|
|
105
|
-
stats.pi_reviews = reviewRows.length;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
// --- PI page view events (analytics_events, append) ---
|
|
110
|
-
await ensurePiPageViewEventsTable(logger, datasetId);
|
|
111
|
-
const eventsRef = db.collection('analytics_events');
|
|
112
|
-
const pageViewQuery = fullSync
|
|
113
|
-
? eventsRef.where('eventType', '==', 'pi_page_view')
|
|
114
|
-
: eventsRef.where('eventType', '==', 'pi_page_view').where('createdAt', '>=', sinceTime);
|
|
115
|
-
const pageViewSnap = await pageViewQuery.get();
|
|
116
|
-
|
|
117
|
-
if (!pageViewSnap.empty) {
|
|
118
|
-
const eventRows = [];
|
|
119
|
-
for (const doc of pageViewSnap.docs) {
|
|
120
|
-
const d = doc.data();
|
|
121
|
-
const viewedAt = d.viewedAt ? (d.viewedAt.toDate ? d.viewedAt.toDate() : d.viewedAt) : new Date();
|
|
122
|
-
const dateStr = viewedAt.toISOString().split('T')[0];
|
|
123
|
-
eventRows.push({
|
|
124
|
-
date: dateStr,
|
|
125
|
-
pi_id: String(d.piId ?? ''),
|
|
126
|
-
viewer_cid: String(d.viewerCid ?? ''),
|
|
127
|
-
viewed_at: viewedAt.toISOString().replace(/\.\d{3}Z$/, 'Z')
|
|
128
|
-
});
|
|
129
|
-
}
|
|
130
|
-
if (eventRows.length > 0) {
|
|
131
|
-
await insertRows(datasetId, 'pi_page_view_events', eventRows, logger);
|
|
132
|
-
stats.pi_page_view_events = eventRows.length;
|
|
47
|
+
let sourceKeysToSync;
|
|
48
|
+
if (fullSync) {
|
|
49
|
+
sourceKeysToSync = new Set(ALL_SOURCE_KEYS);
|
|
50
|
+
log('INFO', '[AnalyticsLoad] Full sync: running all configured sources');
|
|
51
|
+
} else {
|
|
52
|
+
const periodIds = getPeriodDocIdsInWindow(sinceTime, new Date());
|
|
53
|
+
const keysFromRegistry = await getSourceKeysFromRegistry(db, periodIds);
|
|
54
|
+
if (keysFromRegistry.size === 0) {
|
|
55
|
+
sourceKeysToSync = new Set(ALL_SOURCE_KEYS);
|
|
56
|
+
log('INFO', '[AnalyticsLoad] No registry docs for window; running all sources for window');
|
|
57
|
+
} else {
|
|
58
|
+
sourceKeysToSync = keysFromRegistry;
|
|
59
|
+
log('INFO', '[AnalyticsLoad] Registry sources for window:', [...sourceKeysToSync].join(', '));
|
|
133
60
|
}
|
|
134
61
|
}
|
|
135
62
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
const investors = masterDoc.data().investors || {};
|
|
142
|
-
const masterRows = [];
|
|
143
|
-
for (const [cid, info] of Object.entries(investors)) {
|
|
144
|
-
if (!info || typeof info !== 'object') continue;
|
|
145
|
-
const firstSeen = info.firstSeenAt?.toDate ? info.firstSeenAt.toDate() : info.firstSeenAt;
|
|
146
|
-
const lastSeen = info.lastSeenAt?.toDate ? info.lastSeenAt.toDate() : info.lastSeenAt;
|
|
147
|
-
const t = (v) => (v && v.toISOString ? v.toISOString().replace(/\.\d{3}Z$/, '') : (v ? String(v) : new Date().toISOString().replace(/\.\d{3}Z$/, '')));
|
|
148
|
-
masterRows.push({
|
|
149
|
-
cid: parseInt(String(cid), 10) || 0,
|
|
150
|
-
username: info.username || '',
|
|
151
|
-
first_seen_at: t(firstSeen),
|
|
152
|
-
last_seen_at: t(lastSeen),
|
|
153
|
-
last_updated: t(lastSeen)
|
|
154
|
-
});
|
|
63
|
+
for (const sourceKey of sourceKeysToSync) {
|
|
64
|
+
const config = SOURCE_CONFIG_BY_KEY[sourceKey];
|
|
65
|
+
if (!config) {
|
|
66
|
+
log('WARN', `[AnalyticsLoad] No config for sourceKey ${sourceKey}, skipping`);
|
|
67
|
+
continue;
|
|
155
68
|
}
|
|
156
|
-
if (masterRows.length > 0) {
|
|
157
|
-
await ensureTableExists(datasetId, 'pi_master_list', getSchema('pi_master_list'), {}, logger);
|
|
158
|
-
await insertRowsWithMerge(datasetId, 'pi_master_list', masterRows, ['cid'], logger);
|
|
159
|
-
stats.pi_master_list = masterRows.length;
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
// --- Search queries (analytics_events, append) ---
|
|
164
|
-
const searchQuery = fullSync
|
|
165
|
-
? db.collection('analytics_events').where('eventType', '==', 'search_query')
|
|
166
|
-
: db.collection('analytics_events').where('eventType', '==', 'search_query').where('createdAt', '>=', sinceTime);
|
|
167
|
-
const searchSnap = await searchQuery.get();
|
|
168
|
-
if (!searchSnap.empty) {
|
|
169
|
-
const { ensureSearchQueriesTable } = require('../../core/utils/bigquery_utils');
|
|
170
|
-
await ensureSearchQueriesTable(logger, datasetId);
|
|
171
|
-
const searchRows = searchSnap.docs.map((doc) => {
|
|
172
|
-
const d = doc.data();
|
|
173
|
-
const createdAt = d.createdAt ? (d.createdAt.toDate ? d.createdAt.toDate() : d.createdAt) : new Date();
|
|
174
|
-
return {
|
|
175
|
-
user_id: d.userId ?? null,
|
|
176
|
-
query: d.query ?? '',
|
|
177
|
-
result_count: d.resultCount != null ? d.resultCount : null,
|
|
178
|
-
created_at: createdAt.toISOString().replace(/\.\d{3}Z$/, 'Z')
|
|
179
|
-
};
|
|
180
|
-
});
|
|
181
|
-
if (searchRows.length > 0) {
|
|
182
|
-
await insertRows(datasetId, 'search_queries', searchRows, logger);
|
|
183
|
-
stats.search_queries = searchRows.length;
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// --- Watchlist copied events (analytics_events, append) ---
|
|
188
|
-
const copyEventsRef = db.collection('analytics_events');
|
|
189
|
-
const copyQuery = fullSync
|
|
190
|
-
? copyEventsRef.where('eventType', '==', 'watchlist_copied')
|
|
191
|
-
: copyEventsRef.where('eventType', '==', 'watchlist_copied').where('createdAt', '>=', sinceTime);
|
|
192
|
-
const copySnap = await copyQuery.get();
|
|
193
|
-
if (!copySnap.empty) {
|
|
194
|
-
const { ensureWatchlistCopiedEventsTable } = require('../../core/utils/bigquery_utils');
|
|
195
|
-
await ensureWatchlistCopiedEventsTable(logger, datasetId);
|
|
196
|
-
const copyRows = copySnap.docs.map((doc) => {
|
|
197
|
-
const d = doc.data();
|
|
198
|
-
const createdAt = d.createdAt ? (d.createdAt.toDate ? d.createdAt.toDate() : d.createdAt) : new Date();
|
|
199
|
-
return {
|
|
200
|
-
user_id: d.user_id ?? '',
|
|
201
|
-
source_user_id: d.source_user_id ?? '',
|
|
202
|
-
source_watchlist_id: d.source_watchlist_id ?? '',
|
|
203
|
-
new_watchlist_id: d.new_watchlist_id ?? '',
|
|
204
|
-
created_at: createdAt.toISOString().replace(/\.\d{3}Z$/, 'Z')
|
|
205
|
-
};
|
|
206
|
-
});
|
|
207
|
-
if (copyRows.length > 0) {
|
|
208
|
-
await insertRows(datasetId, 'watchlist_copied_events', copyRows, logger);
|
|
209
|
-
stats.watchlist_copied_events = copyRows.length;
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
// --- Alert subscriptions (collection group, MERGE) ---
|
|
214
|
-
const subsSnap = fullSync
|
|
215
|
-
? await db.collectionGroup('alert_subscriptions').get()
|
|
216
|
-
: await db.collectionGroup('alert_subscriptions').get(); // no updatedAt filter on all; for incremental we could add updatedAt
|
|
217
|
-
if (!subsSnap.empty) {
|
|
218
|
-
const { ensureAlertSubscriptionsTable } = require('../../core/utils/bigquery_utils');
|
|
219
|
-
await ensureAlertSubscriptionsTable(logger, datasetId);
|
|
220
|
-
const subRows = [];
|
|
221
|
-
for (const doc of subsSnap.docs) {
|
|
222
|
-
const userId = doc.ref.parent.parent.id;
|
|
223
|
-
const data = doc.data();
|
|
224
|
-
const updatedAt = data.updatedAt ? (data.updatedAt.toDate ? data.updatedAt.toDate() : data.updatedAt) : new Date();
|
|
225
|
-
subRows.push({
|
|
226
|
-
user_id: userId,
|
|
227
|
-
pi_cid: doc.id,
|
|
228
|
-
settings: JSON.stringify(data),
|
|
229
|
-
updated_at: updatedAt.toISOString().replace(/\.\d{3}Z$/, 'Z')
|
|
230
|
-
});
|
|
231
|
-
}
|
|
232
|
-
if (subRows.length > 0) {
|
|
233
|
-
await insertRowsWithMerge(datasetId, 'alert_subscriptions', subRows, ['user_id', 'pi_cid'], logger);
|
|
234
|
-
stats.alert_subscriptions = subRows.length;
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
69
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
70
|
+
try {
|
|
71
|
+
await config.ensureTable(logger, datasetId);
|
|
72
|
+
|
|
73
|
+
let rows = [];
|
|
74
|
+
if (config.singleDoc) {
|
|
75
|
+
const doc = await config.fetch(db);
|
|
76
|
+
rows = config.mapRows(doc);
|
|
77
|
+
} else {
|
|
78
|
+
const snapshot = await config.fetch(db, sinceTime, fullSync);
|
|
79
|
+
rows = config.mapRows(snapshot.docs || []);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (rows.length === 0) continue;
|
|
83
|
+
|
|
84
|
+
if (config.mergeKeys) {
|
|
85
|
+
await insertRowsWithMerge(datasetId, config.bqTable, rows, config.mergeKeys, logger);
|
|
86
|
+
} else {
|
|
87
|
+
await insertRows(datasetId, config.bqTable, rows, logger);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const prev = stats[config.bqTable];
|
|
91
|
+
if (typeof prev === 'number') {
|
|
92
|
+
stats[config.bqTable] = prev + rows.length;
|
|
93
|
+
} else {
|
|
94
|
+
stats[config.bqTable] = rows.length;
|
|
95
|
+
}
|
|
96
|
+
} catch (err) {
|
|
97
|
+
log('ERROR', `[AnalyticsLoad] ${sourceKey} failed: ${err.message}`);
|
|
98
|
+
throw err;
|
|
259
99
|
}
|
|
260
100
|
}
|
|
261
101
|
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Config-driven source definitions for analytics load: Firestore → BigQuery.
|
|
3
|
+
* Each source key maps to Firestore query, row mapper, BQ table, and merge/append behavior.
|
|
4
|
+
* To add a new source: add an entry to SOURCE_CONFIGS here and call recordAnalyticsWrite(db, sourceKey) from the API.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const {
|
|
8
|
+
ensureWatchlistsTable,
|
|
9
|
+
ensurePiReviewsTable,
|
|
10
|
+
ensurePiPageViewEventsTable,
|
|
11
|
+
ensureSearchQueriesTable,
|
|
12
|
+
ensureWatchlistCopiedEventsTable,
|
|
13
|
+
ensureAlertSubscriptionsTable,
|
|
14
|
+
ensurePiAdditionRequestsTable,
|
|
15
|
+
ensureTableExists,
|
|
16
|
+
getSchema
|
|
17
|
+
} = require('../../core/utils/bigquery_utils');
|
|
18
|
+
|
|
19
|
+
// Firestore collection read by the 'pi_master_list' source; taken from the
// FIRESTORE_PI_MASTER_LIST_COLLECTION env var, defaulting to 'system_state' when unset or empty.
const PI_MASTER_LIST_COLLECTION = process.env.FIRESTORE_PI_MASTER_LIST_COLLECTION || 'system_state';
|
|
20
|
+
// Document id read by the 'pi_master_list' source; taken from the
// FIRESTORE_PI_MASTER_LIST_DOCUMENT env var, defaulting to 'popular_investor_master_list' when unset or empty.
const PI_MASTER_LIST_DOCUMENT = process.env.FIRESTORE_PI_MASTER_LIST_DOCUMENT || 'popular_investor_master_list';
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* @typedef {Object} SourceConfig
|
|
24
|
+
* @property {string} sourceKey - Registry source key (e.g. 'watchlists')
|
|
25
|
+
* @property {function(object, string): Promise<void>} ensureTable - (logger, datasetId) => ensure BQ table exists
|
|
26
|
+
* @property {function(object, Date, boolean): Promise<FirebaseFirestore.QuerySnapshot>} fetch - (db, since, fullSync) => snapshot
|
|
27
|
+
* @property {function(FirebaseFirestore.QueryDocumentSnapshot[]): Array<Object>} mapRows - (docs) => BQ rows
|
|
28
|
+
* @property {string} bqTable - BigQuery table name
|
|
29
|
+
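* @property {string[]|null} mergeKeys - Key fields for MERGE; null = append only
* @property {boolean} [singleDoc] - When true, the loader calls fetch(db) and passes the returned document snapshot (not a docs array) to mapRows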
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/** Matches the millisecond fraction plus trailing "Z" produced by Date#toISOString(). */
const ISO_MILLIS_Z = /\.\d{3}Z$/;

/**
 * Normalise a stored timestamp to a valid JS Date.
 *
 * Accepts anything exposing toDate() (e.g. a Firestore Timestamp), a Date,
 * an ISO string or epoch milliseconds. Missing (falsy) values fall back to
 * "now", as the mappers always did. Unparseable values get the same fallback
 * instead of throwing, because the loader rethrows mapper errors and one
 * malformed document would otherwise abort the whole run.
 *
 * @param {*} value - Raw field value from a Firestore document
 * @returns {Date} A valid Date
 */
function resolveDate(value) {
    if (!value) return new Date();
    const candidate = typeof value.toDate === 'function' ? value.toDate() : value;
    const date = candidate instanceof Date ? candidate : new Date(candidate);
    return Number.isNaN(date.getTime()) ? new Date() : date;
}

/**
 * Format a stored timestamp as a second-precision UTC string ending in "Z".
 * @param {*} value - Raw field value from a Firestore document
 * @returns {string} e.g. '2024-01-02T03:04:05Z'
 */
function toBqTimestamp(value) {
    return resolveDate(value).toISOString().replace(ISO_MILLIS_Z, 'Z');
}

/**
 * Format a pi_master_list timestamp WITHOUT the trailing "Z".
 * Date-like values are ISO-formatted, any other truthy value is passed through
 * as String(value), and missing values become "now".
 * @param {*} value - Raw firstSeenAt / lastSeenAt value
 * @returns {string} e.g. '2024-01-02T03:04:05'
 */
function toBqDatetime(value) {
    const resolved = value?.toDate ? value.toDate() : value;
    if (resolved && resolved.toISOString) return resolved.toISOString().replace(ISO_MILLIS_Z, '');
    if (resolved) return String(resolved);
    return new Date().toISOString().replace(ISO_MILLIS_Z, '');
}

/**
 * All configured analytics sources. Keys must match sourceKey used in recordAnalyticsWrite().
 * @type {SourceConfig[]}
 */
const SOURCE_CONFIGS = [
    {
        // User watchlists (collection group), merged on (user_id, watchlist_id).
        sourceKey: 'watchlists',
        ensureTable: (logger, datasetId) => ensureWatchlistsTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const q = fullSync
                ? db.collectionGroup('watchlists')
                : db.collectionGroup('watchlists').where('updatedAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const data = doc.data();
                return {
                    // The owning user is the grandparent of the watchlist document.
                    user_id: doc.ref.parent.parent.id,
                    watchlist_id: doc.id,
                    name: data.name ?? null,
                    type: data.type ?? null,
                    visibility: data.visibility ?? null,
                    // items may already be a serialised string; otherwise serialise it.
                    items: typeof data.items === 'string' ? data.items : JSON.stringify(data.items || []),
                    item_config: data.itemConfig != null ? JSON.stringify(data.itemConfig) : null,
                    dynamic_config: data.dynamicConfig != null ? JSON.stringify(data.dynamicConfig) : null,
                    updated_at: toBqTimestamp(data.updatedAt),
                    deleted: false,
                    deleted_at: null
                };
            });
        },
        bqTable: 'watchlists',
        mergeKeys: ['user_id', 'watchlist_id']
    },
    {
        // PI reviews (collection group), merged on (user_cid, pi_cid).
        sourceKey: 'reviews',
        ensureTable: (logger, datasetId) => ensurePiReviewsTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const q = fullSync
                ? db.collectionGroup('reviews')
                : db.collectionGroup('reviews').where('createdAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            const rows = [];
            // Keep only the first row per merge key.
            const seen = new Set();
            for (const doc of docs) {
                const data = doc.data();
                const userCid = String(data.userCid ?? doc.ref.parent.parent.id);
                const piCid = String(data.piCid ?? doc.id);
                const key = `${userCid}_${piCid}`;
                if (seen.has(key)) continue;
                seen.add(key);
                rows.push({
                    user_cid: userCid,
                    pi_cid: piCid,
                    rating: data.rating ?? 0,
                    comment: data.comment ?? '',
                    is_anonymous: data.isAnonymous === true,
                    created_at: toBqTimestamp(data.createdAt)
                });
            }
            return rows;
        },
        bqTable: 'pi_reviews',
        mergeKeys: ['user_cid', 'pi_cid']
    },
    {
        // PI page view events from analytics_events (append only).
        sourceKey: 'analytics_events_pi_page_view',
        ensureTable: (logger, datasetId) => ensurePiPageViewEventsTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const ref = db.collection('analytics_events');
            const q = fullSync
                ? ref.where('eventType', '==', 'pi_page_view')
                : ref.where('eventType', '==', 'pi_page_view').where('createdAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const d = doc.data();
                // Resolve once so date and viewed_at come from the same instant.
                const viewedAt = resolveDate(d.viewedAt);
                const viewedAtIso = viewedAt.toISOString();
                return {
                    date: viewedAtIso.split('T')[0],
                    pi_id: String(d.piId ?? ''),
                    viewer_cid: String(d.viewerCid ?? ''),
                    viewed_at: viewedAtIso.replace(ISO_MILLIS_Z, 'Z')
                };
            });
        },
        bqTable: 'pi_page_view_events',
        mergeKeys: null
    },
    {
        // PI master list: a single document whose `investors` map is exploded into rows.
        sourceKey: 'pi_master_list',
        ensureTable: (logger, datasetId) => ensureTableExists(datasetId, 'pi_master_list', getSchema('pi_master_list'), {}, logger),
        fetch: async (db) => {
            return db.collection(PI_MASTER_LIST_COLLECTION).doc(PI_MASTER_LIST_DOCUMENT).get();
        },
        mapRows: (docOrSnapshot) => {
            // Accept either the document snapshot itself or an array whose first element is it.
            const doc = docOrSnapshot && docOrSnapshot.exists ? docOrSnapshot : (Array.isArray(docOrSnapshot) ? docOrSnapshot[0] : null);
            if (!doc || !doc.exists) return [];
            const investors = doc.data().investors || {};
            const rows = [];
            for (const [cid, info] of Object.entries(investors)) {
                if (!info || typeof info !== 'object') continue;
                const lastSeen = toBqDatetime(info.lastSeenAt);
                rows.push({
                    cid: parseInt(String(cid), 10) || 0,
                    username: info.username || '',
                    first_seen_at: toBqDatetime(info.firstSeenAt),
                    last_seen_at: lastSeen,
                    last_updated: lastSeen
                });
            }
            return rows;
        },
        bqTable: 'pi_master_list',
        mergeKeys: ['cid'],
        singleDoc: true
    },
    {
        // Search query events from analytics_events (append only).
        sourceKey: 'analytics_events_search_query',
        ensureTable: (logger, datasetId) => ensureSearchQueriesTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const ref = db.collection('analytics_events');
            const q = fullSync
                ? ref.where('eventType', '==', 'search_query')
                : ref.where('eventType', '==', 'search_query').where('createdAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const d = doc.data();
                return {
                    user_id: d.userId ?? null,
                    query: d.query ?? '',
                    result_count: d.resultCount != null ? d.resultCount : null,
                    created_at: toBqTimestamp(d.createdAt)
                };
            });
        },
        bqTable: 'search_queries',
        mergeKeys: null
    },
    {
        // Watchlist copied events from analytics_events (append only).
        sourceKey: 'analytics_events_watchlist_copied',
        ensureTable: (logger, datasetId) => ensureWatchlistCopiedEventsTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const ref = db.collection('analytics_events');
            const q = fullSync
                ? ref.where('eventType', '==', 'watchlist_copied')
                : ref.where('eventType', '==', 'watchlist_copied').where('createdAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const d = doc.data();
                return {
                    user_id: d.user_id ?? '',
                    source_user_id: d.source_user_id ?? '',
                    source_watchlist_id: d.source_watchlist_id ?? '',
                    new_watchlist_id: d.new_watchlist_id ?? '',
                    created_at: toBqTimestamp(d.createdAt)
                };
            });
        },
        bqTable: 'watchlist_copied_events',
        mergeKeys: null
    },
    {
        // Alert subscriptions (collection group), merged on (user_id, pi_cid).
        sourceKey: 'alert_subscriptions',
        ensureTable: (logger, datasetId) => ensureAlertSubscriptionsTable(logger, datasetId),
        // No incremental filter is applied: every run reads the whole collection
        // group, so the since/fullSync arguments passed by the loader are ignored.
        fetch: async (db) => {
            return db.collectionGroup('alert_subscriptions').get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const data = doc.data();
                return {
                    // The owning user is the grandparent of the subscription document.
                    user_id: doc.ref.parent.parent.id,
                    pi_cid: doc.id,
                    settings: JSON.stringify(data),
                    updated_at: toBqTimestamp(data.updatedAt)
                };
            });
        },
        bqTable: 'alert_subscriptions',
        mergeKeys: ['user_id', 'pi_cid']
    },
    {
        // PI addition requests, merged on request_id (the Firestore document id).
        sourceKey: 'pi_addition_requests',
        ensureTable: (logger, datasetId) => ensurePiAdditionRequestsTable(logger, datasetId),
        fetch: async (db, since, fullSync) => {
            const ref = db.collection('pi_addition_requests');
            const q = fullSync ? ref : ref.where('requestedAt', '>=', since);
            return q.get();
        },
        mapRows: (docs) => {
            return docs.map((doc) => {
                const d = doc.data();
                return {
                    request_id: doc.id,
                    username: d.username ?? null,
                    requested_by: d.requestedBy ?? null,
                    requested_at: toBqTimestamp(d.requestedAt),
                    status: d.status ?? null
                };
            });
        },
        bqTable: 'pi_addition_requests',
        mergeKeys: ['request_id']
    }
];
|
|
257
|
+
|
|
258
|
+
/** Map sourceKey → config for quick lookup */
|
|
259
|
+
const SOURCE_CONFIG_BY_KEY = Object.fromEntries(
    SOURCE_CONFIGS.map((sourceConfig) => {
        const { sourceKey } = sourceConfig;
        return [sourceKey, sourceConfig];
    })
);
|
|
260
|
+
|
|
261
|
+
/** All source keys that have a config (for full sync fallback) */
|
|
262
|
+
const ALL_SOURCE_KEYS = SOURCE_CONFIGS.map(({ sourceKey }) => sourceKey);
|
|
263
|
+
|
|
264
|
+
module.exports = {
|
|
265
|
+
SOURCE_CONFIGS,
|
|
266
|
+
SOURCE_CONFIG_BY_KEY,
|
|
267
|
+
ALL_SOURCE_KEYS
|
|
268
|
+
};
|