@gonzih/of-scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env node
2
+ import { connectRedis, xadd, isSeen, markSeen } from './redis';
3
+ import { launchBrowser, ensureLoggedIn, takeErrorScreenshot } from './browser';
4
+ import { scrapeNewMessages } from './messages';
5
+ import { scrapeProfile } from './profiles';
6
+ import { scrapeFinancials } from './financials';
7
+ import type { Browser, Page } from 'puppeteer';
8
+
9
/** Delay between poll cycles used when the environment supplies no usable value. */
const DEFAULT_POLL_INTERVAL_MS = 30000;

/** Largest delay `setTimeout` honours; anything above it is coerced to 1 ms. */
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Parses a poll interval expressed in milliseconds.
 *
 * A missing, empty, non-numeric, non-positive or out-of-range value falls back
 * to {@link DEFAULT_POLL_INTERVAL_MS}. Without this guard such a value reaches
 * `setTimeout`, which treats NaN / <1 / too-large delays as 1 ms and would turn
 * the poll loop into a near-busy loop.
 *
 * @param raw - Raw environment value, or `undefined` when the variable is unset.
 * @returns A positive integer number of milliseconds.
 */
function parsePollIntervalMs(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  const usable = Number.isFinite(parsed) && parsed > 0 && parsed <= MAX_TIMER_DELAY_MS;
  return usable ? parsed : DEFAULT_POLL_INTERVAL_MS;
}

/** Delay between poll cycles, taken from the `POLL_INTERVAL_MS` environment variable. */
const POLL_INTERVAL_MS = parsePollIntervalMs(process.env.POLL_INTERVAL_MS);

/** Pause inserted before each per-message page scrape. */
const RATE_LIMIT_MS = 2000;
11
+
12
/**
 * Pauses the calling async function.
 *
 * @param ms - Delay passed straight to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
15
+
16
/**
 * Publishes one chat message and the sender's scraped details to Redis streams.
 *
 * Steps, in order:
 *  1. append the message to `of:messages`;
 *  2. wait `RATE_LIMIT_MS`, scrape the sender's profile and, if one is
 *     returned, append it to `of:profiles`;
 *  3. wait `RATE_LIMIT_MS`, scrape the sender's financials and, if any are
 *     returned, append them to `of:financials`.
 *
 * @param page - Browser page reused for both scrapes (it is navigated away).
 * @param messageId - Identifier of the message being processed.
 * @param fromUsername - Sender name; also used to look up profile/financials.
 * @param text - Message text.
 * @param timestamp - Message timestamp as scraped.
 * @param threadId - Conversation the message belongs to.
 */
async function processMessage(
  page: Page,
  messageId: string,
  fromUsername: string,
  text: string,
  timestamp: string,
  threadId: string
): Promise<void> {
  const startedAt = new Date().toISOString();
  console.log(`[${startedAt}] Processing message ${messageId} from @${fromUsername}`);

  const messageEntry = { messageId, fromUsername, text, timestamp, threadId };
  await xadd('of:messages', messageEntry);

  await sleep(RATE_LIMIT_MS);

  const profile = await scrapeProfile(page, fromUsername);
  if (profile) {
    const profileEntry = {
      username: profile.username,
      displayName: profile.displayName,
      // Stream fields are strings, so the boolean is stringified.
      isSubscribed: String(profile.isSubscribed),
      profilePic: profile.profilePic,
      bio: profile.bio,
      fetchedAt: profile.fetchedAt,
    };
    await xadd('of:profiles', profileEntry);
  }

  await sleep(RATE_LIMIT_MS);

  const financials = await scrapeFinancials(page, fromUsername, threadId);
  if (!financials) {
    return;
  }

  await xadd('of:financials', {
    username: financials.username,
    totalTips: financials.totalTips,
    totalPurchases: financials.totalPurchases,
    subscriptionStatus: financials.subscriptionStatus,
    lastPaymentDate: financials.lastPaymentDate,
    lifetimeValue: financials.lifetimeValue,
    updatedAt: financials.updatedAt,
  });
}
64
+
65
/**
 * Runs the scrape/publish cycle forever.
 *
 * Each cycle scrapes the chat list, skips messages already recorded as seen,
 * marks every new message as seen *before* processing it, and then processes
 * it. Any error aborts the rest of the cycle, is logged, and triggers a
 * best-effort error screenshot. The loop then sleeps `POLL_INTERVAL_MS`.
 *
 * @param browser - Browser instance (not used inside the loop itself).
 * @param page - Logged-in page used for all scraping.
 * @returns Never resolves under normal operation.
 */
async function pollLoop(browser: Browser, page: Page): Promise<void> {
  for (;;) {
    const cycleStartedAt = new Date().toISOString();
    console.log(`[${cycleStartedAt}] Starting poll cycle`);

    try {
      const batch = await scrapeNewMessages(page);

      for (const { messageId, fromUsername, text, timestamp, threadId } of batch) {
        if (await isSeen(messageId)) {
          continue;
        }

        await markSeen(messageId);
        await processMessage(page, messageId, fromUsername, text, timestamp, threadId);
      }
    } catch (cycleError) {
      console.error(`[${new Date().toISOString()}] Poll cycle error:`, cycleError);
      try {
        await takeErrorScreenshot(page);
      } catch {
        // A failed screenshot must not stop the loop.
      }
    }

    console.log(`[${new Date().toISOString()}] Poll cycle complete — sleeping ${POLL_INTERVAL_MS}ms`);
    await sleep(POLL_INTERVAL_MS);
  }
}
102
+
103
/**
 * Program entry point: connects to Redis, launches the browser, logs in,
 * installs SIGINT/SIGTERM handlers and hands control to the poll loop.
 *
 * Exits the process with code 1 if the browser cannot be launched or the
 * login fails, and with code 0 when a shutdown signal is handled.
 */
async function main(): Promise<void> {
  const now = (): string => new Date().toISOString();

  console.log(`[${now()}] of-scraper starting up`);
  console.log(`[${now()}] POLL_INTERVAL_MS=${POLL_INTERVAL_MS}`);

  await connectRedis();

  let browser: Browser | null = null;
  let page: Page | null = null;

  try {
    browser = await launchBrowser();
    page = await ensureLoggedIn(browser);
  } catch (startupError) {
    console.error(`[${now()}] Failed to launch browser or log in:`, startupError);
    // The browser may have launched before the login failed; close it if so.
    if (browser) {
      await browser.close().catch(() => {});
    }
    process.exit(1);
  }

  const shutdown = async (): Promise<void> => {
    console.log(`[${now()}] Shutting down...`);
    if (browser) {
      await browser.close().catch(() => {});
    }
    process.exit(0);
  };

  const onSignal = (): void => {
    shutdown().catch(console.error);
  };
  process.on('SIGINT', onSignal);
  process.on('SIGTERM', onSignal);

  await pollLoop(browser, page);
}
132
+
133
// Start the scraper; anything that escapes main() is logged and ends the process with code 1.
main().catch((fatal) => {
  const failedAt = new Date().toISOString();
  console.error(`[${failedAt}] Fatal error:`, fatal);
  process.exit(1);
});
@@ -0,0 +1,163 @@
1
import { createHash } from 'node:crypto';
import type { Page } from 'puppeteer';
2
+
3
/** One chat message as produced by the scrapers in this module. */
export interface Message {
  /** Identifier used to recognise a message that was already handled. */
  messageId: string;
  /** Trimmed text of the sender-name element; may be empty for thread scrapes. */
  fromUsername: string;
  /** Trimmed message text (the list preview, or the message body in a thread). */
  text: string;
  /**
   * The time element's `datetime` attribute, else its visible text, else the
   * ISO-8601 time at which the scrape ran.
   */
  timestamp: string;
  /** Numeric id taken from the `/my/chats/chat/<id>` URL of the conversation. */
  threadId: string;
}
10
+
11
/**
 * Loads the chat list page and returns one entry per conversation, built from
 * the preview shown for that conversation.
 *
 * `messageId` is `<threadId>_<first 16 hex chars of SHA-256(preview text)>`.
 * The whole preview text is hashed: the previous scheme kept only the first
 * 16 base64 characters of the URI-encoded text, so two previews in the same
 * thread that shared a short prefix got the same id and the later one was
 * treated as already seen. It also threw on text containing a lone surrogate
 * (e.g. a truncated emoji), which silently dropped the item.
 * NOTE: ids produced by the previous scheme do not match the new ones, so
 * entries already stored as "seen" will be processed once more after upgrading.
 *
 * Items without a thread id or sender name are skipped.
 *
 * @param page - Logged-in page; it is navigated to the chat list.
 * @returns The scraped entries, or `[]` when navigation fails or the chat list
 *   does not appear within the timeout.
 */
export async function scrapeNewMessages(page: Page): Promise<Message[]> {
  /** Row shape returned from the browser context; the id is added afterwards in Node. */
  type ChatPreview = Omit<Message, 'messageId'>;

  const ts = new Date().toISOString();
  console.log(`[${ts}] Polling https://onlyfans.com/my/chats for new messages`);

  try {
    await page.goto('https://onlyfans.com/my/chats', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
  } catch (err) {
    console.error(`[${new Date().toISOString()}] Failed to navigate to chats:`, err);
    return [];
  }

  try {
    await page.waitForSelector('.b-chats__list, .b-chat-item, [class*="chat-list"], [class*="chatItem"]', {
      timeout: 15000,
    });
  } catch {
    console.warn(`[${new Date().toISOString()}] Chat list selector not found — page may not be loaded`);
    return [];
  }

  // This callback is serialized and executed inside the page, so it must not
  // reference anything from the surrounding Node scope.
  const previews = await page.evaluate((): ChatPreview[] => {
    const results: ChatPreview[] = [];

    const chatItemSelectors = [
      '.b-chat-item',
      '[class*="chatItem"]',
      '[data-type="chat-item"]',
      '.b-chats__item',
    ];

    // Use the first selector that matches anything.
    let chatItems: Element[] = [];
    for (const sel of chatItemSelectors) {
      const found = Array.from(document.querySelectorAll(sel));
      if (found.length > 0) {
        chatItems = found;
        break;
      }
    }

    for (const item of chatItems) {
      try {
        const link = item.querySelector('a[href*="/my/chats/chat/"]');
        const href = link?.getAttribute('href') ?? '';
        const threadMatch = href.match(/\/my\/chats\/chat\/(\d+)/);
        const threadId = threadMatch ? threadMatch[1] : '';

        const nameEl =
          item.querySelector('.b-chat-item__name') ??
          item.querySelector('[class*="userName"]') ??
          item.querySelector('[class*="name"]');
        const fromUsername = nameEl?.textContent?.trim() ?? '';

        const textEl =
          item.querySelector('.b-chat-item__message') ??
          item.querySelector('[class*="lastMessage"]') ??
          item.querySelector('[class*="message"]');
        const text = textEl?.textContent?.trim() ?? '';

        const timeEl =
          item.querySelector('time') ??
          item.querySelector('[class*="time"]') ??
          item.querySelector('[class*="date"]');
        const timestamp =
          timeEl?.getAttribute('datetime') ?? timeEl?.textContent?.trim() ?? new Date().toISOString();

        if (threadId && fromUsername) {
          results.push({ fromUsername, text, timestamp, threadId });
        }
      } catch {
        // Best effort: one malformed item must not discard the rest of the list.
      }
    }

    return results;
  });

  // The timestamp is deliberately left out of the id: it may be relative text
  // or the scrape time, either of which changes from one poll to the next.
  const messages: Message[] = previews.map(({ fromUsername, text, timestamp, threadId }) => {
    const digest = createHash('sha256').update(text).digest('hex').slice(0, 16);
    return { messageId: `${threadId}_${digest}`, fromUsername, text, timestamp, threadId };
  });

  console.log(`[${new Date().toISOString()}] Found ${messages.length} chat threads`);
  return messages;
}
95
+
96
/**
 * Opens one conversation and returns its incoming messages that have text.
 *
 * Messages recognised as sent by the logged-in account (class `m-my`, a
 * descendant matching `[class*="my-message"]`, or `data-type="outgoing"`) are
 * skipped, as are messages with empty text.
 *
 * `messageId` is the element's `data-id`, else its `id`, else
 * `<threadId>_<first 16 hex chars of SHA-256(domTimestamp + "\n" + text)>`.
 * The fallback hashes the complete input: the previous scheme truncated the
 * base64 of `timestamp + text` to 16 characters, which for an ISO timestamp
 * covered only its first ~12 characters, so the text never influenced the id
 * and messages from the same hours collided. The fallback also uses only a
 * timestamp found in the DOM; the scrape-time substitute is kept out of the id
 * because it differs on every scrape.
 * NOTE(review): if the site renders relative times ("5 min ago") without a
 * `datetime` attribute, the fallback id still changes over time — confirm
 * against the live markup.
 *
 * @param page - Logged-in page; it is navigated to the conversation.
 * @param threadId - Id of the conversation to open.
 * @returns The scraped messages, or `[]` when navigation fails or the message
 *   container does not appear within the timeout.
 */
export async function scrapeThreadMessages(page: Page, threadId: string): Promise<Message[]> {
  /** Row shape returned from the browser context before ids are finalised. */
  type RawThreadMessage = {
    domId: string | null;
    domTimestamp: string | null;
    fromUsername: string;
    text: string;
  };

  const url = `https://onlyfans.com/my/chats/chat/${threadId}`;
  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
  } catch (err) {
    console.error(`[${new Date().toISOString()}] Failed to navigate to thread ${threadId}:`, err);
    return [];
  }

  try {
    await page.waitForSelector('.b-chat__messages, [class*="messages"]', { timeout: 15000 });
  } catch {
    return [];
  }

  // This callback is serialized and executed inside the page, so it must not
  // reference anything from the surrounding Node scope.
  const rawMessages = await page.evaluate((): RawThreadMessage[] => {
    const results: RawThreadMessage[] = [];
    const messageSelectors = [
      '.b-message__wrapper',
      '[class*="messageItem"]',
      '[class*="message-item"]',
    ];

    // Use the first selector that matches anything.
    let items: Element[] = [];
    for (const sel of messageSelectors) {
      const found = Array.from(document.querySelectorAll(sel));
      if (found.length > 0) {
        items = found;
        break;
      }
    }

    for (const item of items) {
      try {
        const isSelf =
          item.classList.contains('m-my') ||
          item.querySelector('[class*="my-message"]') !== null ||
          item.getAttribute('data-type') === 'outgoing';
        if (isSelf) continue;

        const textEl = item.querySelector('.b-message__text, [class*="messageText"]');
        const text = textEl?.textContent?.trim() ?? '';

        const timeEl = item.querySelector('time, [class*="time"]');
        const domTimestamp = timeEl?.getAttribute('datetime') ?? timeEl?.textContent?.trim() ?? null;

        const domId = item.getAttribute('data-id') ?? item.getAttribute('id');

        const usernameEl = item.querySelector('[class*="userName"], [class*="name"]');
        const fromUsername = usernameEl?.textContent?.trim() ?? '';

        if (text) {
          results.push({ domId, domTimestamp, fromUsername, text });
        }
      } catch {
        // Best effort: one malformed element must not discard the rest of the thread.
      }
    }

    return results;
  });

  return rawMessages.map(({ domId, domTimestamp, fromUsername, text }) => {
    const timestamp = domTimestamp ?? new Date().toISOString();
    const messageId =
      domId ??
      `${threadId}_${createHash('sha256')
        .update(`${domTimestamp ?? ''}\n${text}`)
        .digest('hex')
        .slice(0, 16)}`;
    return { messageId, fromUsername, text, timestamp, threadId };
  });
}
@@ -0,0 +1,105 @@
1
+ import type { Page } from 'puppeteer';
2
+
3
/** Public profile details collected by `scrapeProfile`. */
export interface Profile {
  /** The username the profile was requested for. */
  username: string;
  /** First non-empty name found on the page; falls back to `username`. */
  displayName: string;
  /** True when the subscribe button text contains "subscribed" or "unsubscribe". */
  isSubscribed: boolean;
  /** `src` of the first matching avatar image, or an empty string. */
  profilePic: string;
  /** First non-empty bio/about text found on the page, or an empty string. */
  bio: string;
  /** ISO-8601 time at which the page was read. */
  fetchedAt: string;
}
11
+
12
/**
 * Opens a user's profile page and extracts display name, avatar, bio and
 * subscription state.
 *
 * The username is percent-encoded before it is placed in the URL. Callers may
 * pass text scraped from the chat list, and an unencoded space, `/`, `?` or
 * `#` in it would change which path is navigated to. Names made only of
 * letters, digits, `.`, `_` and `-` are unaffected by the encoding.
 *
 * If the profile container does not appear within the timeout a warning is
 * logged and extraction is still attempted, so the result may contain only
 * fallback values.
 *
 * @param page - Logged-in page; it is navigated to the profile.
 * @param username - Name of the user whose profile should be read.
 * @returns The extracted profile, or `null` when the URL cannot be built or
 *   the navigation fails.
 */
export async function scrapeProfile(page: Page, username: string): Promise<Profile | null> {
  console.log(`[${new Date().toISOString()}] Fetching profile for @${username}`);

  try {
    // Built inside the try block: encodeURIComponent throws on malformed
    // UTF-16 (a lone surrogate), which is reported like a failed navigation.
    const url = `https://onlyfans.com/${encodeURIComponent(username)}`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
  } catch (err) {
    console.error(`[${new Date().toISOString()}] Failed to navigate to profile ${username}:`, err);
    return null;
  }

  try {
    await page.waitForSelector(
      '.b-profile__main, [class*="profile"], .b-user-card, [class*="userCard"]',
      { timeout: 15000 }
    );
  } catch {
    console.warn(`[${new Date().toISOString()}] Profile selectors not found for @${username}`);
  }

  // This callback is serialized and executed inside the page, so it must not
  // reference anything from the surrounding Node scope.
  const profile = await page.evaluate((uname: string): Profile => {
    const nameSelectors = [
      '.b-username__name',
      '[class*="userName"]',
      'h1[class*="name"]',
      '.g-user-name',
    ];
    let displayName = uname;
    for (const sel of nameSelectors) {
      const el = document.querySelector(sel);
      if (el?.textContent?.trim()) {
        displayName = el.textContent.trim();
        break;
      }
    }

    const picSelectors = [
      '.b-user-avatar img',
      '[class*="avatar"] img',
      '.g-avatar img',
      'img[class*="avatar"]',
    ];
    let profilePic = '';
    for (const sel of picSelectors) {
      const el = document.querySelector<HTMLImageElement>(sel);
      if (el?.src) {
        profilePic = el.src;
        break;
      }
    }

    const bioSelectors = [
      '.b-profile__about-text',
      '[class*="bio"]',
      '[class*="about"]',
      '.g-desc',
    ];
    let bio = '';
    for (const sel of bioSelectors) {
      const el = document.querySelector(sel);
      if (el?.textContent?.trim()) {
        bio = el.textContent.trim();
        break;
      }
    }

    // NOTE(review): '.b-btn-subscibe' looks like a misspelling of "subscribe";
    // left untouched because the site's real class name cannot be verified here.
    const subSelectors = [
      '[class*="subscribeButton"]',
      'button[class*="subscribe"]',
      '.b-btn-subscibe',
    ];
    let isSubscribed = false;
    for (const sel of subSelectors) {
      const el = document.querySelector(sel);
      if (el) {
        // Only the first matching button is inspected.
        const btnText = el.textContent?.toLowerCase() ?? '';
        isSubscribed = btnText.includes('subscribed') || btnText.includes('unsubscribe');
        break;
      }
    }

    return {
      username: uname,
      displayName,
      isSubscribed,
      profilePic,
      bio,
      fetchedAt: new Date().toISOString(),
    };
  }, username);

  console.log(`[${new Date().toISOString()}] Profile fetched for @${username}: ${profile.displayName}`);
  return profile;
}
package/src/redis.ts ADDED
@@ -0,0 +1,34 @@
1
+ import Redis from 'ioredis';
2
+
3
/** Redis connection string; `REDIS_URL` from the environment, else a local default. */
const REDIS_URL = process.env.REDIS_URL ?? 'redis://localhost:6379';

/**
 * Shared client used by every helper in this module.
 * `lazyConnect` is set and `connectRedis()` calls `connect()` explicitly.
 */
export const redis = new Redis(REDIS_URL, { maxRetriesPerRequest: 3, lazyConnect: true });

// Log client errors with a timestamp; only the message is printed.
redis.on('error', ({ message }: Error) => {
  const at = new Date().toISOString();
  console.error(`[${at}] Redis error:`, message);
});
13
+
14
/**
 * Masks the user-info part (`user:password@`) of a connection URL so it can be
 * written to logs. URLs without user-info are returned unchanged.
 *
 * @param url - Connection string such as `redis://:secret@host:6379/0`.
 * @returns The same string with everything before the authority's last `@`
 *   replaced by `***`.
 */
function redactRedisUrl(url: string): string {
  // Greedy within the authority (stops at the first '/', '?' or '#') so a
  // password that itself contains '@' is still fully masked.
  return url.replace(/^([a-z][a-z0-9+.-]*:\/\/)[^/?#]*@/i, '$1***@');
}

/**
 * Opens the connection of the shared Redis client and logs the target.
 *
 * The URL is logged with credentials masked: `REDIS_URL` may embed a password,
 * which must not end up in log output.
 *
 * @throws Whatever `redis.connect()` rejects with.
 */
export async function connectRedis(): Promise<void> {
  await redis.connect();
  console.log(`[${new Date().toISOString()}] Redis connected to ${redactRedisUrl(REDIS_URL)}`);
}
18
+
19
/**
 * Appends an entry to a Redis stream, letting Redis assign the entry id (`*`).
 *
 * @param stream - Name of the stream to append to.
 * @param fields - Field/value pairs of the entry, sent in insertion order.
 * @returns Whatever the client's `xadd` resolves with.
 */
export async function xadd(stream: string, fields: Record<string, string>): Promise<string | null> {
  // XADD takes a flat "field value field value ..." argument list.
  const flattened: string[] = Object.entries(fields).flatMap(([field, value]) => [field, value]);
  return redis.xadd(stream, '*', ...flattened);
}
26
+
27
/**
 * Reports whether a message id is a member of the `of:seen_messages` set.
 *
 * @param messageId - Id to look up.
 * @returns `true` when the membership query answers `1`.
 */
export async function isSeen(messageId: string): Promise<boolean> {
  return (await redis.sismember('of:seen_messages', messageId)) === 1;
}
31
+
32
/**
 * Adds a message id to the `of:seen_messages` set.
 *
 * @param messageId - Id to record.
 */
export async function markSeen(messageId: string): Promise<void> {
  const seenSetKey = 'of:seen_messages';
  await redis.sadd(seenSetKey, messageId);
}
package/tsconfig.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "commonjs",
5
+ "lib": ["ES2020", "DOM"],
6
+ "outDir": "./dist",
7
+ "rootDir": "./src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "resolveJsonModule": true,
13
+ "declaration": true,
14
+ "declarationMap": true,
15
+ "sourceMap": true
16
+ },
17
+ "include": ["src/**/*"],
18
+ "exclude": ["node_modules", "dist"]
19
+ }