@yusufffararatt/dombridge-mcp 2.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +559 -0
- package/bin/cli.js +88 -0
- package/package.json +54 -0
- package/src/bridge/http-server.js +290 -0
- package/src/bridge/middleware.js +56 -0
- package/src/bridge/routes.js +1003 -0
- package/src/bridge-daemon.js +172 -0
- package/src/cli/auto-config.js +120 -0
- package/src/constants.js +13 -0
- package/src/index.js +279 -0
- package/src/mcp-bridge.js +136 -0
- package/src/metrics/error-codes.js +44 -0
- package/src/metrics/index.js +3 -0
- package/src/metrics/metrics-db.js +269 -0
- package/src/metrics/metrics-recorder.js +240 -0
- package/src/metrics/metrics-report.js +146 -0
- package/src/profiles/profile-db.js +159 -0
- package/src/profiles/profile-enricher.js +333 -0
- package/src/profiles/profile-manager.js +563 -0
- package/src/profiles/profile-repo.js +183 -0
- package/src/state/bridge-client.js +272 -0
- package/src/state/bridge-persistence.js +205 -0
- package/src/state/cache.js +38 -0
- package/src/state/extension-state.js +321 -0
- package/src/tools/action_tools.js +218 -0
- package/src/tools/analyze-page.js +247 -0
- package/src/tools/debug-mcp-state.js +172 -0
- package/src/tools/discover-apis.js +186 -0
- package/src/tools/execute-js.js +284 -0
- package/src/tools/export-session.js +171 -0
- package/src/tools/extract-data.js +395 -0
- package/src/tools/get-element.js +281 -0
- package/src/tools/get-network-trace.js +471 -0
- package/src/tools/index.js +110 -0
- package/src/tools/manage-site-profile.js +153 -0
- package/src/tools/paginate.js +444 -0
- package/src/tools/quick-scan.js +418 -0
- package/src/tools/screenshot_tools.js +117 -0
- package/src/utils/circuit-breaker.js +112 -0
- package/src/utils/extract-density.js +21 -0
- package/src/utils/logger.js +31 -0
- package/src/utils/paginate-detector.js +24 -0
- package/src/utils/rate-limiter.js +244 -0
- package/src/utils/run-script.js +37 -0
- package/src/utils/selector-validator.js +95 -0
- package/src/utils/state-validator.js +354 -0
- package/src/utils/tab-resolver.js +70 -0
- package/src/utils/workflow-helper.js +292 -0
- package/src/utils/workflow-state.js +177 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* manage_site_profile — unified profile management tool
|
|
3
|
+
*
|
|
4
|
+
* Replaces: load_site_profile, save_site_profile, check_site_changes
|
|
5
|
+
* Actions: load | save | check | delete | list | update
|
|
6
|
+
*/
|
|
7
|
+
import { ProfileRepo } from '../profiles/profile-repo.js';
|
|
8
|
+
import { join } from 'path';
|
|
9
|
+
import { homedir } from 'os';
|
|
10
|
+
import { mkdirSync } from 'fs';
|
|
11
|
+
|
|
12
|
+
// Profile database file: a non-empty PROFILE_DB_PATH environment variable
// wins; otherwise fall back to ~/.dombridge/profiles.db.
const DB_PATH = process.env.PROFILE_DB_PATH
  ? process.env.PROFILE_DB_PATH
  : join(homedir(), '.dombridge', 'profiles.db');

// Cached ProfileRepo instance; stays null until getRepo() creates it.
let _repo = null;

// Path override installed by _setDbPath(); when null, DB_PATH is used.
let _dbPath = null;
|
|
17
|
+
/**
 * Point this module at a different profile database file.
 *
 * Closes the cached repository (if one is open) and forgets it, so the next
 * getRepo() call opens a fresh repository at the new path.
 *
 * @param {string} path - Database file to use from now on; a falsy value
 *   makes getRepo() fall back to DB_PATH.
 */
export function _setDbPath(path) {
  if (_repo) {
    _repo.close();
  }
  _repo = null;
  _dbPath = path;
}
|
|
22
|
+
/**
 * Return the shared ProfileRepo, creating it on first use.
 *
 * On the first call the parent directory of the database file is created
 * (recursively) before the repository is constructed; later calls reuse the
 * cached instance.
 *
 * @returns {ProfileRepo} The cached repository instance.
 */
function getRepo() {
  if (_repo) {
    return _repo;
  }

  // An override from _setDbPath() takes precedence over the default path.
  const dbFile = _dbPath || DB_PATH;
  const parentDir = join(dbFile, '..');
  mkdirSync(parentDir, { recursive: true });

  _repo = new ProfileRepo(dbFile);
  return _repo;
}
|
|
30
|
+
|
|
31
|
+
// Every operation the tool accepts; shared by the input schema and the handler.
const ACTIONS = ['load', 'save', 'check', 'delete', 'list', 'update'];

/**
 * MCP tool definition for `manage_site_profile`.
 *
 * The schema mirrors what manageSiteProfileHandler reads from its arguments:
 * `action` and `domain` for routing, `notes` / `framework` / `pageType` for
 * save and update, `endpoints` for update, and `limit` for listing.
 */
export const manageSiteProfileTool = {
  name: 'manage_site_profile',
  description: 'Unified site-profile management. Replaces load_site_profile / save_site_profile / check_site_changes. Actions: load, save, check, delete, list, update.',
  inputSchema: {
    type: 'object',
    properties: {
      action: {
        type: 'string',
        enum: ACTIONS,
        description: 'Which operation to perform.'
      },
      domain: {
        type: 'string',
        // The handler treats `load` without a domain as a list request.
        description: 'Target domain (required for: save, check, delete, update; optional for load — when omitted, load returns the profile list).'
      },
      notes: { type: 'string', description: 'For save/update: human-readable notes.' },
      framework: { type: 'string', description: 'For save/update: detected framework.' },
      pageType: { type: 'string', description: 'For save/update: SSR/SPA/etc.' },
      endpoints: {
        type: 'array',
        description: 'For update: incremental endpoint upsert (Bug #7 fix). Each entry: {method, url, status?}.',
        // Describe the element shape so schema consumers that require `items`
        // on arrays accept the tool; no field is marked required to stay
        // backward-compatible with existing callers.
        items: {
          type: 'object',
          properties: {
            method: { type: 'string' },
            url: { type: 'string' },
            status: { type: 'number' }
          }
        }
      },
      limit: { type: 'number', description: 'For list (and load without a domain): max profiles to return (default 20).' }
    },
    required: ['action']
  },
  handler: manageSiteProfileHandler
  // Actual withMetrics wrapping happens in tools/index.js
};
|
|
59
|
+
|
|
60
|
+
/**
 * Execute one `manage_site_profile` request.
 *
 * Checks run in this order, each returning an `isError` result on failure:
 *   1. save/check need a connected bridge client (they read captured endpoints);
 *   2. `action` must be one of ACTIONS;
 *   3. save/check/delete/update need a `domain`.
 * The profile repository is only opened after all checks pass, so an invalid
 * request no longer creates the database directory or file as a side effect.
 *
 * @param {object} [args] - Tool arguments: `action`, and depending on the
 *   action `domain`, `notes`, `framework`, `pageType`, `endpoints`, `limit`.
 * @param {object} [bridgeClient] - Bridge client; `isConnected` and the
 *   optional `getCapturedEndpoints(domain)` are the only members used here.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   MCP-style result whose single text item is a message or pretty-printed JSON.
 */
export async function manageSiteProfileHandler(args, bridgeClient) {
  const input = args || {};
  const { action, domain } = input;

  const ok = (text) => ({ content: [{ type: 'text', text }] });
  const fail = (text) => ({ content: [{ type: 'text', text }], isError: true });
  const asJson = (value) => ok(JSON.stringify(value, null, 2));

  // Connection guard — required for save/check which need captured endpoints
  if (['save', 'check'].includes(action) && (!bridgeClient || !bridgeClient.isConnected)) {
    return fail(`❌ Extension not connected. Required for action: ${action}.\nOpen the target page in Chrome and retry.`);
  }

  if (!ACTIONS.includes(action)) {
    return fail(`❌ Unknown action: ${action}. Allowed: ${ACTIONS.join(', ')}`);
  }

  if (['save', 'check', 'delete', 'update'].includes(action) && !domain) {
    return fail(`❌ domain_required for action: ${action}`);
  }

  // Validation passed — only now touch the filesystem / database.
  const repo = getRepo();

  // Shared by `list` and by `load` without a domain.
  const listProfiles = () => {
    const profiles = repo.list({ limit: input.limit || 20 });
    return asJson({ profiles, count: profiles.length });
  };

  // Called as a method so the client keeps its own `this`.
  const capturedEndpoints = () => (
    bridgeClient.getCapturedEndpoints
      ? bridgeClient.getCapturedEndpoints(domain)
      : []
  );

  switch (action) {
    case 'load': {
      if (!domain) {
        return listProfiles();
      }
      const loaded = repo.load(domain);
      if (!loaded) {
        return fail(`❌ Profile not found for domain: ${domain}`);
      }
      return asJson(loaded);
    }

    case 'save': {
      // Pull captured endpoints from bridge (Phase 2.4 pattern)
      const result = repo.save(domain, {
        endpoints: capturedEndpoints(),
        notes: input.notes,
        framework: input.framework,
        pageType: input.pageType
      });
      const lines = [
        `✅ Profile saved: ${domain}`,
        `Endpoints inserted: ${result.inserted}`,
        `Endpoints dropped (cross-domain): ${result.dropped.length}`,
        ''
      ];
      if (result.dropped.length > 0) {
        lines.push('Dropped URLs:');
        // Only the first five dropped URLs are echoed back.
        for (const d of result.dropped.slice(0, 5)) lines.push(`  - ${d.url}`);
      }
      return ok(lines.join('\n'));
    }

    case 'check': {
      return asJson(repo.check(domain, capturedEndpoints()));
    }

    case 'delete': {
      return asJson({ domain, ...repo.delete(domain) });
    }

    case 'list': {
      return listProfiles();
    }

    case 'update': {
      // Only fields the caller actually supplied end up in the patch.
      const patch = {};
      if (input.notes !== undefined) patch.notes = input.notes;
      if (input.framework !== undefined) patch.framework = input.framework;
      if (input.pageType !== undefined) patch.pageType = input.pageType;
      // Bug #7 fix: support incremental endpoint upsert
      if (Array.isArray(input.endpoints)) patch.endpoints = input.endpoints;
      return asJson({ domain, ...repo.update(domain, patch) });
    }
  }
}
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool: paginate
|
|
3
|
+
* Multi-page and infinite scroll navigation for list/archive pages
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { StateValidator } from '../utils/state-validator.js';
|
|
7
|
+
import { detectPaginationStrategy } from '../utils/paginate-detector.js';
|
|
8
|
+
import { runScript } from '../utils/run-script.js';
|
|
9
|
+
|
|
10
|
+
/**
 * MCP tool definition for `paginate`.
 *
 * The handler drives up to `limit` pagination steps on a tab through the
 * bridge client, using one of three concrete strategies (button click, scroll,
 * URL page-parameter increment). With strategy 'auto' it first probes the page
 * and lets detectPaginationStrategy() / detectStrategy() pick one.
 * After each step it re-counts list-like elements; a step that does not raise
 * the count (from the second step on) ends the run.
 */
export const paginateTool = {
  name: 'paginate',
  description: `Navigate through multi-page content or infinite scroll on a Chrome tab.

WORKFLOW POSITION: 🟠 RPA — Use after identifying a list/archive page

PREREQUISITES:
- ✅ Extension must be connected
- A page with paginated or scrollable content must be open

STRATEGIES:
- auto: Detect automatically (URL param → DOM next button → scroll behavior)
- button: Click a "next page" button by CSS selector
- scroll: Scroll down and wait for new content
- url_increment: Increment page number in URL (e.g. ?page=1 → ?page=2)

STOP CONDITIONS (any one triggers stop):
- Same content appears twice in a row
- Next button not found or disabled
- No new elements loaded after scroll
- Page limit reached
- HTTP or navigation failure

PARAMETERS:
- strategy: Pagination strategy (default: 'auto')
- selector: CSS selector for next button (required for strategy:'button')
- limit: Max pages to navigate (default: 5, max: 50)
- delayMs: Wait time between pages in ms (default: 1500)
- verbose: Return per-page detail (default: false)
- force: Skip soft guards and run even if recent scan exists (default: false)

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to paginate a specific tab.

NOTE: db_write and data pipeline features are intentionally excluded from v1.
Those belong in a separate roadmap item.

EXAMPLE:
paginate({ strategy: 'auto', limit: 3 })
paginate({ strategy: 'button', selector: 'a.next-page', limit: 10 })
paginate({ strategy: 'scroll', limit: 5, delayMs: 2000 })`,

  inputSchema: {
    type: 'object',
    properties: {
      strategy: {
        type: 'string',
        enum: ['auto', 'button', 'scroll', 'url_increment'],
        description: "Pagination strategy. 'auto' detects automatically.",
        default: 'auto'
      },
      selector: {
        type: 'string',
        description: "CSS selector for next-page button. Required for strategy:'button', used as hint for 'auto'."
      },
      limit: {
        type: 'number',
        description: 'Maximum pages to navigate (default: 5, max: 50)',
        default: 5
      },
      delayMs: {
        type: 'number',
        description: 'Wait time between page navigations in ms (default: 1500)',
        default: 1500
      },
      verbose: {
        type: 'boolean',
        description: 'Return per-page detail including element counts (default: false)',
        default: false
      },
      force: {
        type: 'boolean',
        description: 'Skip soft guards (default: false)',
        default: false
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab. Get IDs from debug_mcp_state().'
      }
    },
    required: []
  },

  /**
   * Run the pagination loop.
   *
   * @param {object} [args] - Tool arguments (see inputSchema). `force` is
   *   accepted by the schema but is not read by this handler.
   * @param {object} bridgeClient - Bridge client handed to the helpers.
   * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
   *   A markdown summary, or an error result.
   */
  handler: async (args, bridgeClient) => {
    // Hard guard: extension must be connected
    const connValidation = StateValidator.validateConnection(bridgeClient);
    if (!connValidation.valid) {
      return StateValidator.formatValidationError(connValidation);
    }

    const {
      strategy = 'auto',
      selector,
      limit = 5,
      delayMs = 1500,
      verbose = false,
      tabId
    } = args || {};

    // Clamp the page limit to 1..50. A non-numeric limit would otherwise turn
    // into NaN, skip the loop entirely and report "Completed normally", so it
    // falls back to the documented default instead.
    const requestedLimit = Number(limit);
    const safeLimit = Number.isNaN(requestedLimit)
      ? 5
      : Math.min(Math.max(1, requestedLimit), 50);

    // strategy:'button' requires a selector
    if (strategy === 'button' && !selector) {
      return {
        isError: true,
        content: [{
          type: 'text',
          text: `❌ strategy:'button' requires a selector parameter.\n\nExample: paginate({ strategy: 'button', selector: 'a.next-page' })`
        }]
      };
    }

    // Selector used by the probe to decide whether a "next" control exists;
    // reused as the click target when the probe-based detector picks 'button'.
    const probeNextSelector = 'a.next, [class*="next"], [aria-label*="next" i]';

    const pages = [];
    let currentPage = 1;
    let stopReason = null;
    let resolvedStrategy = strategy;
    // Selector actually handed to the click step; may be filled in by detection.
    let activeSelector = selector;

    try {
      // Step 1: If auto, detect strategy
      if (strategy === 'auto') {
        // Step 1a: Probe page state for pagination metadata (__PROPS, __NEXT_DATA__, __NUXT__, __INITIAL_STATE__)
        // This handles the Trendyol-style case where widgetList.totalPages=N makes url_increment correct
        let pageState = null;
        let hasNextButton = false;
        let hasInfiniteScroll = false;
        try {
          // NOTE(review): tabId is not forwarded to runScript here, so the probe
          // may run on a different tab than the later steps — confirm against
          // runScript's signature.
          const probeResult = await runScript(
            `(function() {
              var ps = {};
              try { if (window.__widget_list_v2__PROPS) ps.widgetList = window.__widget_list_v2__PROPS.widgetList; } catch(e) {}
              try { if (window.__NEXT_DATA__ && window.__NEXT_DATA__.props && window.__NEXT_DATA__.props.pageProps) ps.__NEXT_DATA__ = { props: { pageProps: window.__NEXT_DATA__.props.pageProps } }; } catch(e) {}
              try { if (window.__NUXT__ && window.__NUXT__.data) ps.pagination = window.__NUXT__.data.pagination; } catch(e) {}
              try { if (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.pagination) ps.pagination = window.__INITIAL_STATE__.pagination; } catch(e) {}
              var nb = !!document.querySelector(${JSON.stringify(probeNextSelector)});
              var scrollH = document.documentElement.scrollHeight;
              var clientH = document.documentElement.clientHeight;
              var isc = scrollH > clientH * 2;
              return JSON.stringify({ pageState: ps, hasNextButton: nb, hasInfiniteScroll: isc });
            })()`,
            bridgeClient,
            5000
          );
          if (probeResult) {
            const parsed = JSON.parse(probeResult);
            pageState = parsed.pageState;
            hasNextButton = parsed.hasNextButton;
            hasInfiniteScroll = parsed.hasInfiniteScroll;
          }
        } catch (_e) {
          // ignore — fall through to default detection
        }

        // Step 1b: Use the detector to choose a strategy from probed signals
        const detectedFromMeta = detectPaginationStrategy({
          pageState,
          nextButtonSelector: hasNextButton ? probeNextSelector : null,
          hasInfiniteScroll
        });

        if (detectedFromMeta !== 'auto') {
          resolvedStrategy = detectedFromMeta;
          // Without this the click step would receive no selector at all and
          // always stop with "button not found".
          if (resolvedStrategy === 'button' && !activeSelector) {
            activeSelector = probeNextSelector;
          }
        } else {
          // Step 1c: Fallback to the existing URL/button probe chain
          const detected = await detectStrategy(bridgeClient, selector, tabId);
          resolvedStrategy = detected.strategy;
          if (!activeSelector && detected.selector) {
            activeSelector = detected.selector;
          }
        }
      }

      // Step 2: Get initial element count as baseline
      const initialCount = await getElementCount(bridgeClient, tabId);

      while (currentPage <= safeLimit) {
        const pageResult = { page: currentPage };

        // Execute one pagination step
        const stepResult = await executePaginationStep({
          strategy: resolvedStrategy,
          selector: activeSelector,
          delayMs,
          bridgeClient,
          tabId
        });

        if (!stepResult.success) {
          stopReason = stepResult.reason;
          break;
        }

        // Wait for content to load
        // Scroll strategy: poll until element count changes (early exit) or timeout
        // Other strategies: fixed sleep is sufficient
        const prevCount = pages.length > 0 ? pages[pages.length - 1].elementCount : initialCount;
        let newCount;
        if (resolvedStrategy === 'scroll') {
          newCount = await pollUntilNewContent(prevCount, delayMs, bridgeClient, tabId);
        } else {
          await sleep(delayMs);
          newCount = await getElementCount(bridgeClient, tabId);
        }
        pageResult.elementCount = newCount;
        // Cumulative growth relative to the baseline taken before the first step.
        pageResult.newElements = Math.max(0, newCount - initialCount);

        // Stop condition: no new content (only checked from the second step on).
        // For scroll, distinguish between:
        //  - page_end: scroll has reached document bottom (no more to load)
        //  - no_scroll_progress: scrolled but new content didn't load (slow or stuck)
        // For button / url_increment the scroll position says nothing, so the
        // generic no_new_content reason is reported instead.
        if (pages.length > 0 && newCount <= prevCount) {
          if (resolvedStrategy === 'scroll') {
            const scrollState = await getScrollState(bridgeClient, tabId);
            stopReason = (scrollState && scrollState.atBottom)
              ? 'page_end'
              : 'no_scroll_progress';
          } else {
            stopReason = 'no_new_content';
          }
          break;
        }

        pages.push(pageResult);
        currentPage++;
      }

      if (!stopReason && currentPage > safeLimit) {
        stopReason = 'limit_reached';
      }

      // Build response
      const totalNewElements = pages.length > 0
        ? Math.max(0, (pages[pages.length - 1].elementCount || 0) - initialCount)
        : 0;

      let output = `## Pagination Complete\n\n`;
      output += `**Strategy:** ${resolvedStrategy}\n`;
      output += `**Pages navigated:** ${pages.length}\n`;
      output += `**New elements loaded:** ${totalNewElements}\n`;
      output += `**Stop reason:** ${formatStopReason(stopReason)}\n`;

      if (verbose && pages.length > 0) {
        output += `\n### Per-page detail\n`;
        for (const p of pages) {
          output += `- Page ${p.page}: ${p.elementCount} elements total (+${p.newElements} new)\n`;
        }
      }

      if (pages.length === 0) {
        output += `\n💡 No pages were navigated. The page may not have pagination or the strategy did not match.\n`;
        output += `Try: paginate({ strategy: 'auto', verbose: true }) or specify a selector manually.\n`;
      }

      return { content: [{ type: 'text', text: output }] };

    } catch (error) {
      return {
        isError: true,
        content: [{
          type: 'text',
          text: `❌ Pagination error: ${error.message}\n\nREQUIRED STEPS:\n1. Verify pagination selector and strategy parameters\n2. Make sure the tab is connected and active`
        }]
      };
    }
  }
};
|
|
270
|
+
|
|
271
|
+
// ── Helpers ────────────────────────────────────────────────────────────────────
|
|
272
|
+
|
|
273
|
+
/**
 * Fallback strategy detection used when the probe-based detector is undecided.
 *
 * Order of checks, all evaluated in the page through executeJs():
 *   1. a numeric query parameter that looks like a page/offset counter
 *      → 'url_increment';
 *   2. an enabled "next" control matching one of the built-in selectors
 *      → 'button' (with `hintSelector` if given, else the matching selector);
 *   3. otherwise → 'scroll'.
 *
 * @param {object} bridgeClient - Bridge client forwarded to executeJs().
 * @param {string} [hintSelector] - Caller-supplied next-button selector.
 * @param {number} [tabId] - Target tab, forwarded to executeJs().
 * @returns {Promise<{strategy: string, selector?: string}>}
 */
async function detectStrategy(bridgeClient, hintSelector, tabId) {
  // 1. Check URL for page param.
  // The key test accepts names containing "page" or "offset" plus a few exact
  // short aliases. A bare unanchored "p" alternative would match every key
  // containing the letter p (product, price, sp, ...), and page-size style
  // keys (pageSize, page_limit, ...) are not page counters either.
  const urlCheck = await executeJs(
    `(function() {
      const params = new URLSearchParams(window.location.search);
      const looksLikePageKey = function(k) {
        return /page|offset|^(?:p|pg|pi|pn)$/i.test(k) && !/size|limit|count/i.test(k);
      };
      for (const [k, v] of params) {
        if (looksLikePageKey(k) && /^\\d+$/.test(v)) return { type: 'url', key: k, value: v };
      }
      return null;
    })()`,
    bridgeClient, tabId
  );
  if (urlCheck?.type === 'url') {
    return { strategy: 'url_increment' };
  }

  // 2. Check DOM for next button.
  // aria-disabled is honoured here as well, matching the click step, so a
  // control the click step would refuse is not reported as usable.
  const buttonCheck = await executeJs(
    `(function() {
      const candidates = [
        'a[aria-label*="next" i]', 'a[class*="next" i]', 'button[class*="next" i]',
        'a[class*="sonraki" i]', 'button[class*="sonraki" i]',
        '[data-testid*="next" i]', '.pagination a:last-child', 'li.next a'
      ];
      for (const sel of candidates) {
        const el = document.querySelector(sel);
        if (el && !el.disabled && !el.classList.contains('disabled') && el.getAttribute('aria-disabled') !== 'true') {
          return { type: 'button', selector: sel };
        }
      }
      return null;
    })()`,
    bridgeClient, tabId
  );
  if (buttonCheck?.type === 'button') {
    return { strategy: 'button', selector: hintSelector || buttonCheck.selector };
  }

  // 3. Default to scroll
  return { strategy: 'scroll' };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Perform a single pagination step in the page via executeJs().
 *
 * - 'button': click the element matching `selector` unless it is missing or
 *   disabled (`disabled` property, `disabled` class, or aria-disabled="true").
 * - 'scroll': smooth-scroll to the bottom of the document body.
 * - 'url_increment': add 1 to the first numeric page-like query parameter and
 *   navigate to the resulting URL.
 *
 * The options object may carry other keys (callers pass `delayMs`); they are
 * ignored because waiting is done by the caller.
 *
 * @param {object} options
 * @param {string} options.strategy - 'button' | 'scroll' | 'url_increment'.
 * @param {string} [options.selector] - Next-button selector for 'button'.
 * @param {object} options.bridgeClient - Bridge client forwarded to executeJs().
 * @param {number} [options.tabId] - Target tab, forwarded to executeJs().
 * @returns {Promise<{success: boolean, reason?: string}>}
 */
async function executePaginationStep({ strategy, selector, bridgeClient, tabId }) {
  switch (strategy) {
    case 'button': {
      // Without a selector the injected code would look up the literal
      // string "undefined"; the outcome is the same, minus the round trip.
      if (!selector) return { success: false, reason: 'button_not_found' };

      const result = await executeJs(
        `(function() {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (!el) return { ok: false, reason: 'button_not_found' };
          if (el.disabled || el.classList.contains('disabled') || el.getAttribute('aria-disabled') === 'true') {
            return { ok: false, reason: 'button_disabled' };
          }
          el.click();
          return { ok: true };
        })()`,
        bridgeClient, tabId
      );
      if (!result?.ok) return { success: false, reason: result?.reason || 'button_error' };
      return { success: true };
    }

    case 'scroll': {
      await executeJs(
        `window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' })`,
        bridgeClient, tabId
      );
      return { success: true };
    }

    case 'url_increment': {
      // The key test accepts names containing "page" or "offset" plus a few
      // exact short aliases, and skips page-size style keys. A bare unanchored
      // "p" alternative would match keys such as "price" or "product" and
      // increment the wrong parameter.
      // NOTE(review): offset-style parameters are also advanced by 1, not by a
      // page size — confirm this is the intended behaviour.
      const result = await executeJs(
        `(function() {
          const url = new URL(window.location.href);
          const params = url.searchParams;
          const looksLikePageKey = function(k) {
            return /page|offset|^(?:p|pg|pi|pn)$/i.test(k) && !/size|limit|count/i.test(k);
          };
          for (const [k, v] of params) {
            if (looksLikePageKey(k) && /^\\d+$/.test(v)) {
              params.set(k, String(Number.parseInt(v, 10) + 1));
              window.location.href = url.toString();
              return { ok: true };
            }
          }
          return { ok: false, reason: 'no_page_param' };
        })()`,
        bridgeClient, tabId
      );
      if (!result?.ok) return { success: false, reason: result?.reason || 'url_increment_failed' };
      return { success: true };
    }

    default:
      return { success: false, reason: 'unknown_strategy' };
  }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Count list-like elements currently in the page.
 *
 * Evaluates a fixed querySelectorAll(...).length expression through
 * executeJs() and returns the number it yields.
 *
 * @param {object} bridgeClient - Bridge client forwarded to executeJs().
 * @param {number} [tabId] - Target tab, forwarded to executeJs().
 * @returns {Promise<number>} The count, or 0 when executeJs() did not return a number.
 */
async function getElementCount(bridgeClient, tabId) {
  const countExpression = `document.querySelectorAll('li, article, ytd-rich-item-renderer, ytd-video-renderer, ytd-grid-video-renderer, [class*="item"], [class*="card"], [class*="product"], [class*="result"], [class*="video"]').length`;
  const value = await executeJs(countExpression, bridgeClient, tabId);

  if (typeof value !== 'number') {
    return 0;
  }
  return value;
}
|
|
375
|
+
|
|
376
|
+
/**
 * Read the page's vertical scroll position through executeJs().
 *
 * Lets the caller tell "reached the end of the page" apart from "scrolled, but
 * nothing new was loaded". `atBottom` is true when scrollY + clientHeight is
 * within 4px of the document's scrollHeight.
 *
 * @param {object} bridgeClient - Bridge client forwarded to executeJs().
 * @param {number} [tabId] - Target tab, forwarded to executeJs().
 * @returns {Promise<{scrollY: number, scrollHeight: number, clientHeight: number, atBottom: boolean}|null>}
 *   The parsed state, or null when the result is not a string or is not valid JSON.
 */
async function getScrollState(bridgeClient, tabId) {
  const serialized = await executeJs(
    `JSON.stringify({
      scrollY: window.scrollY,
      scrollHeight: document.documentElement.scrollHeight,
      clientHeight: document.documentElement.clientHeight,
      atBottom: (window.scrollY + document.documentElement.clientHeight) >= (document.documentElement.scrollHeight - 4)
    })`,
    bridgeClient, tabId
  );

  if (typeof serialized !== 'string') {
    return null;
  }

  try {
    return JSON.parse(serialized);
  } catch {
    return null;
  }
}
|
|
393
|
+
|
|
394
|
+
/**
 * Run a JavaScript snippet in the page through the bridge client.
 *
 * Queues an 'execute-js' request tagged with a generated id, then waits up to
 * 10 seconds for the matching 'js-execution' result.
 *
 * @param {string} code - Source text sent as the request's `code`.
 * @param {object} bridgeClient - Must provide queueRequest() and waitForResult().
 * @param {number} [tabId] - Added to the request only when not undefined.
 * @returns {Promise<*>} The record's `result`, or null when there is no
 *   record, no result, or either bridge call throws.
 */
async function executeJs(code, bridgeClient, tabId) {
  const requestId = `paginate-js-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

  const payload = {
    code,
    timeout: 5000,
    id: requestId,
    context: 'page'
  };
  if (tabId !== undefined) {
    payload.tabId = tabId;
  }

  try {
    await bridgeClient.queueRequest('execute-js', payload);
    const record = await bridgeClient.waitForResult('js-execution', requestId, 10000);
    return record?.result ?? null;
  } catch {
    // Best effort: callers treat null as "no result".
    return null;
  }
}
|
|
414
|
+
|
|
415
|
+
/**
 * Wait for the element count to rise above `baseCount`.
 *
 * Sleeps 400 ms first, then re-counts every 400 ms until `maxWaitMs` (measured
 * from the call) has elapsed. Returns as soon as a higher count is seen;
 * otherwise returns one final count taken after the deadline.
 *
 * @param {number} baseCount - Count to beat.
 * @param {number} maxWaitMs - Polling budget in milliseconds.
 * @param {object} bridgeClient - Bridge client forwarded to getElementCount().
 * @param {number} [tabId] - Target tab, forwarded to getElementCount().
 * @returns {Promise<number>} The latest element count.
 */
async function pollUntilNewContent(baseCount, maxWaitMs, bridgeClient, tabId) {
  const stopAt = Date.now() + maxWaitMs;

  // allow scroll animation to reach viewport trigger
  await sleep(400);

  while (Date.now() < stopAt) {
    const current = await getElementCount(bridgeClient, tabId);
    if (current > baseCount) {
      // new content detected early
      return current;
    }
    await sleep(400);
  }

  return getElementCount(bridgeClient, tabId);
}
|
|
425
|
+
|
|
426
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds passed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
429
|
+
|
|
430
|
+
/**
 * Turn a stop-reason code into the sentence shown in the tool output.
 *
 * Known codes map to a fixed label. Unknown non-empty codes are returned
 * unchanged, and a missing reason (null, undefined, empty string) reads as
 * "Completed normally".
 *
 * The labels live in a Map rather than a plain object so that codes which
 * happen to be names on Object.prototype (e.g. 'toString', 'constructor') are
 * treated as unknown codes instead of resolving to an inherited function.
 *
 * @param {string|null|undefined} reason - Stop-reason code.
 * @returns {string} Human-readable label (or the unknown code itself).
 */
function formatStopReason(reason) {
  const labels = new Map([
    ['no_new_content', 'No new content loaded'],
    ['page_end', 'Reached page end (scroll at bottom — list fully loaded)'],
    ['no_scroll_progress', 'Scrolled but no new content loaded (page may be slow or stuck — try longer delayMs or different selector)'],
    ['button_not_found', 'Next button not found'],
    ['button_disabled', 'Next button is disabled'],
    ['limit_reached', 'Page limit reached'],
    ['no_page_param', 'No page parameter in URL'],
    ['url_increment_failed', 'URL increment failed'],
    ['unknown_strategy', 'Unknown strategy'],
    // Kept so the literal string 'null' keeps mapping as it did before.
    ['null', 'Completed normally']
  ]);
  return labels.get(reason) || reason || 'Completed normally';
}
|