mcpmake 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -635
- package/dist/commands/bundle.d.ts +1 -0
- package/dist/commands/bundle.d.ts.map +1 -0
- package/dist/commands/bundle.js +5 -4
- package/dist/commands/bundle.js.map +1 -0
- package/dist/commands/ci.d.ts +1 -0
- package/dist/commands/ci.d.ts.map +1 -0
- package/dist/commands/ci.js +3 -2
- package/dist/commands/ci.js.map +1 -0
- package/dist/commands/deploy.d.ts +1 -0
- package/dist/commands/deploy.d.ts.map +1 -0
- package/dist/commands/deploy.js +4 -3
- package/dist/commands/deploy.js.map +1 -0
- package/dist/commands/diff.d.ts +1 -0
- package/dist/commands/diff.d.ts.map +1 -0
- package/dist/commands/diff.js +5 -4
- package/dist/commands/diff.js.map +1 -0
- package/dist/commands/from/describe.d.ts +1 -0
- package/dist/commands/from/describe.d.ts.map +1 -0
- package/dist/commands/from/describe.js +11 -10
- package/dist/commands/from/describe.js.map +1 -0
- package/dist/commands/from/har.d.ts +1 -0
- package/dist/commands/from/har.d.ts.map +1 -0
- package/dist/commands/from/har.js +14 -13
- package/dist/commands/from/har.js.map +1 -0
- package/dist/commands/from/openapi.d.ts +1 -0
- package/dist/commands/from/openapi.d.ts.map +1 -0
- package/dist/commands/from/openapi.js +17 -16
- package/dist/commands/from/openapi.js.map +1 -0
- package/dist/commands/from/postman.d.ts +1 -0
- package/dist/commands/from/postman.d.ts.map +1 -0
- package/dist/commands/from/postman.js +13 -12
- package/dist/commands/from/postman.js.map +1 -0
- package/dist/commands/from/stainless.d.ts +110 -0
- package/dist/commands/from/stainless.d.ts.map +1 -0
- package/dist/commands/from/stainless.js +272 -0
- package/dist/commands/from/stainless.js.map +1 -0
- package/dist/commands/from/target-support.d.ts +1 -0
- package/dist/commands/from/target-support.d.ts.map +1 -0
- package/dist/commands/from/target-support.js +2 -1
- package/dist/commands/from/target-support.js.map +1 -0
- package/dist/commands/from/url.d.ts +1 -0
- package/dist/commands/from/url.d.ts.map +1 -0
- package/dist/commands/from/url.js +14 -13
- package/dist/commands/from/url.js.map +1 -0
- package/dist/commands/from/website.d.ts +1 -0
- package/dist/commands/from/website.d.ts.map +1 -0
- package/dist/commands/from/website.js +17 -16
- package/dist/commands/from/website.js.map +1 -0
- package/dist/commands/lint.d.ts +1 -0
- package/dist/commands/lint.d.ts.map +1 -0
- package/dist/commands/lint.js +6 -5
- package/dist/commands/lint.js.map +1 -0
- package/dist/commands/merge.d.ts +1 -0
- package/dist/commands/merge.d.ts.map +1 -0
- package/dist/commands/merge.js +3 -2
- package/dist/commands/merge.js.map +1 -0
- package/dist/commands/publish.d.ts +1 -0
- package/dist/commands/publish.d.ts.map +1 -0
- package/dist/commands/publish.js +4 -3
- package/dist/commands/publish.js.map +1 -0
- package/dist/commands/rescan.d.ts +1 -0
- package/dist/commands/rescan.d.ts.map +1 -0
- package/dist/commands/rescan.js +12 -11
- package/dist/commands/rescan.js.map +1 -0
- package/dist/commands/update.d.ts +1 -0
- package/dist/commands/update.d.ts.map +1 -0
- package/dist/commands/update.js +10 -9
- package/dist/commands/update.js.map +1 -0
- package/dist/commands/verify.d.ts +1 -0
- package/dist/commands/verify.d.ts.map +1 -0
- package/dist/commands/verify.js +7 -6
- package/dist/commands/verify.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +23 -2
- package/dist/index.js.map +1 -0
- package/dist/registry/official-registry.d.ts +1 -0
- package/dist/registry/official-registry.d.ts.map +1 -0
- package/dist/registry/official-registry.js +1 -0
- package/dist/registry/official-registry.js.map +1 -0
- package/package.json +20 -46
- package/dist/analyzer/auth-detector.d.ts +0 -12
- package/dist/analyzer/auth-detector.js +0 -142
- package/dist/analyzer/dom-parser.d.ts +0 -10
- package/dist/analyzer/dom-parser.js +0 -259
- package/dist/analyzer/goal-crawler.d.ts +0 -25
- package/dist/analyzer/goal-crawler.js +0 -177
- package/dist/analyzer/hybrid-detector.d.ts +0 -28
- package/dist/analyzer/hybrid-detector.js +0 -96
- package/dist/analyzer/index.d.ts +0 -12
- package/dist/analyzer/index.js +0 -8
- package/dist/analyzer/screenshot-capture.d.ts +0 -29
- package/dist/analyzer/screenshot-capture.js +0 -42
- package/dist/analyzer/selector-builder.d.ts +0 -19
- package/dist/analyzer/selector-builder.js +0 -199
- package/dist/analyzer/semantic-analyzer.d.ts +0 -13
- package/dist/analyzer/semantic-analyzer.js +0 -145
- package/dist/analyzer/site-crawler.d.ts +0 -38
- package/dist/analyzer/site-crawler.js +0 -235
- package/dist/cloud/billing/billing-engine.d.ts +0 -44
- package/dist/cloud/billing/billing-engine.js +0 -81
- package/dist/cloud/billing/credit-store.d.ts +0 -64
- package/dist/cloud/billing/credit-store.js +0 -168
- package/dist/cloud/billing/index.d.ts +0 -4
- package/dist/cloud/billing/index.js +0 -2
- package/dist/cloud/billing/usage-store.d.ts +0 -42
- package/dist/cloud/billing/usage-store.js +0 -85
- package/dist/cloud/billing/usage-tracker.d.ts +0 -38
- package/dist/cloud/billing/usage-tracker.js +0 -95
- package/dist/cloud/build-pipeline.d.ts +0 -39
- package/dist/cloud/build-pipeline.js +0 -310
- package/dist/cloud/build-queue.d.ts +0 -30
- package/dist/cloud/build-queue.js +0 -70
- package/dist/cloud/caddy-manager.d.ts +0 -18
- package/dist/cloud/caddy-manager.js +0 -97
- package/dist/cloud/container-backend.d.ts +0 -62
- package/dist/cloud/container-backend.js +0 -59
- package/dist/cloud/container-manager.d.ts +0 -64
- package/dist/cloud/container-manager.js +0 -301
- package/dist/cloud/crypto.d.ts +0 -27
- package/dist/cloud/crypto.js +0 -63
- package/dist/cloud/db/index.d.ts +0 -27
- package/dist/cloud/db/index.js +0 -53
- package/dist/cloud/db/migrations.d.ts +0 -12
- package/dist/cloud/db/migrations.js +0 -329
- package/dist/cloud/db/pg-store.d.ts +0 -45
- package/dist/cloud/db/pg-store.js +0 -336
- package/dist/cloud/failure-tracker.d.ts +0 -51
- package/dist/cloud/failure-tracker.js +0 -102
- package/dist/cloud/idle-monitor.d.ts +0 -30
- package/dist/cloud/idle-monitor.js +0 -70
- package/dist/cloud/mailer.d.ts +0 -21
- package/dist/cloud/mailer.js +0 -193
- package/dist/cloud/mcp-proxy.d.ts +0 -58
- package/dist/cloud/mcp-proxy.js +0 -203
- package/dist/cloud/metric-samples.d.ts +0 -43
- package/dist/cloud/metric-samples.js +0 -85
- package/dist/cloud/metrics.d.ts +0 -26
- package/dist/cloud/metrics.js +0 -59
- package/dist/cloud/multipart.d.ts +0 -26
- package/dist/cloud/multipart.js +0 -132
- package/dist/cloud/observability.d.ts +0 -27
- package/dist/cloud/observability.js +0 -98
- package/dist/cloud/rate-limiter.d.ts +0 -31
- package/dist/cloud/rate-limiter.js +0 -58
- package/dist/cloud/request-security.d.ts +0 -5
- package/dist/cloud/request-security.js +0 -74
- package/dist/cloud/resource-monitor.d.ts +0 -69
- package/dist/cloud/resource-monitor.js +0 -130
- package/dist/cloud/secret-store.d.ts +0 -38
- package/dist/cloud/secret-store.js +0 -103
- package/dist/cloud/security.d.ts +0 -26
- package/dist/cloud/security.js +0 -142
- package/dist/cloud/server.d.ts +0 -21
- package/dist/cloud/server.js +0 -1079
- package/dist/cloud/shared-state.d.ts +0 -72
- package/dist/cloud/shared-state.js +0 -159
- package/dist/cloud/ssrf.d.ts +0 -43
- package/dist/cloud/ssrf.js +0 -150
- package/dist/cloud/store.d.ts +0 -41
- package/dist/cloud/store.js +0 -75
- package/dist/cloud/stripe.d.ts +0 -78
- package/dist/cloud/stripe.js +0 -317
- package/dist/cloud/telemetry-store.d.ts +0 -53
- package/dist/cloud/telemetry-store.js +0 -108
- package/dist/cloud/web/auth.d.ts +0 -225
- package/dist/cloud/web/auth.js +0 -555
- package/dist/cloud/web/charts.d.ts +0 -70
- package/dist/cloud/web/charts.js +0 -178
- package/dist/cloud/web/csrf.d.ts +0 -14
- package/dist/cloud/web/csrf.js +0 -22
- package/dist/cloud/web/docs.d.ts +0 -40
- package/dist/cloud/web/docs.js +0 -174
- package/dist/cloud/web/router.d.ts +0 -25
- package/dist/cloud/web/router.js +0 -1921
- package/dist/cloud/web/static/alpine.min.js +0 -5
- package/dist/cloud/web/static/favicon.svg +0 -4
- package/dist/cloud/web/static/htmx-sse.js +0 -290
- package/dist/cloud/web/static/htmx.min.js +0 -1
- package/dist/cloud/web/static/style.css +0 -2683
- package/dist/cloud/web/static-server.d.ts +0 -13
- package/dist/cloud/web/static-server.js +0 -73
- package/dist/cloud/web/template-engine.d.ts +0 -27
- package/dist/cloud/web/template-engine.js +0 -146
- package/dist/cloud/web/templates/layouts/admin.hbs +0 -57
- package/dist/cloud/web/templates/layouts/auth.hbs +0 -138
- package/dist/cloud/web/templates/layouts/base.hbs +0 -16
- package/dist/cloud/web/templates/layouts/dashboard.hbs +0 -39
- package/dist/cloud/web/templates/layouts/landing.hbs +0 -82
- package/dist/cloud/web/templates/pages/admin/overview.hbs +0 -123
- package/dist/cloud/web/templates/pages/admin/servers.hbs +0 -129
- package/dist/cloud/web/templates/pages/admin/telemetry.hbs +0 -39
- package/dist/cloud/web/templates/pages/admin/user-edit.hbs +0 -91
- package/dist/cloud/web/templates/pages/admin/users.hbs +0 -179
- package/dist/cloud/web/templates/pages/auth/forgot-password.hbs +0 -25
- package/dist/cloud/web/templates/pages/auth/login.hbs +0 -33
- package/dist/cloud/web/templates/pages/auth/register.hbs +0 -32
- package/dist/cloud/web/templates/pages/auth/reset-password.hbs +0 -34
- package/dist/cloud/web/templates/pages/dashboard/billing.hbs +0 -140
- package/dist/cloud/web/templates/pages/dashboard/create.hbs +0 -173
- package/dist/cloud/web/templates/pages/dashboard/index.hbs +0 -8
- package/dist/cloud/web/templates/pages/dashboard/server-detail.hbs +0 -280
- package/dist/cloud/web/templates/pages/dashboard/server-logs.hbs +0 -35
- package/dist/cloud/web/templates/pages/dashboard/server-metrics.hbs +0 -63
- package/dist/cloud/web/templates/pages/dashboard/servers-partial.hbs +0 -21
- package/dist/cloud/web/templates/pages/dashboard/servers.hbs +0 -44
- package/dist/cloud/web/templates/pages/docs/show.hbs +0 -16
- package/dist/cloud/web/templates/pages/errors/404.hbs +0 -9
- package/dist/cloud/web/templates/pages/errors/500.hbs +0 -8
- package/dist/cloud/web/templates/pages/landing/index.hbs +0 -223
- package/dist/cloud/web/templates/pages/legal/privacy.hbs +0 -71
- package/dist/cloud/web/templates/pages/legal/terms.hbs +0 -73
- package/dist/cloud/web/templates/partials/admin-stats.hbs +0 -52
- package/dist/cloud/web/templates/partials/flash-message.hbs +0 -6
- package/dist/cloud/web/templates/partials/pricing-table.hbs +0 -103
- package/dist/cloud/web/templates/partials/server-card.hbs +0 -19
- package/dist/cloud/web/templates/partials/status-badge.hbs +0 -1
- package/dist/config/configurable-command.d.ts +0 -13
- package/dist/config/configurable-command.js +0 -70
- package/dist/config/mcpmake-config.d.ts +0 -68
- package/dist/config/mcpmake-config.js +0 -207
- package/dist/docs/cli.md +0 -400
- package/dist/docs/mcp-2026-07-28-migration.md +0 -78
- package/dist/docs/migrate-from-stainless.md +0 -94
- package/dist/docs/quickstart.md +0 -166
- package/dist/docs/show-hn.md +0 -26
- package/dist/docs/website-servers.md +0 -169
- package/dist/emitter/code-writer.d.ts +0 -8
- package/dist/emitter/code-writer.js +0 -25
- package/dist/emitter/index.d.ts +0 -32
- package/dist/emitter/index.js +0 -280
- package/dist/emitter/mcpb-bundler.d.ts +0 -31
- package/dist/emitter/mcpb-bundler.js +0 -172
- package/dist/emitter/project-scaffolder.d.ts +0 -4
- package/dist/emitter/project-scaffolder.js +0 -89
- package/dist/emitter/python-template-loader.d.ts +0 -4
- package/dist/emitter/python-template-loader.js +0 -30
- package/dist/emitter/python-templates/dockerfile.hbs +0 -14
- package/dist/emitter/python-templates/env.example.hbs +0 -6
- package/dist/emitter/python-templates/requirements.txt.hbs +0 -4
- package/dist/emitter/python-templates/server.py.hbs +0 -77
- package/dist/emitter/site-scaffolder.d.ts +0 -13
- package/dist/emitter/site-scaffolder.js +0 -70
- package/dist/emitter/site-template-loader.d.ts +0 -5
- package/dist/emitter/site-template-loader.js +0 -47
- package/dist/emitter/site-templates/browser-manager.ts.hbs +0 -233
- package/dist/emitter/site-templates/config.ts.hbs +0 -28
- package/dist/emitter/site-templates/dockerfile.hbs +0 -31
- package/dist/emitter/site-templates/env.example.hbs +0 -19
- package/dist/emitter/site-templates/package.json.hbs +0 -26
- package/dist/emitter/site-templates/server-main-http.ts.hbs +0 -108
- package/dist/emitter/site-templates/server-main.ts.hbs +0 -23
- package/dist/emitter/site-templates/tool-handler-action.ts.hbs +0 -86
- package/dist/emitter/site-templates/tool-handler-form.ts.hbs +0 -116
- package/dist/emitter/site-templates/tool-handler-lifecycle.ts.hbs +0 -146
- package/dist/emitter/site-templates/tool-index.ts.hbs +0 -11
- package/dist/emitter/template-loader.d.ts +0 -1
- package/dist/emitter/template-loader.js +0 -27
- package/dist/emitter/templates/auth-provider.ts.hbs +0 -57
- package/dist/emitter/templates/config.ts.hbs +0 -63
- package/dist/emitter/templates/discovery.ts.hbs +0 -301
- package/dist/emitter/templates/dockerfile.hbs +0 -34
- package/dist/emitter/templates/env.example.hbs +0 -28
- package/dist/emitter/templates/gitignore.hbs +0 -5
- package/dist/emitter/templates/http-executor.ts.hbs +0 -117
- package/dist/emitter/templates/oauth.ts.hbs +0 -188
- package/dist/emitter/templates/package.json.hbs +0 -25
- package/dist/emitter/templates/prompts.ts.hbs +0 -22
- package/dist/emitter/templates/readme.md.hbs +0 -123
- package/dist/emitter/templates/resources.ts.hbs +0 -63
- package/dist/emitter/templates/server-main-http.ts.hbs +0 -407
- package/dist/emitter/templates/server-main.ts.hbs +0 -40
- package/dist/emitter/templates/task-handlers.ts.hbs +0 -189
- package/dist/emitter/templates/task-manager.ts.hbs +0 -139
- package/dist/emitter/templates/task-sse.ts.hbs +0 -105
- package/dist/emitter/templates/tool-handler.ts.hbs +0 -124
- package/dist/emitter/templates/tool-index.ts.hbs +0 -11
- package/dist/emitter/templates/tool-test.ts.hbs +0 -57
- package/dist/emitter/templates/trace.ts.hbs +0 -79
- package/dist/emitter/templates/tsconfig.json.hbs +0 -16
- package/dist/emitter/templates/types.ts.hbs +0 -5
- package/dist/emitter/worker-template-loader.d.ts +0 -5
- package/dist/emitter/worker-template-loader.js +0 -33
- package/dist/emitter/worker-templates/config.ts.hbs +0 -54
- package/dist/emitter/worker-templates/dev-vars.example.hbs +0 -10
- package/dist/emitter/worker-templates/gitignore.hbs +0 -6
- package/dist/emitter/worker-templates/package.json.hbs +0 -24
- package/dist/emitter/worker-templates/readme.md.hbs +0 -53
- package/dist/emitter/worker-templates/server.test.ts.hbs +0 -20
- package/dist/emitter/worker-templates/tool-handler.ts.hbs +0 -85
- package/dist/emitter/worker-templates/tool-index.ts.hbs +0 -28
- package/dist/emitter/worker-templates/tsconfig.json.hbs +0 -17
- package/dist/emitter/worker-templates/worker.ts.hbs +0 -242
- package/dist/emitter/worker-templates/wrangler.toml.hbs +0 -19
- package/dist/generator/spec-generator.d.ts +0 -6
- package/dist/generator/spec-generator.js +0 -50
- package/dist/parser/har-filter.d.ts +0 -8
- package/dist/parser/har-filter.js +0 -71
- package/dist/parser/har-loader.d.ts +0 -2
- package/dist/parser/har-loader.js +0 -14
- package/dist/parser/har-normalizer.d.ts +0 -20
- package/dist/parser/har-normalizer.js +0 -78
- package/dist/parser/index.d.ts +0 -10
- package/dist/parser/index.js +0 -6
- package/dist/parser/openapi-loader.d.ts +0 -6
- package/dist/parser/openapi-loader.js +0 -308
- package/dist/parser/operation-extractor.d.ts +0 -13
- package/dist/parser/operation-extractor.js +0 -155
- package/dist/parser/overlay-loader.d.ts +0 -10
- package/dist/parser/overlay-loader.js +0 -184
- package/dist/parser/postman-loader.d.ts +0 -9
- package/dist/parser/postman-loader.js +0 -106
- package/dist/parser/schema-converter.d.ts +0 -12
- package/dist/parser/schema-converter.js +0 -117
- package/dist/plugins/adapter.d.ts +0 -40
- package/dist/plugins/adapter.js +0 -15
- package/dist/plugins/loader.d.ts +0 -25
- package/dist/plugins/loader.js +0 -58
- package/dist/pricing.d.ts +0 -55
- package/dist/pricing.js +0 -133
- package/dist/providers/index.d.ts +0 -15
- package/dist/providers/index.js +0 -56
- package/dist/recorder/browser-recorder.d.ts +0 -22
- package/dist/recorder/browser-recorder.js +0 -205
- package/dist/rescan/diff-engine.d.ts +0 -5
- package/dist/rescan/diff-engine.js +0 -312
- package/dist/rescan/index.d.ts +0 -3
- package/dist/rescan/index.js +0 -2
- package/dist/rescan/rescan-runner.d.ts +0 -42
- package/dist/rescan/rescan-runner.js +0 -69
- package/dist/rescan/rescan-scheduler.d.ts +0 -41
- package/dist/rescan/rescan-scheduler.js +0 -179
- package/dist/site-transformer/browser-tools.d.ts +0 -10
- package/dist/site-transformer/browser-tools.js +0 -59
- package/dist/site-transformer/index.d.ts +0 -2
- package/dist/site-transformer/index.js +0 -2
- package/dist/site-transformer/selector-healer.d.ts +0 -8
- package/dist/site-transformer/selector-healer.js +0 -106
- package/dist/site-transformer/tool-generator.d.ts +0 -13
- package/dist/site-transformer/tool-generator.js +0 -245
- package/dist/transformer/auth-detector.d.ts +0 -13
- package/dist/transformer/auth-detector.js +0 -90
- package/dist/transformer/catalog-builder.d.ts +0 -18
- package/dist/transformer/catalog-builder.js +0 -56
- package/dist/transformer/client-compat.d.ts +0 -6
- package/dist/transformer/client-compat.js +0 -44
- package/dist/transformer/har-clusterer.d.ts +0 -9
- package/dist/transformer/har-clusterer.js +0 -27
- package/dist/transformer/har-dedup.d.ts +0 -10
- package/dist/transformer/har-dedup.js +0 -81
- package/dist/transformer/har-schema-inferrer.d.ts +0 -15
- package/dist/transformer/har-schema-inferrer.js +0 -90
- package/dist/transformer/har-to-operations.d.ts +0 -13
- package/dist/transformer/har-to-operations.js +0 -192
- package/dist/transformer/index.d.ts +0 -8
- package/dist/transformer/index.js +0 -6
- package/dist/transformer/llm-namer.d.ts +0 -6
- package/dist/transformer/llm-namer.js +0 -59
- package/dist/transformer/naming.d.ts +0 -4
- package/dist/transformer/naming.js +0 -30
- package/dist/transformer/operation-filter.d.ts +0 -13
- package/dist/transformer/operation-filter.js +0 -52
- package/dist/transformer/resource-builder.d.ts +0 -12
- package/dist/transformer/resource-builder.js +0 -80
- package/dist/transformer/schema-merger.d.ts +0 -14
- package/dist/transformer/schema-merger.js +0 -65
- package/dist/transformer/tool-builder.d.ts +0 -3
- package/dist/transformer/tool-builder.js +0 -114
- package/dist/types/index.d.ts +0 -131
- package/dist/types/index.js +0 -1
- package/dist/types/site.d.ts +0 -284
- package/dist/types/site.js +0 -8
- package/dist/utils/fail.d.ts +0 -48
- package/dist/utils/fail.js +0 -204
- package/dist/utils/fs.d.ts +0 -5
- package/dist/utils/fs.js +0 -28
- package/dist/utils/interactive.d.ts +0 -6
- package/dist/utils/interactive.js +0 -30
- package/dist/utils/logger.d.ts +0 -1
- package/dist/utils/logger.js +0 -2
- package/dist/utils/sanitize.d.ts +0 -28
- package/dist/utils/sanitize.js +0 -44
- package/dist/utils/watcher.d.ts +0 -11
- package/dist/utils/watcher.js +0 -36
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Builds multi-strategy selector sets for DOM elements.
|
|
3
|
-
*
|
|
4
|
-
* Generates selectors in priority order (most stable first):
|
|
5
|
-
* data-testid > id > aria-label > name > role > CSS path > XPath
|
|
6
|
-
*/
|
|
7
|
-
import type { Page, ElementHandle } from 'playwright';
|
|
8
|
-
import type { SelectorSet } from '../types/site.js';
|
|
9
|
-
/**
|
|
10
|
-
* Build a SelectorSet for a given element on the page.
|
|
11
|
-
* Tries multiple strategies and picks the most stable one as primary.
|
|
12
|
-
*/
|
|
13
|
-
export declare function buildSelectorSet(page: Page, element: ElementHandle): Promise<SelectorSet>;
|
|
14
|
-
/**
|
|
15
|
-
* Validate that a selector still resolves to an element on the page.
|
|
16
|
-
* Tries the primary selector first, then fallbacks.
|
|
17
|
-
* Returns the first working selector or null if all fail.
|
|
18
|
-
*/
|
|
19
|
-
export declare function validateSelector(page: Page, selectorSet: SelectorSet): Promise<string | null>;
|
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Builds multi-strategy selector sets for DOM elements.
|
|
3
|
-
*
|
|
4
|
-
* Generates selectors in priority order (most stable first):
|
|
5
|
-
* data-testid > id > aria-label > name > role > CSS path > XPath
|
|
6
|
-
*/
|
|
7
|
-
/**
 * Build a SelectorSet for a given element on the page.
 *
 * Collects one candidate selector per applicable strategy, orders them by
 * confidence (highest first), and returns the best one as `primary` with the
 * remaining candidates as `fallbacks`.
 *
 * @param page - Accepted for interface compatibility; not read by this function.
 * @param element - Element handle exposing `evaluate(fn)`.
 * @returns {Promise<{primary: string, fallbacks: string[], strategy: string, confidence: number}>}
 */
export async function buildSelectorSet(page, element) {
    // Read every attribute we need in a single round trip to the page.
    const attrs = await element.evaluate((node) => {
        const el = node;
        // Remember WHICH test-id attribute carried the value so the selector
        // built below targets the attribute that is actually on the element.
        const testIdAttr = ['data-testid', 'data-test-id'].find((attr) => el.getAttribute(attr)) ?? null;
        return {
            tagName: el.tagName.toLowerCase(),
            id: el.id || null,
            name: el.getAttribute('name'),
            type: el.getAttribute('type'),
            testIdAttr,
            testId: testIdAttr ? el.getAttribute(testIdAttr) : null,
            ariaLabel: el.getAttribute('aria-label'),
            role: el.getAttribute('role') || null,
            textContent: el.textContent?.trim().slice(0, 80) || null,
        };
    });
    const candidates = [];
    // 1. Test id (most stable — intentionally placed for testing).
    if (attrs.testId) {
        candidates.push({
            selector: `[${attrs.testIdAttr}="${cssEscape(attrs.testId)}"]`,
            strategy: 'data-testid',
            confidence: 0.95,
        });
    }
    // 2. id (stable if present, unless it looks machine-generated).
    if (attrs.id && !looksAutoGenerated(attrs.id)) {
        candidates.push({
            selector: `#${cssEscape(attrs.id)}`,
            strategy: 'id',
            confidence: 0.9,
        });
    }
    // 3. aria-label (stable for accessible sites).
    if (attrs.ariaLabel) {
        candidates.push({
            selector: `${attrs.tagName}[aria-label="${cssEscape(attrs.ariaLabel)}"]`,
            strategy: 'aria-label',
            confidence: 0.85,
        });
    }
    // 4. name attribute (stable for form elements), narrowed by type when present.
    if (attrs.name) {
        // The type value comes from the crawled page, so escape it like every
        // other interpolated attribute value.
        const typeQualifier = attrs.type ? `[type="${cssEscape(attrs.type)}"]` : '';
        candidates.push({
            selector: `${attrs.tagName}[name="${cssEscape(attrs.name)}"]${typeQualifier}`,
            strategy: 'name',
            confidence: 0.85,
        });
    }
    // 5. role + visible text (for buttons/links with visible text).
    if (attrs.role && attrs.textContent) {
        const shortText = attrs.textContent.slice(0, 40);
        candidates.push({
            selector: `${attrs.tagName}[role="${cssEscape(attrs.role)}"]:has-text("${cssEscape(shortText)}")`,
            strategy: 'role',
            confidence: 0.7,
        });
    }
    // 6. CSS path (fragile; skipped when the helper yields null or an empty path).
    const cssPath = await buildCssPath(element);
    if (cssPath) {
        candidates.push({
            selector: cssPath,
            strategy: 'css-path',
            confidence: 0.4,
        });
    }
    if (candidates.length === 0) {
        // Absolute fallback: XPath.
        const xpath = await buildXPath(element);
        return {
            primary: xpath,
            fallbacks: [],
            strategy: 'xpath',
            confidence: 0.2,
        };
    }
    // Highest confidence first; the sort is stable, so equal-confidence
    // candidates keep the strategy order in which they were added above.
    candidates.sort((a, b) => b.confidence - a.confidence);
    const [primary, ...rest] = candidates;
    return {
        primary: primary.selector,
        fallbacks: rest.map((c) => c.selector),
        strategy: primary.strategy,
        confidence: primary.confidence,
    };
}
|
|
100
|
-
/**
 * Validate that a selector still resolves to an element on the page.
 *
 * Tries the primary selector first, then each fallback in order, and returns
 * the first selector for which `page.$` yields an element.
 *
 * @param page - Object exposing `$(selector)`, resolving to a handle or null.
 * @param selectorSet - `{ primary, fallbacks }`; a missing `fallbacks` is treated as empty.
 * @returns {Promise<string | null>} The first working selector, or null if all fail.
 */
export async function validateSelector(page, selectorSet) {
    const allSelectors = [selectorSet.primary, ...(selectorSet.fallbacks ?? [])];
    for (const selector of allSelectors) {
        // An empty/missing selector can never resolve; skip it without a round trip.
        if (!selector) {
            continue;
        }
        let handle;
        try {
            handle = await page.$(selector);
        }
        catch {
            // Selector syntax may be invalid — deliberately best-effort, try the next one.
            continue;
        }
        if (!handle) {
            continue;
        }
        // Only existence matters here, so release the handle instead of leaking it.
        // A failure to release must not turn a working selector into a miss.
        try {
            await handle.dispose?.();
        }
        catch {
            // Releasing the handle is best-effort.
        }
        return selector;
    }
    return null;
}
|
|
119
|
-
// ─── Internal helpers ───────────────────────────────────────────────
|
|
120
|
-
/**
 * Build a `>`-joined CSS path for the element by walking up its ancestors.
 *
 * The walk stops at `document.body`, at the first node that has an id (which
 * becomes the leading `#id` segment), or after five levels. Resolves to null
 * when `element.evaluate` rejects.
 */
async function buildCssPath(element) {
    // Runs inside the page via evaluate(), so it must not close over anything
    // from this module.
    const describePath = (node) => {
        const LEVEL_LIMIT = 5;
        const segments = [];
        for (let el = node, level = 0; el && el !== document.body && level < LEVEL_LIMIT; level += 1) {
            let segment = el.tagName.toLowerCase();
            if (el.id) {
                // CSS.escape keeps the selector valid (and injection-safe) for ids
                // containing quotes/brackets/etc. from untrusted crawled pages.
                segment = `#${CSS.escape(el.id)}`;
                segments.unshift(segment);
                break;
            }
            const container = el.parentElement;
            if (container) {
                const sameTag = Array.from(container.children).filter((child) => child.tagName === el.tagName);
                // Only disambiguate when there is more than one sibling with this tag.
                if (sameTag.length > 1) {
                    segment += `:nth-of-type(${sameTag.indexOf(el) + 1})`;
                }
            }
            segments.unshift(segment);
            el = container;
        }
        return segments.join(' > ');
    };
    try {
        return await element.evaluate(describePath);
    }
    catch {
        return null;
    }
}
|
|
156
|
-
/**
 * Build a positional XPath for the element (absolute fallback).
 *
 * Walks from the element up through its element ancestors, emitting
 * `tag[n]` for each, where `n` counts same-tag preceding siblings (1-based).
 *
 * The result is written as `//tag[1]/...` rather than `/tag[1]/...`:
 * Playwright only auto-detects XPath for selectors that start with `//` or
 * `..`, so a single leading slash would be parsed as (invalid) CSS when the
 * string is passed straight to `page.$`.
 *
 * @param element - Element handle exposing `evaluate(fn)`.
 * @returns {Promise<string>} The XPath, or `'//body'` when no path could be
 *   built (evaluate rejected, or the node is not an element).
 */
async function buildXPath(element) {
    // Runs inside the page via evaluate(), so it must not close over anything
    // from this module.
    const describeXPath = (node) => {
        const steps = [];
        for (let el = node; el && el.nodeType === Node.ELEMENT_NODE; el = el.parentElement) {
            let position = 1;
            for (let prev = el.previousElementSibling; prev; prev = prev.previousElementSibling) {
                if (prev.tagName === el.tagName) {
                    position += 1;
                }
            }
            steps.unshift(`${el.tagName.toLowerCase()}[${position}]`);
        }
        // No element steps means there is nothing to address; use the same
        // fallback value as the failure path below.
        return steps.length === 0 ? '//body' : `//${steps.join('/')}`;
    };
    try {
        return await element.evaluate(describeXPath);
    }
    catch {
        return '//body';
    }
}
|
|
180
|
-
/**
 * Check if an ID looks auto-generated (random hash, UUID-like, etc.).
 *
 * All pattern checks are case-insensitive so that upper-case variants of the
 * same shapes are treated the same way as lower-case ones.
 *
 * @param {string} id - The element id to inspect.
 * @returns {boolean} True when the id matches one of the generated-id heuristics.
 */
function looksAutoGenerated(id) {
    // Very long IDs are usually auto-generated.
    if (id.length > 40) {
        return true;
    }
    const generatedShapes = [
        // Random hex strings (8+ chars of only hex).
        /^[a-f0-9]{8,}$/i,
        // React-style IDs like :r0:, :r1a:.
        /^:r[a-z0-9]+:$/i,
        // Contains UUID-like segments.
        /[a-f0-9]{8}-[a-f0-9]{4}/i,
    ];
    return generatedShapes.some((shape) => shape.test(id));
}
|
|
196
|
-
/**
 * Escape a string for use in CSS selectors.
 *
 * Follows the CSSOM "serialize an identifier" rules (the algorithm behind
 * `CSS.escape`), so the result is valid both as an identifier (`#id`) and
 * inside a quoted attribute value:
 *  - NUL becomes U+FFFD;
 *  - control characters, a leading digit, and a digit right after a leading
 *    `-` are written as a hex escape followed by a space;
 *  - a lone `-` is backslash-escaped;
 *  - ASCII letters, digits, `-`, `_` and anything >= U+0080 pass through;
 *  - every other character is backslash-escaped.
 *
 * @param {string} str - Raw value (coerced with String()).
 * @returns {string} The escaped value.
 */
function cssEscape(str) {
    const value = String(str);
    const firstUnit = value.charCodeAt(0);
    // "-" on its own is not a valid identifier.
    if (value.length === 1 && firstUnit === 0x2d) {
        return `\\${value}`;
    }
    const isDigit = (unit) => unit >= 0x30 && unit <= 0x39;
    let escaped = '';
    for (let i = 0; i < value.length; i++) {
        const unit = value.charCodeAt(i);
        if (unit === 0x00) {
            escaped += '\uFFFD';
            continue;
        }
        const isControl = (unit >= 0x01 && unit <= 0x1f) || unit === 0x7f;
        const isLeadingDigit = isDigit(unit) && (i === 0 || (i === 1 && firstUnit === 0x2d));
        if (isControl || isLeadingDigit) {
            // Hex escape; the trailing space terminates the escape sequence.
            escaped += `\\${unit.toString(16)} `;
            continue;
        }
        const isSafe = unit >= 0x80 ||
            unit === 0x2d ||
            unit === 0x5f ||
            isDigit(unit) ||
            (unit >= 0x41 && unit <= 0x5a) ||
            (unit >= 0x61 && unit <= 0x7a);
        escaped += isSafe ? value.charAt(i) : `\\${value.charAt(i)}`;
    }
    return escaped;
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LLM-powered semantic analysis of crawled pages.
|
|
3
|
-
*
|
|
4
|
-
* Sends page structure to Claude (Haiku for cost efficiency) and infers
|
|
5
|
-
* human-readable semantic names and descriptions for forms, buttons, and links.
|
|
6
|
-
*/
|
|
7
|
-
import type { PageDescriptor } from '../types/site.js';
|
|
8
|
-
/**
|
|
9
|
-
* Enrich PageDescriptors with LLM-inferred semantic names and descriptions.
|
|
10
|
-
*
|
|
11
|
-
* When ANTHROPIC_API_KEY is not set, returns pages unchanged with a warning.
|
|
12
|
-
*/
|
|
13
|
-
export declare function analyzeSemantics(pages: PageDescriptor[], _screenshots?: Map<string, Buffer>): Promise<PageDescriptor[]>;
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LLM-powered semantic analysis of crawled pages.
|
|
3
|
-
*
|
|
4
|
-
* Sends page structure to Claude (Haiku for cost efficiency) and infers
|
|
5
|
-
* human-readable semantic names and descriptions for forms, buttons, and links.
|
|
6
|
-
*/
|
|
7
|
-
import Anthropic from '@anthropic-ai/sdk';
|
|
8
|
-
import { logger } from '../utils/logger.js';
|
|
9
|
-
const MODEL = 'claude-haiku-4-5-20251001';
const MAX_TOKENS = 4096;

/**
 * Index LLM result entries by one of their index fields (e.g. `formIndex`).
 *
 * The first entry seen for a given index wins, which matches a linear
 * `Array.prototype.find` lookup. A missing or non-array input yields an empty
 * index so that one omitted array does not abort the whole enrichment.
 *
 * @param {unknown} entries - Array taken from the parsed LLM response.
 * @param {string} key - Name of the index property on each entry.
 * @returns {Map<unknown, object>} Entries keyed by their index value.
 */
function indexResultsBy(entries, key) {
    const byIndex = new Map();
    if (!Array.isArray(entries)) {
        return byIndex;
    }
    for (const entry of entries) {
        const index = entry?.[key];
        if (!byIndex.has(index)) {
            byIndex.set(index, entry);
        }
    }
    return byIndex;
}

/**
 * Copy `element` and overlay the listed fields from `inferred`.
 *
 * A field the LLM left out (null/undefined) keeps the element's existing
 * value instead of being overwritten with `undefined`.
 *
 * @param {object} element - Original form/button/link/page descriptor.
 * @param {object | undefined} inferred - Matching entry from the LLM response.
 * @param {string[]} fields - Field names to take from `inferred`.
 * @returns {object} `element` itself when there is no match, else a new object.
 */
function applyInferredFields(element, inferred, fields) {
    if (!inferred) {
        return element;
    }
    const merged = { ...element };
    for (const field of fields) {
        merged[field] = inferred[field] ?? element[field];
    }
    return merged;
}

/**
 * Enrich PageDescriptors with LLM-inferred semantic names and descriptions.
 *
 * When ANTHROPIC_API_KEY is not set, returns pages unchanged with a warning.
 * Any failure while calling the API or parsing its reply is logged as a
 * warning and the original `pages` array is returned.
 *
 * @param {object[]} pages - Page descriptors; each is read for `url`, `title`,
 *   `forms`, `buttons` and `links`.
 * @param {unknown} [_screenshots] - Accepted for interface compatibility; unused.
 * @returns {Promise<object[]>} The enriched pages, or `pages` itself on skip/failure.
 */
export async function analyzeSemantics(pages, _screenshots) {
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        logger.warn('ANTHROPIC_API_KEY not set — skipping semantic analysis');
        return pages;
    }
    const client = new Anthropic({ apiKey });
    // Build compact page summaries for the prompt
    const summaries = pages.map((page, pageIndex) => ({
        pageIndex,
        url: page.url,
        title: page.title,
        forms: page.forms.map((form, formIndex) => ({
            formIndex,
            formId: form.formId,
            method: form.method,
            action: form.action,
            fieldNames: form.fields.map((f) => f.name),
            fieldTypes: form.fields.map((f) => f.fieldType),
            labels: form.fields.map((f) => f.label ?? f.name),
            submitText: form.submitButton?.humanLabel,
        })),
        buttons: page.buttons.map((btn, buttonIndex) => ({
            buttonIndex,
            buttonId: btn.buttonId,
            text: btn.text,
            ariaLabel: btn.ariaLabel,
            type: btn.type,
            href: btn.href,
        })),
        links: page.links.map((link, linkIndex) => ({
            linkIndex,
            linkId: link.linkId,
            text: link.text,
            href: link.href,
            isNavigation: link.isNavigation,
        })),
    }));
    const prompt = `You are analyzing a website's interactive elements to generate MCP tool names. Given the page structure below, infer semantic names and descriptions for each element.

Rules:
- Form semanticName: snake_case, e.g. "login_form", "search_form", "checkout_form"
- Button semanticAction: snake_case verb, e.g. "add_to_cart", "submit_order", "toggle_menu"
- Link semanticAction: snake_case, e.g. "navigate_to_checkout", "view_product_details"
- Page semanticName: snake_case, e.g. "login_page", "product_listing"
- Descriptions: one sentence explaining what the element does
- Only output valid JSON matching the schema, no explanation

Schema:
{
"pages": [{
"pageIndex": number,
"semanticName": string,
"description": string,
"forms": [{ "formIndex": number, "semanticName": string, "description": string }],
"buttons": [{ "buttonIndex": number, "semanticAction": string, "description": string }],
"links": [{ "linkIndex": number, "semanticAction": string }]
}]
}

Pages:
${JSON.stringify(summaries, null, 2)}`;
    try {
        logger.info('Analyzing page semantics with LLM...');
        const message = await client.messages.create({
            model: MODEL,
            max_tokens: MAX_TOKENS,
            messages: [{ role: 'user', content: prompt }],
        });
        // Use the first text block rather than assuming it is at position 0.
        const textBlock = message.content.find((block) => block.type === 'text');
        if (!textBlock) {
            logger.warn('Semantic analysis returned no text content, using defaults');
            return pages;
        }
        let json = textBlock.text.trim();
        // The model sometimes wraps its JSON in a Markdown code fence; strip it.
        if (json.startsWith('```')) {
            json = json.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '');
        }
        const result = JSON.parse(json);
        if (!Array.isArray(result?.pages)) {
            throw new Error('response JSON has no "pages" array');
        }
        // Apply inferred names back to the page descriptors
        const pageResults = indexResultsBy(result.pages, 'pageIndex');
        const enriched = pages.map((page, pageIndex) => {
            const pageResult = pageResults.get(pageIndex);
            if (!pageResult) {
                return page;
            }
            const formResults = indexResultsBy(pageResult.forms, 'formIndex');
            const buttonResults = indexResultsBy(pageResult.buttons, 'buttonIndex');
            const linkResults = indexResultsBy(pageResult.links, 'linkIndex');
            const updatedPage = applyInferredFields(page, pageResult, ['semanticName', 'description']);
            updatedPage.forms = page.forms.map((form, formIndex) => applyInferredFields(form, formResults.get(formIndex), ['semanticName', 'description']));
            updatedPage.buttons = page.buttons.map((btn, buttonIndex) => applyInferredFields(btn, buttonResults.get(buttonIndex), ['semanticAction', 'description']));
            updatedPage.links = page.links.map((link, linkIndex) => applyInferredFields(link, linkResults.get(linkIndex), ['semanticAction']));
            return updatedPage;
        });
        const totalNamed = enriched.reduce((n, p) => n +
            p.forms.filter((f) => f.semanticName).length +
            p.buttons.filter((b) => b.semanticAction).length +
            p.links.filter((l) => l.semanticAction).length, 0);
        logger.info(`Semantic analysis complete: ${totalNamed} elements named`);
        return enriched;
    }
    catch (err) {
        logger.warn(`Semantic analysis failed, using defaults: ${err instanceof Error ? err.message : err}`);
        return pages;
    }
}
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Crawls a website using Playwright, visiting pages breadth-first
|
|
3
|
-
* and extracting interactive elements from each page.
|
|
4
|
-
*/
|
|
5
|
-
import type { Entry } from 'har-format';
|
|
6
|
-
import type { SiteDescriptor } from '../types/site.js';
|
|
7
|
-
/**
 * Options accepted by `crawlSite`. Only `url` is required; every other field
 * is optional and falls back to the default noted on it.
 */
export interface CrawlOptions {
    /** Target URL to start crawling from */
    url: string;
    /** Maximum crawl depth (default: 2) */
    depth?: number;
    /** Maximum number of pages to visit (default: 20) */
    maxPages?: number;
    /** Idle timeout in ms before auto-closing (default: 5 min) */
    timeout?: number;
    /** Run browser in headless mode (default: false) */
    headless?: boolean;
    /** Viewport dimensions */
    viewport?: {
        width: number;
        height: number;
    };
    /** Whether to capture screenshots during analysis (default: true) */
    captureScreenshots?: boolean;
    /** Whether to capture HAR entries during crawl (for hybrid mode) */
    captureHar?: boolean;
}
|
|
28
|
-
/**
 * Value resolved by `crawlSite`: the site description plus the optional
 * artifacts collected while crawling.
 */
export interface CrawlResult {
    /** Descriptor of the crawled site */
    siteDescriptor: SiteDescriptor;
    /** Screenshots keyed by pageId */
    screenshots: Map<string, Buffer>;
    /** HAR entries captured during the crawl (only when captureHar is true) */
    harEntries?: Entry[];
}
|
|
35
|
-
/**
 * Crawl a website and return a SiteDescriptor with all discovered pages.
 *
 * @param options - Start URL plus optional crawl limits and capture flags.
 * @returns A promise resolving to the crawl result.
 */
export declare function crawlSite(options: CrawlOptions): Promise<CrawlResult>;
|
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Crawls a website using Playwright, visiting pages breadth-first
|
|
3
|
-
* and extracting interactive elements from each page.
|
|
4
|
-
*/
|
|
5
|
-
import { chromium } from 'playwright';
|
|
6
|
-
import { parsePage } from './dom-parser.js';
|
|
7
|
-
import { captureViewportScreenshot } from './screenshot-capture.js';
|
|
8
|
-
import { logger } from '../utils/logger.js';
|
|
9
|
-
import crypto from 'node:crypto';
|
|
10
|
-
const DEFAULT_DEPTH = 2;
const DEFAULT_MAX_PAGES = 20;
const DEFAULT_TIMEOUT = 5 * 60 * 1000;
const DEFAULT_VIEWPORT = { width: 1280, height: 720 };

/**
 * Report whether `candidateUrl` is an absolute URL on exactly `origin`.
 *
 * Origins are compared after parsing, not by string prefix: a prefix test
 * would accept `https://example.com.evil.test/` or `https://example.com@evil.test/`
 * for the base `https://example.com`.
 *
 * @param {string} candidateUrl - URL taken from the crawl queue.
 * @param {string} origin - Origin of the crawl's start URL.
 * @returns {boolean} False for cross-origin, relative, or unparsable URLs.
 */
function isSameOrigin(candidateUrl, origin) {
    try {
        return new URL(candidateUrl).origin === origin;
    }
    catch {
        return false;
    }
}

/**
 * Crawl a website and return a SiteDescriptor with all discovered pages.
 *
 * Pages are visited breadth-first from `options.url`, staying on the start
 * URL's origin, until the queue is empty, `maxPages` pages were collected, or
 * the idle timeout elapses. A page that fails to load or parse is logged and
 * skipped; the crawl continues.
 *
 * @param {object} options - Crawl options (`url` required; `depth`, `maxPages`,
 *   `timeout`, `viewport`, `headless`, `captureScreenshots`, `captureHar` optional).
 * @returns {Promise<object>} `{ siteDescriptor, screenshots }`, plus `harEntries`
 *   when `captureHar` is true.
 * @throws {TypeError} When `options.url` cannot be parsed as a URL.
 * @throws {Error} When the URL scheme is not http/https, or the browser fails to start.
 */
export async function crawlSite(options) {
    const depth = options.depth ?? DEFAULT_DEPTH;
    const maxPages = options.maxPages ?? DEFAULT_MAX_PAGES;
    const timeout = options.timeout ?? DEFAULT_TIMEOUT;
    const viewport = options.viewport ?? DEFAULT_VIEWPORT;
    const captureScreenshots = options.captureScreenshots ?? true;
    const parsedUrl = new URL(options.url);
    if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
        throw new Error('Only http/https URLs are supported');
    }
    // For http/https this is `${protocol}//${host}` with the default port dropped.
    const baseUrl = parsedUrl.origin;
    const captureHar = options.captureHar ?? false;
    const pages = [];
    const screenshots = new Map();
    const harEntries = [];
    const pendingRequests = new Map();
    const visited = new Set();
    const queue = [
        { url: options.url, currentDepth: 0 },
    ];
    let browser;
    try {
        browser = await chromium.launch({ headless: options.headless ?? false });
        const context = await browser.newContext({ viewport });
        const page = await context.newPage();
        // Optionally capture network requests as HAR entries during crawl
        if (captureHar) {
            page.on('request', (request) => {
                pendingRequests.set(request, { startTime: Date.now() });
            });
            page.on('response', async (response) => {
                const request = response.request();
                const pending = pendingRequests.get(request);
                if (!pending) {
                    return;
                }
                pendingRequests.delete(request);
                try {
                    const entry = await buildCrawlHarEntry(request, response, pending.startTime);
                    harEntries.push(entry);
                }
                catch {
                    // Some responses can't be read (redirects, aborted)
                }
            });
        }
        logger.info(`Crawling site: ${options.url} (depth: ${depth}, max pages: ${maxPages})`);
        let lastActivityTime = Date.now();
        while (queue.length > 0 && pages.length < maxPages) {
            const item = queue.shift();
            const normalizedUrl = normalizeUrl(item.url);
            // Skip if already visited or different origin
            if (visited.has(normalizedUrl)) {
                continue;
            }
            if (!isSameOrigin(item.url, baseUrl)) {
                continue;
            }
            visited.add(normalizedUrl);
            // Check idle timeout
            if (Date.now() - lastActivityTime > timeout) {
                logger.warn('Idle timeout reached during crawl');
                break;
            }
            try {
                logger.info(`[${pages.length + 1}/${maxPages}] Visiting: ${item.url}`);
                await page.goto(item.url, {
                    waitUntil: 'domcontentloaded',
                    timeout: 15_000,
                });
                // Wait briefly for dynamic content to render
                await page.waitForTimeout(1000);
                lastActivityTime = Date.now();
                // Parse the page DOM
                const pageDescriptor = await parsePage(page);
                pageDescriptor.url = item.url;
                // Capture screenshot if enabled
                if (captureScreenshots) {
                    const screenshot = await captureViewportScreenshot(page);
                    pageDescriptor.screenshotHash = screenshot.hash;
                    screenshots.set(pageDescriptor.pageId, screenshot.data);
                }
                pages.push(pageDescriptor);
                // Queue navigation links for further crawling
                if (item.currentDepth < depth) {
                    for (const link of pageDescriptor.links) {
                        if (link.isNavigation && !visited.has(normalizeUrl(link.href))) {
                            queue.push({ url: link.href, currentDepth: item.currentDepth + 1 });
                        }
                    }
                }
            }
            catch (err) {
                logger.warn(`Failed to crawl ${item.url}: ${err}`);
            }
        }
    }
    finally {
        // Best-effort close on both success and failure; a close error must not
        // mask the crawl result or the original exception.
        if (browser) {
            await browser.close().catch(() => { });
        }
    }
    logger.info(`Crawl complete: ${pages.length} pages discovered`);
    // Extract site metadata from the first page
    const metadata = {
        title: pages[0]?.title,
        description: undefined,
        favicon: undefined,
    };
    const siteDescriptor = {
        siteId: generateSiteId(baseUrl),
        baseUrl,
        pages,
        analyzedAt: new Date().toISOString(),
        version: 1,
        crawlDepth: depth,
        metadata,
    };
    return {
        siteDescriptor,
        screenshots,
        ...(captureHar ? { harEntries } : {}),
    };
}
|
|
141
|
-
// ─── HAR Capture (for hybrid mode) ─────────────────────────────────
|
|
142
|
-
const MAX_RESPONSE_BODY_BYTES = 5 * 1024 * 1024;
const SKIP_BODY_MIME_TYPES = ['image/', 'video/', 'audio/', 'font/', 'application/octet-stream'];

/**
 * Build a HAR-style entry object from a request/response pair seen during the crawl.
 *
 * The response body is included as UTF-8 text unless its content type starts
 * with one of SKIP_BODY_MIME_TYPES or it exceeds MAX_RESPONSE_BODY_BYTES.
 * A missing or malformed Content-Length header does not suppress the body;
 * the actual body length is still checked after reading. Response sizes are
 * the byte length of the body as read, not of the re-encoded text.
 *
 * @param {object} request - Object exposing `url()`, `method()`, `headers()`, `postData()`.
 * @param {object} response - Object exposing `headers()`, `status()`, `statusText()`, `body()`.
 * @param {number} startTime - Epoch milliseconds at which the request was first seen.
 * @returns {Promise<object>} The entry; `time` is the wall-clock time since `startTime`.
 */
async function buildCrawlHarEntry(request, response, startTime) {
    const elapsed = Date.now() - startTime;
    const url = request.url();
    const parsedUrl = new URL(url);
    const toNameValue = ([name, value]) => ({ name, value });
    // Read each header map once instead of re-querying per field.
    const rawRequestHeaders = request.headers();
    const rawResponseHeaders = response.headers();
    const requestHeaders = Object.entries(rawRequestHeaders).map(toNameValue);
    const queryString = [...parsedUrl.searchParams.entries()].map(toNameValue);
    const postData = request.postData();
    const contentTypeHeader = rawRequestHeaders['content-type'] ?? '';
    const responseHeaders = Object.entries(rawResponseHeaders).map(toNameValue);
    const responseMimeType = rawResponseHeaders['content-type'] ?? '';
    const skipBody = SKIP_BODY_MIME_TYPES.some((m) => responseMimeType.startsWith(m));
    // Only trust Content-Length when it parses; NaN must not block the read.
    const declaredLength = Number.parseInt(rawResponseHeaders['content-length'] ?? '', 10);
    const declaredTooLarge = Number.isFinite(declaredLength) && declaredLength > MAX_RESPONSE_BODY_BYTES;
    let responseText;
    let responseBytes = 0;
    if (!skipBody && !declaredTooLarge) {
        try {
            const body = await response.body();
            if (body.length <= MAX_RESPONSE_BODY_BYTES) {
                responseText = body.toString('utf-8');
            }
        }
        catch {
            // Body may not be available
        }
    }
    if (responseText) {
        // Re-read is avoided: derive the size from the decoded text only as a
        // fallback; prefer the raw byte count captured below.
        responseBytes = Buffer.byteLength(responseText);
    }
    return {
        startedDateTime: new Date(startTime).toISOString(),
        time: elapsed,
        request: {
            method: request.method(),
            url,
            httpVersion: 'HTTP/1.1',
            headers: requestHeaders,
            queryString,
            cookies: [],
            headersSize: -1,
            bodySize: postData ? Buffer.byteLength(postData) : 0,
            ...(postData
                ? {
                    postData: {
                        mimeType: contentTypeHeader.split(';')[0].trim() || 'application/octet-stream',
                        text: postData,
                    },
                }
                : {}),
        },
        response: {
            status: response.status(),
            statusText: response.statusText(),
            httpVersion: 'HTTP/1.1',
            headers: responseHeaders,
            cookies: [],
            content: {
                size: responseBytes,
                mimeType: responseMimeType.split(';')[0].trim() || 'application/octet-stream',
                ...(responseText ? { text: responseText } : {}),
            },
            redirectURL: '',
            headersSize: -1,
            bodySize: responseBytes,
        },
        cache: {},
        // Per-phase timings are not measured; the whole duration is attributed to `wait`.
        timings: {
            send: 1,
            wait: Math.max(1, elapsed - 2),
            receive: 1,
        },
    };
}
|
|
221
|
-
// ─── Helpers ────────────────────────────────────────────────────────
|
|
222
|
-
/**
 * Produce the canonical form of a URL used for visited-set comparisons:
 * origin + path without a single trailing slash + query string. The fragment
 * is dropped. Input that cannot be parsed as a URL is returned unchanged.
 *
 * @param {string} url - URL to canonicalize.
 * @returns {string} Canonical URL, or `url` itself when unparsable.
 */
function normalizeUrl(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return url;
    }
    const { origin, pathname, search } = target;
    const trimmedPath = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
    return `${origin}${trimmedPath}${search}`;
}
|
|
232
|
-
/**
 * Derive a stable site identifier from a base URL: the prefix `site_`
 * followed by the first 12 hex characters of the URL's SHA-256 digest.
 *
 * @param {string} baseUrl - Base URL of the crawled site.
 * @returns {string} Identifier of the form `site_<12 hex chars>`.
 */
function generateSiteId(baseUrl) {
    const hasher = crypto.createHash('sha256');
    hasher.update(baseUrl);
    const shortDigest = hasher.digest('hex').substring(0, 12);
    return 'site_' + shortDigest;
}
|