rubycrawl 0.1.2 → 0.1.4
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/README.md +427 -210
- data/lib/rubycrawl/helpers.rb +15 -11
- data/lib/rubycrawl/markdown_converter.rb +3 -3
- data/lib/rubycrawl/result.rb +10 -11
- data/lib/rubycrawl/service_client.rb +25 -3
- data/lib/rubycrawl/site_crawler.rb +14 -6
- data/lib/rubycrawl/version.rb +1 -1
- data/lib/rubycrawl.rb +33 -7
- data/node/src/index.js +193 -19
- data/rubycrawl.gemspec +3 -2
- metadata +2 -567
- data/Gemfile +0 -11
- data/bin/console +0 -9
- data/bin/setup +0 -4
- data/node/node_modules/.bin/playwright +0 -1
- data/node/node_modules/.bin/playwright-core +0 -1
- data/node/node_modules/.package-lock.json +0 -65
- data/node/node_modules/dotenv/CHANGELOG.md +0 -520
- data/node/node_modules/dotenv/LICENSE +0 -23
- data/node/node_modules/dotenv/README-es.md +0 -411
- data/node/node_modules/dotenv/README.md +0 -645
- data/node/node_modules/dotenv/SECURITY.md +0 -1
- data/node/node_modules/dotenv/config.d.ts +0 -1
- data/node/node_modules/dotenv/config.js +0 -9
- data/node/node_modules/dotenv/lib/cli-options.js +0 -17
- data/node/node_modules/dotenv/lib/env-options.js +0 -28
- data/node/node_modules/dotenv/lib/main.d.ts +0 -162
- data/node/node_modules/dotenv/lib/main.js +0 -386
- data/node/node_modules/dotenv/package.json +0 -62
- data/node/node_modules/playwright/LICENSE +0 -202
- data/node/node_modules/playwright/NOTICE +0 -5
- data/node/node_modules/playwright/README.md +0 -168
- data/node/node_modules/playwright/ThirdPartyNotices.txt +0 -5042
- data/node/node_modules/playwright/cli.js +0 -19
- data/node/node_modules/playwright/index.d.ts +0 -17
- data/node/node_modules/playwright/index.js +0 -17
- data/node/node_modules/playwright/index.mjs +0 -18
- data/node/node_modules/playwright/jsx-runtime.js +0 -42
- data/node/node_modules/playwright/jsx-runtime.mjs +0 -21
- data/node/node_modules/playwright/lib/agents/agentParser.js +0 -89
- data/node/node_modules/playwright/lib/agents/copilot-setup-steps.yml +0 -34
- data/node/node_modules/playwright/lib/agents/generateAgents.js +0 -348
- data/node/node_modules/playwright/lib/agents/playwright-test-coverage.prompt.md +0 -31
- data/node/node_modules/playwright/lib/agents/playwright-test-generate.prompt.md +0 -8
- data/node/node_modules/playwright/lib/agents/playwright-test-generator.agent.md +0 -88
- data/node/node_modules/playwright/lib/agents/playwright-test-heal.prompt.md +0 -6
- data/node/node_modules/playwright/lib/agents/playwright-test-healer.agent.md +0 -55
- data/node/node_modules/playwright/lib/agents/playwright-test-plan.prompt.md +0 -9
- data/node/node_modules/playwright/lib/agents/playwright-test-planner.agent.md +0 -73
- data/node/node_modules/playwright/lib/common/config.js +0 -282
- data/node/node_modules/playwright/lib/common/configLoader.js +0 -344
- data/node/node_modules/playwright/lib/common/esmLoaderHost.js +0 -104
- data/node/node_modules/playwright/lib/common/expectBundle.js +0 -28
- data/node/node_modules/playwright/lib/common/expectBundleImpl.js +0 -407
- data/node/node_modules/playwright/lib/common/fixtures.js +0 -302
- data/node/node_modules/playwright/lib/common/globals.js +0 -58
- data/node/node_modules/playwright/lib/common/ipc.js +0 -60
- data/node/node_modules/playwright/lib/common/poolBuilder.js +0 -85
- data/node/node_modules/playwright/lib/common/process.js +0 -132
- data/node/node_modules/playwright/lib/common/suiteUtils.js +0 -140
- data/node/node_modules/playwright/lib/common/test.js +0 -321
- data/node/node_modules/playwright/lib/common/testLoader.js +0 -101
- data/node/node_modules/playwright/lib/common/testType.js +0 -298
- data/node/node_modules/playwright/lib/common/validators.js +0 -68
- data/node/node_modules/playwright/lib/fsWatcher.js +0 -67
- data/node/node_modules/playwright/lib/index.js +0 -726
- data/node/node_modules/playwright/lib/internalsForTest.js +0 -42
- data/node/node_modules/playwright/lib/isomorphic/events.js +0 -77
- data/node/node_modules/playwright/lib/isomorphic/folders.js +0 -30
- data/node/node_modules/playwright/lib/isomorphic/stringInternPool.js +0 -69
- data/node/node_modules/playwright/lib/isomorphic/teleReceiver.js +0 -521
- data/node/node_modules/playwright/lib/isomorphic/teleSuiteUpdater.js +0 -157
- data/node/node_modules/playwright/lib/isomorphic/testServerConnection.js +0 -225
- data/node/node_modules/playwright/lib/isomorphic/testServerInterface.js +0 -16
- data/node/node_modules/playwright/lib/isomorphic/testTree.js +0 -329
- data/node/node_modules/playwright/lib/isomorphic/types.d.js +0 -16
- data/node/node_modules/playwright/lib/loader/loaderMain.js +0 -59
- data/node/node_modules/playwright/lib/matchers/expect.js +0 -311
- data/node/node_modules/playwright/lib/matchers/matcherHint.js +0 -44
- data/node/node_modules/playwright/lib/matchers/matchers.js +0 -383
- data/node/node_modules/playwright/lib/matchers/toBeTruthy.js +0 -75
- data/node/node_modules/playwright/lib/matchers/toEqual.js +0 -100
- data/node/node_modules/playwright/lib/matchers/toHaveURL.js +0 -101
- data/node/node_modules/playwright/lib/matchers/toMatchAriaSnapshot.js +0 -159
- data/node/node_modules/playwright/lib/matchers/toMatchSnapshot.js +0 -342
- data/node/node_modules/playwright/lib/matchers/toMatchText.js +0 -99
- data/node/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +0 -329
- data/node/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +0 -84
- data/node/node_modules/playwright/lib/mcp/browser/config.js +0 -421
- data/node/node_modules/playwright/lib/mcp/browser/context.js +0 -244
- data/node/node_modules/playwright/lib/mcp/browser/response.js +0 -278
- data/node/node_modules/playwright/lib/mcp/browser/sessionLog.js +0 -75
- data/node/node_modules/playwright/lib/mcp/browser/tab.js +0 -343
- data/node/node_modules/playwright/lib/mcp/browser/tools/common.js +0 -65
- data/node/node_modules/playwright/lib/mcp/browser/tools/console.js +0 -46
- data/node/node_modules/playwright/lib/mcp/browser/tools/dialogs.js +0 -60
- data/node/node_modules/playwright/lib/mcp/browser/tools/evaluate.js +0 -61
- data/node/node_modules/playwright/lib/mcp/browser/tools/files.js +0 -58
- data/node/node_modules/playwright/lib/mcp/browser/tools/form.js +0 -63
- data/node/node_modules/playwright/lib/mcp/browser/tools/install.js +0 -72
- data/node/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +0 -107
- data/node/node_modules/playwright/lib/mcp/browser/tools/mouse.js +0 -107
- data/node/node_modules/playwright/lib/mcp/browser/tools/navigate.js +0 -71
- data/node/node_modules/playwright/lib/mcp/browser/tools/network.js +0 -63
- data/node/node_modules/playwright/lib/mcp/browser/tools/open.js +0 -57
- data/node/node_modules/playwright/lib/mcp/browser/tools/pdf.js +0 -49
- data/node/node_modules/playwright/lib/mcp/browser/tools/runCode.js +0 -78
- data/node/node_modules/playwright/lib/mcp/browser/tools/screenshot.js +0 -93
- data/node/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +0 -173
- data/node/node_modules/playwright/lib/mcp/browser/tools/tabs.js +0 -67
- data/node/node_modules/playwright/lib/mcp/browser/tools/tool.js +0 -47
- data/node/node_modules/playwright/lib/mcp/browser/tools/tracing.js +0 -74
- data/node/node_modules/playwright/lib/mcp/browser/tools/utils.js +0 -94
- data/node/node_modules/playwright/lib/mcp/browser/tools/verify.js +0 -143
- data/node/node_modules/playwright/lib/mcp/browser/tools/wait.js +0 -63
- data/node/node_modules/playwright/lib/mcp/browser/tools.js +0 -84
- data/node/node_modules/playwright/lib/mcp/browser/watchdog.js +0 -44
- data/node/node_modules/playwright/lib/mcp/config.d.js +0 -16
- data/node/node_modules/playwright/lib/mcp/extension/cdpRelay.js +0 -351
- data/node/node_modules/playwright/lib/mcp/extension/extensionContextFactory.js +0 -76
- data/node/node_modules/playwright/lib/mcp/extension/protocol.js +0 -28
- data/node/node_modules/playwright/lib/mcp/index.js +0 -61
- data/node/node_modules/playwright/lib/mcp/log.js +0 -35
- data/node/node_modules/playwright/lib/mcp/program.js +0 -111
- data/node/node_modules/playwright/lib/mcp/sdk/exports.js +0 -28
- data/node/node_modules/playwright/lib/mcp/sdk/http.js +0 -152
- data/node/node_modules/playwright/lib/mcp/sdk/inProcessTransport.js +0 -71
- data/node/node_modules/playwright/lib/mcp/sdk/server.js +0 -223
- data/node/node_modules/playwright/lib/mcp/sdk/tool.js +0 -47
- data/node/node_modules/playwright/lib/mcp/terminal/cli.js +0 -296
- data/node/node_modules/playwright/lib/mcp/terminal/command.js +0 -56
- data/node/node_modules/playwright/lib/mcp/terminal/commands.js +0 -333
- data/node/node_modules/playwright/lib/mcp/terminal/daemon.js +0 -129
- data/node/node_modules/playwright/lib/mcp/terminal/help.json +0 -32
- data/node/node_modules/playwright/lib/mcp/terminal/helpGenerator.js +0 -88
- data/node/node_modules/playwright/lib/mcp/terminal/socketConnection.js +0 -80
- data/node/node_modules/playwright/lib/mcp/test/browserBackend.js +0 -98
- data/node/node_modules/playwright/lib/mcp/test/generatorTools.js +0 -122
- data/node/node_modules/playwright/lib/mcp/test/plannerTools.js +0 -145
- data/node/node_modules/playwright/lib/mcp/test/seed.js +0 -82
- data/node/node_modules/playwright/lib/mcp/test/streams.js +0 -44
- data/node/node_modules/playwright/lib/mcp/test/testBackend.js +0 -99
- data/node/node_modules/playwright/lib/mcp/test/testContext.js +0 -285
- data/node/node_modules/playwright/lib/mcp/test/testTool.js +0 -30
- data/node/node_modules/playwright/lib/mcp/test/testTools.js +0 -108
- data/node/node_modules/playwright/lib/plugins/gitCommitInfoPlugin.js +0 -198
- data/node/node_modules/playwright/lib/plugins/index.js +0 -28
- data/node/node_modules/playwright/lib/plugins/webServerPlugin.js +0 -237
- data/node/node_modules/playwright/lib/program.js +0 -417
- data/node/node_modules/playwright/lib/reporters/base.js +0 -634
- data/node/node_modules/playwright/lib/reporters/blob.js +0 -138
- data/node/node_modules/playwright/lib/reporters/dot.js +0 -99
- data/node/node_modules/playwright/lib/reporters/empty.js +0 -32
- data/node/node_modules/playwright/lib/reporters/github.js +0 -128
- data/node/node_modules/playwright/lib/reporters/html.js +0 -633
- data/node/node_modules/playwright/lib/reporters/internalReporter.js +0 -138
- data/node/node_modules/playwright/lib/reporters/json.js +0 -254
- data/node/node_modules/playwright/lib/reporters/junit.js +0 -232
- data/node/node_modules/playwright/lib/reporters/line.js +0 -131
- data/node/node_modules/playwright/lib/reporters/list.js +0 -253
- data/node/node_modules/playwright/lib/reporters/listModeReporter.js +0 -69
- data/node/node_modules/playwright/lib/reporters/markdown.js +0 -144
- data/node/node_modules/playwright/lib/reporters/merge.js +0 -558
- data/node/node_modules/playwright/lib/reporters/multiplexer.js +0 -112
- data/node/node_modules/playwright/lib/reporters/reporterV2.js +0 -102
- data/node/node_modules/playwright/lib/reporters/teleEmitter.js +0 -317
- data/node/node_modules/playwright/lib/reporters/versions/blobV1.js +0 -16
- data/node/node_modules/playwright/lib/runner/dispatcher.js +0 -530
- data/node/node_modules/playwright/lib/runner/failureTracker.js +0 -72
- data/node/node_modules/playwright/lib/runner/lastRun.js +0 -77
- data/node/node_modules/playwright/lib/runner/loadUtils.js +0 -334
- data/node/node_modules/playwright/lib/runner/loaderHost.js +0 -89
- data/node/node_modules/playwright/lib/runner/processHost.js +0 -180
- data/node/node_modules/playwright/lib/runner/projectUtils.js +0 -241
- data/node/node_modules/playwright/lib/runner/rebase.js +0 -189
- data/node/node_modules/playwright/lib/runner/reporters.js +0 -138
- data/node/node_modules/playwright/lib/runner/sigIntWatcher.js +0 -96
- data/node/node_modules/playwright/lib/runner/storage.js +0 -91
- data/node/node_modules/playwright/lib/runner/taskRunner.js +0 -127
- data/node/node_modules/playwright/lib/runner/tasks.js +0 -410
- data/node/node_modules/playwright/lib/runner/testGroups.js +0 -125
- data/node/node_modules/playwright/lib/runner/testRunner.js +0 -398
- data/node/node_modules/playwright/lib/runner/testServer.js +0 -269
- data/node/node_modules/playwright/lib/runner/uiModeReporter.js +0 -30
- data/node/node_modules/playwright/lib/runner/vcs.js +0 -72
- data/node/node_modules/playwright/lib/runner/watchMode.js +0 -396
- data/node/node_modules/playwright/lib/runner/workerHost.js +0 -104
- data/node/node_modules/playwright/lib/third_party/pirates.js +0 -62
- data/node/node_modules/playwright/lib/third_party/tsconfig-loader.js +0 -103
- data/node/node_modules/playwright/lib/transform/babelBundle.js +0 -46
- data/node/node_modules/playwright/lib/transform/babelBundleImpl.js +0 -461
- data/node/node_modules/playwright/lib/transform/compilationCache.js +0 -274
- data/node/node_modules/playwright/lib/transform/esmLoader.js +0 -103
- data/node/node_modules/playwright/lib/transform/md.js +0 -221
- data/node/node_modules/playwright/lib/transform/portTransport.js +0 -67
- data/node/node_modules/playwright/lib/transform/transform.js +0 -303
- data/node/node_modules/playwright/lib/util.js +0 -400
- data/node/node_modules/playwright/lib/utilsBundle.js +0 -50
- data/node/node_modules/playwright/lib/utilsBundleImpl.js +0 -103
- data/node/node_modules/playwright/lib/worker/fixtureRunner.js +0 -262
- data/node/node_modules/playwright/lib/worker/testInfo.js +0 -536
- data/node/node_modules/playwright/lib/worker/testTracing.js +0 -345
- data/node/node_modules/playwright/lib/worker/timeoutManager.js +0 -174
- data/node/node_modules/playwright/lib/worker/util.js +0 -31
- data/node/node_modules/playwright/lib/worker/workerMain.js +0 -530
- data/node/node_modules/playwright/package.json +0 -72
- data/node/node_modules/playwright/test.d.ts +0 -18
- data/node/node_modules/playwright/test.js +0 -24
- data/node/node_modules/playwright/test.mjs +0 -34
- data/node/node_modules/playwright/types/test.d.ts +0 -10251
- data/node/node_modules/playwright/types/testReporter.d.ts +0 -822
- data/node/node_modules/playwright-core/LICENSE +0 -202
- data/node/node_modules/playwright-core/NOTICE +0 -5
- data/node/node_modules/playwright-core/README.md +0 -3
- data/node/node_modules/playwright-core/ThirdPartyNotices.txt +0 -4076
- data/node/node_modules/playwright-core/bin/install_media_pack.ps1 +0 -5
- data/node/node_modules/playwright-core/bin/install_webkit_wsl.ps1 +0 -33
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_linux.sh +0 -42
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_mac.sh +0 -13
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_win.ps1 +0 -24
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_linux.sh +0 -42
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_mac.sh +0 -12
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_win.ps1 +0 -24
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_linux.sh +0 -48
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_mac.sh +0 -11
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_win.ps1 +0 -23
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_linux.sh +0 -48
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_mac.sh +0 -11
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_win.ps1 +0 -23
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_linux.sh +0 -48
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_mac.sh +0 -11
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_win.ps1 +0 -24
- data/node/node_modules/playwright-core/browsers.json +0 -79
- data/node/node_modules/playwright-core/cli.js +0 -18
- data/node/node_modules/playwright-core/index.d.ts +0 -17
- data/node/node_modules/playwright-core/index.js +0 -32
- data/node/node_modules/playwright-core/index.mjs +0 -28
- data/node/node_modules/playwright-core/lib/androidServerImpl.js +0 -65
- data/node/node_modules/playwright-core/lib/browserServerImpl.js +0 -120
- data/node/node_modules/playwright-core/lib/cli/driver.js +0 -97
- data/node/node_modules/playwright-core/lib/cli/program.js +0 -589
- data/node/node_modules/playwright-core/lib/cli/programWithTestStub.js +0 -74
- data/node/node_modules/playwright-core/lib/client/android.js +0 -361
- data/node/node_modules/playwright-core/lib/client/api.js +0 -137
- data/node/node_modules/playwright-core/lib/client/artifact.js +0 -79
- data/node/node_modules/playwright-core/lib/client/browser.js +0 -161
- data/node/node_modules/playwright-core/lib/client/browserContext.js +0 -582
- data/node/node_modules/playwright-core/lib/client/browserType.js +0 -185
- data/node/node_modules/playwright-core/lib/client/cdpSession.js +0 -51
- data/node/node_modules/playwright-core/lib/client/channelOwner.js +0 -194
- data/node/node_modules/playwright-core/lib/client/clientHelper.js +0 -64
- data/node/node_modules/playwright-core/lib/client/clientInstrumentation.js +0 -55
- data/node/node_modules/playwright-core/lib/client/clientStackTrace.js +0 -69
- data/node/node_modules/playwright-core/lib/client/clock.js +0 -68
- data/node/node_modules/playwright-core/lib/client/connection.js +0 -318
- data/node/node_modules/playwright-core/lib/client/consoleMessage.js +0 -58
- data/node/node_modules/playwright-core/lib/client/coverage.js +0 -44
- data/node/node_modules/playwright-core/lib/client/dialog.js +0 -56
- data/node/node_modules/playwright-core/lib/client/download.js +0 -62
- data/node/node_modules/playwright-core/lib/client/electron.js +0 -138
- data/node/node_modules/playwright-core/lib/client/elementHandle.js +0 -284
- data/node/node_modules/playwright-core/lib/client/errors.js +0 -77
- data/node/node_modules/playwright-core/lib/client/eventEmitter.js +0 -314
- data/node/node_modules/playwright-core/lib/client/events.js +0 -103
- data/node/node_modules/playwright-core/lib/client/fetch.js +0 -368
- data/node/node_modules/playwright-core/lib/client/fileChooser.js +0 -46
- data/node/node_modules/playwright-core/lib/client/fileUtils.js +0 -34
- data/node/node_modules/playwright-core/lib/client/frame.js +0 -409
- data/node/node_modules/playwright-core/lib/client/harRouter.js +0 -87
- data/node/node_modules/playwright-core/lib/client/input.js +0 -84
- data/node/node_modules/playwright-core/lib/client/jsHandle.js +0 -109
- data/node/node_modules/playwright-core/lib/client/jsonPipe.js +0 -39
- data/node/node_modules/playwright-core/lib/client/localUtils.js +0 -60
- data/node/node_modules/playwright-core/lib/client/locator.js +0 -369
- data/node/node_modules/playwright-core/lib/client/network.js +0 -747
- data/node/node_modules/playwright-core/lib/client/page.js +0 -745
- data/node/node_modules/playwright-core/lib/client/pageAgent.js +0 -64
- data/node/node_modules/playwright-core/lib/client/platform.js +0 -77
- data/node/node_modules/playwright-core/lib/client/playwright.js +0 -71
- data/node/node_modules/playwright-core/lib/client/selectors.js +0 -55
- data/node/node_modules/playwright-core/lib/client/stream.js +0 -39
- data/node/node_modules/playwright-core/lib/client/timeoutSettings.js +0 -79
- data/node/node_modules/playwright-core/lib/client/tracing.js +0 -119
- data/node/node_modules/playwright-core/lib/client/types.js +0 -28
- data/node/node_modules/playwright-core/lib/client/video.js +0 -59
- data/node/node_modules/playwright-core/lib/client/waiter.js +0 -142
- data/node/node_modules/playwright-core/lib/client/webError.js +0 -39
- data/node/node_modules/playwright-core/lib/client/webSocket.js +0 -93
- data/node/node_modules/playwright-core/lib/client/worker.js +0 -85
- data/node/node_modules/playwright-core/lib/client/writableStream.js +0 -39
- data/node/node_modules/playwright-core/lib/generated/bindingsControllerSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/clockSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/injectedScriptSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/pollingRecorderSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/storageScriptSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/utilityScriptSource.js +0 -28
- data/node/node_modules/playwright-core/lib/generated/webSocketMockSource.js +0 -336
- data/node/node_modules/playwright-core/lib/inProcessFactory.js +0 -60
- data/node/node_modules/playwright-core/lib/inprocess.js +0 -3
- data/node/node_modules/playwright-core/lib/mcpBundle.js +0 -84
- data/node/node_modules/playwright-core/lib/mcpBundleImpl/index.js +0 -147
- data/node/node_modules/playwright-core/lib/outofprocess.js +0 -76
- data/node/node_modules/playwright-core/lib/protocol/serializers.js +0 -197
- data/node/node_modules/playwright-core/lib/protocol/validator.js +0 -2969
- data/node/node_modules/playwright-core/lib/protocol/validatorPrimitives.js +0 -193
- data/node/node_modules/playwright-core/lib/remote/playwrightConnection.js +0 -129
- data/node/node_modules/playwright-core/lib/remote/playwrightServer.js +0 -334
- data/node/node_modules/playwright-core/lib/server/agent/actionRunner.js +0 -335
- data/node/node_modules/playwright-core/lib/server/agent/actions.js +0 -128
- data/node/node_modules/playwright-core/lib/server/agent/codegen.js +0 -111
- data/node/node_modules/playwright-core/lib/server/agent/context.js +0 -150
- data/node/node_modules/playwright-core/lib/server/agent/expectTools.js +0 -156
- data/node/node_modules/playwright-core/lib/server/agent/pageAgent.js +0 -204
- data/node/node_modules/playwright-core/lib/server/agent/performTools.js +0 -262
- data/node/node_modules/playwright-core/lib/server/agent/tool.js +0 -109
- data/node/node_modules/playwright-core/lib/server/android/android.js +0 -465
- data/node/node_modules/playwright-core/lib/server/android/backendAdb.js +0 -177
- data/node/node_modules/playwright-core/lib/server/artifact.js +0 -127
- data/node/node_modules/playwright-core/lib/server/bidi/bidiBrowser.js +0 -549
- data/node/node_modules/playwright-core/lib/server/bidi/bidiChromium.js +0 -148
- data/node/node_modules/playwright-core/lib/server/bidi/bidiConnection.js +0 -213
- data/node/node_modules/playwright-core/lib/server/bidi/bidiDeserializer.js +0 -116
- data/node/node_modules/playwright-core/lib/server/bidi/bidiExecutionContext.js +0 -267
- data/node/node_modules/playwright-core/lib/server/bidi/bidiFirefox.js +0 -128
- data/node/node_modules/playwright-core/lib/server/bidi/bidiInput.js +0 -146
- data/node/node_modules/playwright-core/lib/server/bidi/bidiNetworkManager.js +0 -383
- data/node/node_modules/playwright-core/lib/server/bidi/bidiOverCdp.js +0 -102
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPage.js +0 -583
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPdf.js +0 -106
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiCommands.d.js +0 -22
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiKeyboard.js +0 -256
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocol.js +0 -24
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolCore.js +0 -180
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolPermissions.js +0 -42
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiSerializer.js +0 -148
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/firefoxPrefs.js +0 -259
- data/node/node_modules/playwright-core/lib/server/browser.js +0 -149
- data/node/node_modules/playwright-core/lib/server/browserContext.js +0 -702
- data/node/node_modules/playwright-core/lib/server/browserType.js +0 -336
- data/node/node_modules/playwright-core/lib/server/callLog.js +0 -82
- data/node/node_modules/playwright-core/lib/server/chromium/appIcon.png +0 -0
- data/node/node_modules/playwright-core/lib/server/chromium/chromium.js +0 -395
- data/node/node_modules/playwright-core/lib/server/chromium/chromiumSwitches.js +0 -104
- data/node/node_modules/playwright-core/lib/server/chromium/crBrowser.js +0 -511
- data/node/node_modules/playwright-core/lib/server/chromium/crConnection.js +0 -197
- data/node/node_modules/playwright-core/lib/server/chromium/crCoverage.js +0 -235
- data/node/node_modules/playwright-core/lib/server/chromium/crDevTools.js +0 -111
- data/node/node_modules/playwright-core/lib/server/chromium/crDragDrop.js +0 -131
- data/node/node_modules/playwright-core/lib/server/chromium/crExecutionContext.js +0 -146
- data/node/node_modules/playwright-core/lib/server/chromium/crInput.js +0 -187
- data/node/node_modules/playwright-core/lib/server/chromium/crNetworkManager.js +0 -707
- data/node/node_modules/playwright-core/lib/server/chromium/crPage.js +0 -1001
- data/node/node_modules/playwright-core/lib/server/chromium/crPdf.js +0 -121
- data/node/node_modules/playwright-core/lib/server/chromium/crProtocolHelper.js +0 -145
- data/node/node_modules/playwright-core/lib/server/chromium/crServiceWorker.js +0 -136
- data/node/node_modules/playwright-core/lib/server/chromium/defaultFontFamilies.js +0 -162
- data/node/node_modules/playwright-core/lib/server/chromium/protocol.d.js +0 -16
- data/node/node_modules/playwright-core/lib/server/clock.js +0 -149
- data/node/node_modules/playwright-core/lib/server/codegen/csharp.js +0 -327
- data/node/node_modules/playwright-core/lib/server/codegen/java.js +0 -274
- data/node/node_modules/playwright-core/lib/server/codegen/javascript.js +0 -247
- data/node/node_modules/playwright-core/lib/server/codegen/jsonl.js +0 -52
- data/node/node_modules/playwright-core/lib/server/codegen/language.js +0 -132
- data/node/node_modules/playwright-core/lib/server/codegen/languages.js +0 -68
- data/node/node_modules/playwright-core/lib/server/codegen/python.js +0 -279
- data/node/node_modules/playwright-core/lib/server/codegen/types.js +0 -16
- data/node/node_modules/playwright-core/lib/server/console.js +0 -57
- data/node/node_modules/playwright-core/lib/server/cookieStore.js +0 -206
- data/node/node_modules/playwright-core/lib/server/debugController.js +0 -191
- data/node/node_modules/playwright-core/lib/server/debugger.js +0 -119
- data/node/node_modules/playwright-core/lib/server/deviceDescriptors.js +0 -39
- data/node/node_modules/playwright-core/lib/server/deviceDescriptorsSource.json +0 -1779
- data/node/node_modules/playwright-core/lib/server/dialog.js +0 -116
- data/node/node_modules/playwright-core/lib/server/dispatchers/androidDispatcher.js +0 -325
- data/node/node_modules/playwright-core/lib/server/dispatchers/artifactDispatcher.js +0 -118
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserContextDispatcher.js +0 -384
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserDispatcher.js +0 -118
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserTypeDispatcher.js +0 -64
- data/node/node_modules/playwright-core/lib/server/dispatchers/cdpSessionDispatcher.js +0 -44
- data/node/node_modules/playwright-core/lib/server/dispatchers/debugControllerDispatcher.js +0 -78
- data/node/node_modules/playwright-core/lib/server/dispatchers/dialogDispatcher.js +0 -47
- data/node/node_modules/playwright-core/lib/server/dispatchers/dispatcher.js +0 -364
- data/node/node_modules/playwright-core/lib/server/dispatchers/electronDispatcher.js +0 -89
- data/node/node_modules/playwright-core/lib/server/dispatchers/elementHandlerDispatcher.js +0 -181
- data/node/node_modules/playwright-core/lib/server/dispatchers/frameDispatcher.js +0 -227
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsHandleDispatcher.js +0 -85
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsonPipeDispatcher.js +0 -58
- data/node/node_modules/playwright-core/lib/server/dispatchers/localUtilsDispatcher.js +0 -149
- data/node/node_modules/playwright-core/lib/server/dispatchers/networkDispatchers.js +0 -213
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageAgentDispatcher.js +0 -96
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageDispatcher.js +0 -393
- data/node/node_modules/playwright-core/lib/server/dispatchers/playwrightDispatcher.js +0 -108
- data/node/node_modules/playwright-core/lib/server/dispatchers/streamDispatcher.js +0 -67
- data/node/node_modules/playwright-core/lib/server/dispatchers/tracingDispatcher.js +0 -68
- data/node/node_modules/playwright-core/lib/server/dispatchers/webSocketRouteDispatcher.js +0 -165
- data/node/node_modules/playwright-core/lib/server/dispatchers/writableStreamDispatcher.js +0 -79
- data/node/node_modules/playwright-core/lib/server/dom.js +0 -815
- data/node/node_modules/playwright-core/lib/server/download.js +0 -70
- data/node/node_modules/playwright-core/lib/server/electron/electron.js +0 -273
- data/node/node_modules/playwright-core/lib/server/electron/loader.js +0 -29
- data/node/node_modules/playwright-core/lib/server/errors.js +0 -69
- data/node/node_modules/playwright-core/lib/server/fetch.js +0 -621
- data/node/node_modules/playwright-core/lib/server/fileChooser.js +0 -43
- data/node/node_modules/playwright-core/lib/server/fileUploadUtils.js +0 -84
- data/node/node_modules/playwright-core/lib/server/firefox/ffBrowser.js +0 -418
- data/node/node_modules/playwright-core/lib/server/firefox/ffConnection.js +0 -142
- data/node/node_modules/playwright-core/lib/server/firefox/ffExecutionContext.js +0 -150
- data/node/node_modules/playwright-core/lib/server/firefox/ffInput.js +0 -159
- data/node/node_modules/playwright-core/lib/server/firefox/ffNetworkManager.js +0 -256
- data/node/node_modules/playwright-core/lib/server/firefox/ffPage.js +0 -497
- data/node/node_modules/playwright-core/lib/server/firefox/firefox.js +0 -114
- data/node/node_modules/playwright-core/lib/server/firefox/protocol.d.js +0 -16
- data/node/node_modules/playwright-core/lib/server/formData.js +0 -147
- data/node/node_modules/playwright-core/lib/server/frameSelectors.js +0 -160
- data/node/node_modules/playwright-core/lib/server/frames.js +0 -1471
- data/node/node_modules/playwright-core/lib/server/har/harRecorder.js +0 -147
- data/node/node_modules/playwright-core/lib/server/har/harTracer.js +0 -607
- data/node/node_modules/playwright-core/lib/server/harBackend.js +0 -157
- data/node/node_modules/playwright-core/lib/server/helper.js +0 -96
- data/node/node_modules/playwright-core/lib/server/index.js +0 -58
- data/node/node_modules/playwright-core/lib/server/input.js +0 -277
- data/node/node_modules/playwright-core/lib/server/instrumentation.js +0 -72
- data/node/node_modules/playwright-core/lib/server/javascript.js +0 -291
- data/node/node_modules/playwright-core/lib/server/launchApp.js +0 -128
- data/node/node_modules/playwright-core/lib/server/localUtils.js +0 -214
- data/node/node_modules/playwright-core/lib/server/macEditingCommands.js +0 -143
- data/node/node_modules/playwright-core/lib/server/network.js +0 -667
- data/node/node_modules/playwright-core/lib/server/page.js +0 -830
- data/node/node_modules/playwright-core/lib/server/pipeTransport.js +0 -89
- data/node/node_modules/playwright-core/lib/server/playwright.js +0 -69
- data/node/node_modules/playwright-core/lib/server/progress.js +0 -132
- data/node/node_modules/playwright-core/lib/server/protocolError.js +0 -52
- data/node/node_modules/playwright-core/lib/server/recorder/chat.js +0 -161
- data/node/node_modules/playwright-core/lib/server/recorder/recorderApp.js +0 -366
- data/node/node_modules/playwright-core/lib/server/recorder/recorderRunner.js +0 -138
- data/node/node_modules/playwright-core/lib/server/recorder/recorderSignalProcessor.js +0 -83
- data/node/node_modules/playwright-core/lib/server/recorder/recorderUtils.js +0 -157
- data/node/node_modules/playwright-core/lib/server/recorder/throttledFile.js +0 -57
- data/node/node_modules/playwright-core/lib/server/recorder.js +0 -499
- data/node/node_modules/playwright-core/lib/server/registry/browserFetcher.js +0 -177
- data/node/node_modules/playwright-core/lib/server/registry/dependencies.js +0 -371
- data/node/node_modules/playwright-core/lib/server/registry/index.js +0 -1422
- data/node/node_modules/playwright-core/lib/server/registry/nativeDeps.js +0 -1280
- data/node/node_modules/playwright-core/lib/server/registry/oopDownloadBrowserMain.js +0 -127
- data/node/node_modules/playwright-core/lib/server/screencast.js +0 -190
- data/node/node_modules/playwright-core/lib/server/screenshotter.js +0 -333
- data/node/node_modules/playwright-core/lib/server/selectors.js +0 -112
- data/node/node_modules/playwright-core/lib/server/socksClientCertificatesInterceptor.js +0 -383
- data/node/node_modules/playwright-core/lib/server/socksInterceptor.js +0 -95
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotter.js +0 -147
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotterInjected.js +0 -561
- data/node/node_modules/playwright-core/lib/server/trace/recorder/tracing.js +0 -604
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceParser.js +0 -72
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceViewer.js +0 -245
- data/node/node_modules/playwright-core/lib/server/transport.js +0 -181
- data/node/node_modules/playwright-core/lib/server/types.js +0 -28
- data/node/node_modules/playwright-core/lib/server/usKeyboardLayout.js +0 -145
- data/node/node_modules/playwright-core/lib/server/utils/ascii.js +0 -44
- data/node/node_modules/playwright-core/lib/server/utils/comparators.js +0 -139
- data/node/node_modules/playwright-core/lib/server/utils/crypto.js +0 -216
- data/node/node_modules/playwright-core/lib/server/utils/debug.js +0 -42
- data/node/node_modules/playwright-core/lib/server/utils/debugLogger.js +0 -122
- data/node/node_modules/playwright-core/lib/server/utils/env.js +0 -73
- data/node/node_modules/playwright-core/lib/server/utils/eventsHelper.js +0 -39
- data/node/node_modules/playwright-core/lib/server/utils/expectUtils.js +0 -123
- data/node/node_modules/playwright-core/lib/server/utils/fileUtils.js +0 -191
- data/node/node_modules/playwright-core/lib/server/utils/happyEyeballs.js +0 -207
- data/node/node_modules/playwright-core/lib/server/utils/hostPlatform.js +0 -123
- data/node/node_modules/playwright-core/lib/server/utils/httpServer.js +0 -203
- data/node/node_modules/playwright-core/lib/server/utils/imageUtils.js +0 -141
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/colorUtils.js +0 -89
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/compare.js +0 -109
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/imageChannel.js +0 -78
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/stats.js +0 -102
- data/node/node_modules/playwright-core/lib/server/utils/linuxUtils.js +0 -71
- data/node/node_modules/playwright-core/lib/server/utils/network.js +0 -242
- data/node/node_modules/playwright-core/lib/server/utils/nodePlatform.js +0 -154
- data/node/node_modules/playwright-core/lib/server/utils/pipeTransport.js +0 -84
- data/node/node_modules/playwright-core/lib/server/utils/processLauncher.js +0 -241
- data/node/node_modules/playwright-core/lib/server/utils/profiler.js +0 -65
- data/node/node_modules/playwright-core/lib/server/utils/socksProxy.js +0 -511
- data/node/node_modules/playwright-core/lib/server/utils/spawnAsync.js +0 -41
- data/node/node_modules/playwright-core/lib/server/utils/task.js +0 -51
- data/node/node_modules/playwright-core/lib/server/utils/userAgent.js +0 -98
- data/node/node_modules/playwright-core/lib/server/utils/wsServer.js +0 -121
- data/node/node_modules/playwright-core/lib/server/utils/zipFile.js +0 -74
- data/node/node_modules/playwright-core/lib/server/utils/zones.js +0 -57
- data/node/node_modules/playwright-core/lib/server/videoRecorder.js +0 -124
- data/node/node_modules/playwright-core/lib/server/webkit/protocol.d.js +0 -16
- data/node/node_modules/playwright-core/lib/server/webkit/webkit.js +0 -108
- data/node/node_modules/playwright-core/lib/server/webkit/wkBrowser.js +0 -335
- data/node/node_modules/playwright-core/lib/server/webkit/wkConnection.js +0 -144
- data/node/node_modules/playwright-core/lib/server/webkit/wkExecutionContext.js +0 -154
- data/node/node_modules/playwright-core/lib/server/webkit/wkInput.js +0 -181
- data/node/node_modules/playwright-core/lib/server/webkit/wkInterceptableRequest.js +0 -197
- data/node/node_modules/playwright-core/lib/server/webkit/wkPage.js +0 -1158
- data/node/node_modules/playwright-core/lib/server/webkit/wkProvisionalPage.js +0 -83
- data/node/node_modules/playwright-core/lib/server/webkit/wkWorkers.js +0 -105
- data/node/node_modules/playwright-core/lib/third_party/pixelmatch.js +0 -255
- data/node/node_modules/playwright-core/lib/utils/isomorphic/ariaSnapshot.js +0 -455
- data/node/node_modules/playwright-core/lib/utils/isomorphic/assert.js +0 -31
- data/node/node_modules/playwright-core/lib/utils/isomorphic/colors.js +0 -72
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssParser.js +0 -245
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssTokenizer.js +0 -1051
- data/node/node_modules/playwright-core/lib/utils/isomorphic/headers.js +0 -53
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorGenerators.js +0 -689
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorParser.js +0 -176
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorUtils.js +0 -81
- data/node/node_modules/playwright-core/lib/utils/isomorphic/lruCache.js +0 -51
- data/node/node_modules/playwright-core/lib/utils/isomorphic/manualPromise.js +0 -114
- data/node/node_modules/playwright-core/lib/utils/isomorphic/mimeType.js +0 -459
- data/node/node_modules/playwright-core/lib/utils/isomorphic/multimap.js +0 -80
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolFormatter.js +0 -81
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolMetainfo.js +0 -330
- data/node/node_modules/playwright-core/lib/utils/isomorphic/rtti.js +0 -43
- data/node/node_modules/playwright-core/lib/utils/isomorphic/selectorParser.js +0 -386
- data/node/node_modules/playwright-core/lib/utils/isomorphic/semaphore.js +0 -54
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stackTrace.js +0 -158
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stringUtils.js +0 -204
- data/node/node_modules/playwright-core/lib/utils/isomorphic/time.js +0 -49
- data/node/node_modules/playwright-core/lib/utils/isomorphic/timeoutRunner.js +0 -66
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/entries.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotRenderer.js +0 -499
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotServer.js +0 -120
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotStorage.js +0 -89
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceLoader.js +0 -131
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModel.js +0 -365
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModernizer.js +0 -400
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV3.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV4.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV5.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV6.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV7.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV8.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/traceUtils.js +0 -58
- data/node/node_modules/playwright-core/lib/utils/isomorphic/types.js +0 -16
- data/node/node_modules/playwright-core/lib/utils/isomorphic/urlMatch.js +0 -190
- data/node/node_modules/playwright-core/lib/utils/isomorphic/utilityScriptSerializers.js +0 -251
- data/node/node_modules/playwright-core/lib/utils/isomorphic/yaml.js +0 -84
- data/node/node_modules/playwright-core/lib/utils.js +0 -111
- data/node/node_modules/playwright-core/lib/utilsBundle.js +0 -109
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/index.js +0 -218
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/xdg-open +0 -1066
- data/node/node_modules/playwright-core/lib/vite/htmlReport/index.html +0 -84
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DYBRYzYX.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DadYNm1I.js +0 -32
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codicon-DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BSjZa4pk.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BhTWtUlo.js +0 -193
- data/node/node_modules/playwright-core/lib/vite/recorder/index.html +0 -29
- data/node/node_modules/playwright-core/lib/vite/recorder/playwright-logo.svg +0 -9
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/codeMirrorModule-a5XoALAZ.js +0 -32
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/defaultSettingsView-CJSZINFr.js +0 -266
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/xtermModule-CsJ4vdCR.js +0 -9
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codeMirrorModule.DYBRYzYX.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codicon.DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/defaultSettingsView.7ch9cixO.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.BVu7tZDe.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.Bk2uYQRV.js +0 -2
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.html +0 -43
- data/node/node_modules/playwright-core/lib/vite/traceViewer/manifest.webmanifest +0 -16
- data/node/node_modules/playwright-core/lib/vite/traceViewer/playwright-logo.svg +0 -9
- data/node/node_modules/playwright-core/lib/vite/traceViewer/snapshot.html +0 -21
- data/node/node_modules/playwright-core/lib/vite/traceViewer/sw.bundle.js +0 -5
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.Btcz36p_.css +0 -1
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.CQJ9SCIQ.js +0 -5
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.html +0 -17
- data/node/node_modules/playwright-core/lib/vite/traceViewer/xtermModule.DYP7pi_n.css +0 -32
- data/node/node_modules/playwright-core/lib/zipBundle.js +0 -34
- data/node/node_modules/playwright-core/lib/zipBundleImpl.js +0 -5
- data/node/node_modules/playwright-core/package.json +0 -43
- data/node/node_modules/playwright-core/types/protocol.d.ts +0 -23824
- data/node/node_modules/playwright-core/types/structs.d.ts +0 -45
- data/node/node_modules/playwright-core/types/types.d.ts +0 -22843
- data/spec/rubycrawl_spec.rb +0 -51
- data/spec/spec_helper.rb +0 -11
data/lib/rubycrawl/helpers.rb
CHANGED
|
@@ -17,14 +17,22 @@ class RubyCrawl
|
|
|
17
17
|
if uri.host&.match?(/^(localhost|127\.|192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[01]))/)
|
|
18
18
|
warn '[rubycrawl] Warning: Crawling internal/private IP addresses'
|
|
19
19
|
end
|
|
20
|
-
rescue URI::InvalidURIError => e
|
|
20
|
+
rescue URI::InvalidURIError, TypeError => e
|
|
21
21
|
raise ConfigurationError, "Invalid URL: #{e.message}"
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
VALID_WAIT_UNTIL = %w[load domcontentloaded networkidle commit].freeze
|
|
25
|
+
|
|
26
|
+
def build_payload(url, wait_until, block_resources, session_id = nil)
|
|
27
|
+
if wait_until && !VALID_WAIT_UNTIL.include?(wait_until.to_s)
|
|
28
|
+
raise ConfigurationError,
|
|
29
|
+
"Invalid wait_until: #{wait_until.inspect}. Must be one of: #{VALID_WAIT_UNTIL.join(', ')}"
|
|
30
|
+
end
|
|
31
|
+
|
|
25
32
|
payload = { url: url }
|
|
26
33
|
payload[:wait_until] = wait_until if wait_until
|
|
27
34
|
payload[:block_resources] = block_resources unless block_resources.nil?
|
|
35
|
+
payload[:session_id] = session_id if session_id
|
|
28
36
|
payload
|
|
29
37
|
end
|
|
30
38
|
|
|
@@ -39,11 +47,9 @@ class RubyCrawl
|
|
|
39
47
|
|
|
40
48
|
def error_class_for(error_code)
|
|
41
49
|
case error_code
|
|
42
|
-
when '
|
|
43
|
-
TimeoutError
|
|
44
|
-
when 'navigation_failed', 'crawl_failed'
|
|
50
|
+
when 'crawl_failed'
|
|
45
51
|
NavigationError
|
|
46
|
-
when 'invalid_json', '
|
|
52
|
+
when 'invalid_json', 'session_create_failed', 'session_destroy_failed'
|
|
47
53
|
ServiceError
|
|
48
54
|
else
|
|
49
55
|
Error
|
|
@@ -52,12 +58,10 @@ class RubyCrawl
|
|
|
52
58
|
|
|
53
59
|
def error_message_for(error_code, error_message)
|
|
54
60
|
case error_code
|
|
55
|
-
when '
|
|
56
|
-
"Crawl timeout: #{error_message}"
|
|
57
|
-
when 'navigation_failed', 'crawl_failed'
|
|
61
|
+
when 'crawl_failed'
|
|
58
62
|
"Navigation failed: #{error_message}"
|
|
59
|
-
when 'invalid_json', '
|
|
60
|
-
"
|
|
63
|
+
when 'invalid_json', 'session_create_failed', 'session_destroy_failed'
|
|
64
|
+
"Service error [#{error_code}]: #{error_message}"
|
|
61
65
|
else
|
|
62
66
|
"Crawl error [#{error_code}]: #{error_message}"
|
|
63
67
|
end
|
|
@@ -15,10 +15,10 @@ class RubyCrawl
|
|
|
15
15
|
|
|
16
16
|
# Convert HTML to Markdown with resolved URLs.
|
|
17
17
|
#
|
|
18
|
-
# @param html [String] The HTML
|
|
18
|
+
# @param html [String] The page HTML to convert
|
|
19
19
|
# @param base_url [String, nil] Base URL to resolve relative URLs
|
|
20
|
-
# @param options [Hash] Options
|
|
21
|
-
# @return [String]
|
|
20
|
+
# @param options [Hash] Options passed to ReverseMarkdown
|
|
21
|
+
# @return [String] Markdown content with absolute URLs
|
|
22
22
|
def convert(html, base_url: nil, **options)
|
|
23
23
|
return '' if html.nil? || html.empty?
|
|
24
24
|
|
data/lib/rubycrawl/result.rb
CHANGED
|
@@ -1,38 +1,37 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
class RubyCrawl
|
|
4
|
-
# Result object with lazy
|
|
4
|
+
# Result object with lazy clean_markdown conversion.
|
|
5
5
|
class Result
|
|
6
6
|
attr_reader :text, :html, :links, :metadata
|
|
7
7
|
|
|
8
|
-
def initialize(text:, html:, links:, metadata
|
|
8
|
+
def initialize(text:, html:, links:, metadata:)
|
|
9
9
|
@text = text
|
|
10
10
|
@html = html
|
|
11
11
|
@links = links
|
|
12
12
|
@metadata = metadata
|
|
13
|
-
@markdown = markdown unless markdown.to_s.empty?
|
|
14
13
|
end
|
|
15
14
|
|
|
16
|
-
# Returns markdown
|
|
15
|
+
# Returns clean markdown converted from the page HTML.
|
|
17
16
|
# Relative URLs are resolved using the page's final_url.
|
|
18
17
|
#
|
|
19
18
|
# @return [String] Markdown content with absolute URLs
|
|
20
|
-
def
|
|
21
|
-
@
|
|
19
|
+
def clean_markdown
|
|
20
|
+
@clean_markdown ||= MarkdownConverter.convert(html, base_url: final_url)
|
|
22
21
|
end
|
|
23
22
|
|
|
24
23
|
# The final URL after redirects.
|
|
25
24
|
#
|
|
26
25
|
# @return [String, nil]
|
|
27
26
|
def final_url
|
|
28
|
-
metadata['final_url']
|
|
27
|
+
metadata['final_url']
|
|
29
28
|
end
|
|
30
29
|
|
|
31
|
-
# Check if
|
|
30
|
+
# Check if clean_markdown has been computed.
|
|
32
31
|
#
|
|
33
32
|
# @return [Boolean]
|
|
34
|
-
def
|
|
35
|
-
!@
|
|
33
|
+
def clean_markdown?
|
|
34
|
+
!@clean_markdown.nil?
|
|
36
35
|
end
|
|
37
36
|
|
|
38
37
|
def to_h
|
|
@@ -41,7 +40,7 @@ class RubyCrawl
|
|
|
41
40
|
html: html,
|
|
42
41
|
links: links,
|
|
43
42
|
metadata: metadata,
|
|
44
|
-
|
|
43
|
+
clean_markdown: @clean_markdown
|
|
45
44
|
}
|
|
46
45
|
end
|
|
47
46
|
end
|
|
@@ -36,6 +36,24 @@ class RubyCrawl
|
|
|
36
36
|
raise TimeoutError, "Request to node service timed out: #{e.message}"
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
+
# Create a session for reusing browser context across multiple crawls.
|
|
40
|
+
# @return [String] session_id
|
|
41
|
+
def create_session
|
|
42
|
+
response = post_json('/session/create', {})
|
|
43
|
+
raise ServiceError, "Failed to create session: #{response['error']}" if response['error']
|
|
44
|
+
|
|
45
|
+
response['session_id']
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Destroy a session and close its browser context.
|
|
49
|
+
# @param session_id [String]
|
|
50
|
+
def destroy_session(session_id)
|
|
51
|
+
post_json('/session/destroy', { session_id: session_id })
|
|
52
|
+
rescue StandardError
|
|
53
|
+
# Ignore errors on destroy - context may already be closed
|
|
54
|
+
nil
|
|
55
|
+
end
|
|
56
|
+
|
|
39
57
|
private
|
|
40
58
|
|
|
41
59
|
def build_request(uri, body)
|
|
@@ -55,9 +73,13 @@ class RubyCrawl
|
|
|
55
73
|
raise ServiceError, "rubycrawl node service directory not found: #{@node_dir}" unless Dir.exist?(@node_dir)
|
|
56
74
|
|
|
57
75
|
env = { 'RUBYCRAWL_NODE_PORT' => @port.to_s }
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
76
|
+
if @node_log
|
|
77
|
+
out = File.open(@node_log, 'a')
|
|
78
|
+
@node_pid = Process.spawn(env, @node_bin, 'src/index.js', chdir: @node_dir, out: out, err: out)
|
|
79
|
+
out.close
|
|
80
|
+
else
|
|
81
|
+
@node_pid = Process.spawn(env, @node_bin, 'src/index.js', chdir: @node_dir, out: File::NULL, err: File::NULL)
|
|
82
|
+
end
|
|
61
83
|
Process.detach(@node_pid)
|
|
62
84
|
end
|
|
63
85
|
|
|
@@ -5,7 +5,7 @@ require 'set'
|
|
|
5
5
|
class RubyCrawl
|
|
6
6
|
# BFS crawler that follows links with deduplication.
|
|
7
7
|
class SiteCrawler
|
|
8
|
-
# Page result yielded to the block with lazy
|
|
8
|
+
# Page result yielded to the block with lazy clean_markdown.
|
|
9
9
|
class PageResult
|
|
10
10
|
attr_reader :url, :html, :links, :metadata, :depth
|
|
11
11
|
|
|
@@ -17,14 +17,15 @@ class RubyCrawl
|
|
|
17
17
|
@depth = depth
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
#
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
# Returns clean markdown converted from the page HTML.
|
|
21
|
+
# Relative URLs are resolved using the page's final_url.
|
|
22
|
+
def clean_markdown
|
|
23
|
+
@clean_markdown ||= MarkdownConverter.convert(html, base_url: final_url)
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
# The final URL after redirects.
|
|
26
27
|
def final_url
|
|
27
|
-
metadata['final_url'] ||
|
|
28
|
+
metadata['final_url'] || url
|
|
28
29
|
end
|
|
29
30
|
end
|
|
30
31
|
|
|
@@ -35,8 +36,10 @@ class RubyCrawl
|
|
|
35
36
|
@same_host_only = options.fetch(:same_host_only, true)
|
|
36
37
|
@wait_until = options.fetch(:wait_until, nil)
|
|
37
38
|
@block_resources = options.fetch(:block_resources, nil)
|
|
39
|
+
@max_attempts = options.fetch(:max_attempts, nil)
|
|
38
40
|
@visited = Set.new
|
|
39
41
|
@queue = []
|
|
42
|
+
@session_id = nil
|
|
40
43
|
end
|
|
41
44
|
|
|
42
45
|
def crawl(start_url, &block)
|
|
@@ -46,8 +49,11 @@ class RubyCrawl
|
|
|
46
49
|
raise ConfigurationError, "Invalid start URL: #{start_url}" unless normalized
|
|
47
50
|
|
|
48
51
|
@base_url = normalized
|
|
52
|
+
@session_id = @client.create_session
|
|
49
53
|
enqueue(normalized, 0)
|
|
50
54
|
process_queue(&block)
|
|
55
|
+
ensure
|
|
56
|
+
@client.destroy_session(@session_id) if @session_id
|
|
51
57
|
end
|
|
52
58
|
|
|
53
59
|
private
|
|
@@ -77,7 +83,9 @@ class RubyCrawl
|
|
|
77
83
|
end
|
|
78
84
|
|
|
79
85
|
def crawl_page(url, depth)
|
|
80
|
-
|
|
86
|
+
opts = { wait_until: @wait_until, block_resources: @block_resources, session_id: @session_id }
|
|
87
|
+
opts[:max_attempts] = @max_attempts if @max_attempts
|
|
88
|
+
result = @client.crawl(url, **opts)
|
|
81
89
|
build_page_result(url, depth, result)
|
|
82
90
|
rescue Error => e
|
|
83
91
|
warn "[rubycrawl] Failed to crawl #{url}: #{e.message}"
|
data/lib/rubycrawl/version.rb
CHANGED
data/lib/rubycrawl.rb
CHANGED
|
@@ -45,6 +45,18 @@ class RubyCrawl
|
|
|
45
45
|
client.crawl_site(url, ...)
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# Create a session for reusing browser context across multiple crawls.
|
|
49
|
+
# @return [String] session_id
|
|
50
|
+
def create_session
|
|
51
|
+
client.create_session
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Destroy a session and close its browser context.
|
|
55
|
+
# @param session_id [String]
|
|
56
|
+
def destroy_session(session_id)
|
|
57
|
+
client.destroy_session(session_id)
|
|
58
|
+
end
|
|
59
|
+
|
|
48
60
|
def configure(**options)
|
|
49
61
|
@client = new(**options)
|
|
50
62
|
end
|
|
@@ -55,17 +67,30 @@ class RubyCrawl
|
|
|
55
67
|
build_service_client
|
|
56
68
|
end
|
|
57
69
|
|
|
58
|
-
def crawl(url, wait_until: @wait_until, block_resources: @block_resources,
|
|
70
|
+
def crawl(url, wait_until: @wait_until, block_resources: @block_resources, max_attempts: @max_attempts, session_id: nil)
|
|
59
71
|
validate_url!(url)
|
|
60
72
|
@service_client.ensure_running
|
|
61
|
-
with_retries(
|
|
62
|
-
payload = build_payload(url, wait_until, block_resources)
|
|
73
|
+
with_retries(max_attempts) do
|
|
74
|
+
payload = build_payload(url, wait_until, block_resources, session_id)
|
|
63
75
|
response = @service_client.post_json('/crawl', payload)
|
|
64
76
|
raise_node_error!(response)
|
|
65
77
|
build_result(response)
|
|
66
78
|
end
|
|
67
79
|
end
|
|
68
80
|
|
|
81
|
+
# Create a session for reusing browser context.
|
|
82
|
+
# @return [String] session_id
|
|
83
|
+
def create_session
|
|
84
|
+
@service_client.ensure_running
|
|
85
|
+
@service_client.create_session
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Destroy a session.
|
|
89
|
+
# @param session_id [String]
|
|
90
|
+
def destroy_session(session_id)
|
|
91
|
+
@service_client.destroy_session(session_id)
|
|
92
|
+
end
|
|
93
|
+
|
|
69
94
|
# Crawl multiple pages starting from a URL, following links.
|
|
70
95
|
# @see RubyCrawl.crawl_site
|
|
71
96
|
def crawl_site(url, **options, &block)
|
|
@@ -106,7 +131,7 @@ class RubyCrawl
|
|
|
106
131
|
@node_log = options.fetch(:node_log, ENV.fetch('RUBYCRAWL_NODE_LOG', nil))
|
|
107
132
|
@wait_until = options.fetch(:wait_until, nil)
|
|
108
133
|
@block_resources = options.fetch(:block_resources, nil)
|
|
109
|
-
@
|
|
134
|
+
@max_attempts = options.fetch(:max_attempts, 3)
|
|
110
135
|
end
|
|
111
136
|
|
|
112
137
|
def build_service_client
|
|
@@ -119,9 +144,9 @@ class RubyCrawl
|
|
|
119
144
|
)
|
|
120
145
|
end
|
|
121
146
|
|
|
122
|
-
def retry_with_backoff(attempt,
|
|
147
|
+
def retry_with_backoff(attempt, max_attempts, error)
|
|
123
148
|
backoff_seconds = 2**attempt
|
|
124
|
-
warn "[rubycrawl]
|
|
149
|
+
warn "[rubycrawl] Attempt #{attempt + 1}/#{max_attempts} failed, retrying in #{backoff_seconds}s: #{error.message}"
|
|
125
150
|
sleep(backoff_seconds)
|
|
126
151
|
end
|
|
127
152
|
|
|
@@ -131,7 +156,8 @@ class RubyCrawl
|
|
|
131
156
|
max_depth: options.fetch(:max_depth, 3),
|
|
132
157
|
same_host_only: options.fetch(:same_host_only, true),
|
|
133
158
|
wait_until: options.fetch(:wait_until, @wait_until),
|
|
134
|
-
block_resources: options.fetch(:block_resources, @block_resources)
|
|
159
|
+
block_resources: options.fetch(:block_resources, @block_resources),
|
|
160
|
+
max_attempts: options.fetch(:max_attempts, @max_attempts)
|
|
135
161
|
}
|
|
136
162
|
end
|
|
137
163
|
|
data/node/src/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import http from "node:http";
|
|
3
|
+
import crypto from "node:crypto";
|
|
3
4
|
import { chromium } from "playwright";
|
|
4
5
|
|
|
5
6
|
const HOST = "127.0.0.1";
|
|
@@ -21,10 +22,19 @@ function json(res, statusCode, body) {
|
|
|
21
22
|
res.end(payload);
|
|
22
23
|
}
|
|
23
24
|
|
|
25
|
+
const MAX_BODY_SIZE = 1 * 1024 * 1024; // 1 MB
|
|
26
|
+
|
|
24
27
|
function readJson(req) {
|
|
25
28
|
return new Promise((resolve, reject) => {
|
|
26
29
|
let data = "";
|
|
30
|
+
let size = 0;
|
|
27
31
|
req.on("data", (chunk) => {
|
|
32
|
+
size += chunk.length;
|
|
33
|
+
if (size > MAX_BODY_SIZE) {
|
|
34
|
+
reject(new Error("Request body too large"));
|
|
35
|
+
req.destroy();
|
|
36
|
+
return;
|
|
37
|
+
}
|
|
28
38
|
data += chunk;
|
|
29
39
|
});
|
|
30
40
|
req.on("end", () => {
|
|
@@ -46,29 +56,100 @@ function validateRequest(body) {
|
|
|
46
56
|
return { ok: true };
|
|
47
57
|
}
|
|
48
58
|
|
|
49
|
-
let
|
|
50
|
-
|
|
59
|
+
let browser = null;
|
|
60
|
+
|
|
61
|
+
// Session storage: session_id -> { context, createdAt, lastUsedAt }
|
|
62
|
+
const sessions = new Map();
|
|
63
|
+
|
|
64
|
+
// Session TTL: 30 minutes of inactivity
|
|
65
|
+
const SESSION_TTL_MS = 30 * 60 * 1000;
|
|
66
|
+
// Cleanup interval: every 5 minutes
|
|
67
|
+
const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
|
|
68
|
+
|
|
69
|
+
function generateSessionId() {
|
|
70
|
+
return `sess_${crypto.randomBytes(16).toString("hex")}`;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
async function getBrowser() {
|
|
74
|
+
if (browser && browser.isConnected()) return browser;
|
|
75
|
+
browser = await chromium.launch({ headless: true });
|
|
76
|
+
return browser;
|
|
77
|
+
}
|
|
51
78
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Create a fresh browser context.
|
|
81
|
+
*/
|
|
82
|
+
async function createContext() {
|
|
83
|
+
const browser = await getBrowser();
|
|
84
|
+
return browser.newContext();
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Get or create context based on session_id.
|
|
89
|
+
* If session_id provided and exists, reuse existing context.
|
|
90
|
+
* If session_id provided but expired/destroyed, create new context (handles retries).
|
|
91
|
+
* Otherwise create a fresh one-off context.
|
|
92
|
+
*/
|
|
93
|
+
async function getContext(sessionId) {
|
|
94
|
+
if (sessionId && sessions.has(sessionId)) {
|
|
95
|
+
// Update last used time
|
|
96
|
+
const session = sessions.get(sessionId);
|
|
97
|
+
session.lastUsedAt = Date.now();
|
|
98
|
+
return { context: session.context, isSession: true };
|
|
55
99
|
}
|
|
56
|
-
|
|
100
|
+
|
|
101
|
+
// If session_id provided but doesn't exist (expired/destroyed), recreate it
|
|
102
|
+
// This handles job retries gracefully
|
|
103
|
+
if (sessionId) {
|
|
104
|
+
const context = await createContext();
|
|
105
|
+
const now = Date.now();
|
|
106
|
+
sessions.set(sessionId, { context, createdAt: now, lastUsedAt: now });
|
|
107
|
+
// eslint-disable-next-line no-console
|
|
108
|
+
console.log(
|
|
109
|
+
`[rubycrawl] session recreated ${sessionId} (was expired or destroyed)`,
|
|
110
|
+
);
|
|
111
|
+
return { context, isSession: true };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return { context: await createContext(), isSession: false };
|
|
57
115
|
}
|
|
58
116
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
117
|
+
/**
|
|
118
|
+
* Cleanup expired sessions (no activity for SESSION_TTL_MS).
|
|
119
|
+
*/
|
|
120
|
+
async function cleanupExpiredSessions() {
|
|
121
|
+
const now = Date.now();
|
|
122
|
+
const expiredIds = [];
|
|
123
|
+
|
|
124
|
+
for (const [sessionId, session] of sessions) {
|
|
125
|
+
if (now - session.lastUsedAt > SESSION_TTL_MS) {
|
|
126
|
+
expiredIds.push(sessionId);
|
|
127
|
+
}
|
|
63
128
|
}
|
|
64
129
|
|
|
65
|
-
|
|
130
|
+
for (const sessionId of expiredIds) {
|
|
131
|
+
const session = sessions.get(sessionId);
|
|
132
|
+
await session.context.close().catch(() => {});
|
|
133
|
+
sessions.delete(sessionId);
|
|
134
|
+
// eslint-disable-next-line no-console
|
|
135
|
+
console.log(
|
|
136
|
+
`[rubycrawl] session expired ${sessionId} (inactive for ${SESSION_TTL_MS / 60000} min)`,
|
|
137
|
+
);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
if (expiredIds.length > 0) {
|
|
141
|
+
// eslint-disable-next-line no-console
|
|
142
|
+
console.log(
|
|
143
|
+
`[rubycrawl] cleanup: ${expiredIds.length} expired, ${sessions.size} active`,
|
|
144
|
+
);
|
|
145
|
+
}
|
|
66
146
|
}
|
|
67
147
|
|
|
148
|
+
// Start cleanup interval
|
|
149
|
+
setInterval(cleanupExpiredSessions, CLEANUP_INTERVAL_MS);
|
|
150
|
+
|
|
68
151
|
/**
|
|
69
152
|
* Extract HTML metadata from the page
|
|
70
|
-
* @param {import('playwright').Page} page - The Playwright page object
|
|
71
|
-
* @returns {Promise<Object>} Metadata object with title, description, OG tags, etc.
|
|
72
153
|
*/
|
|
73
154
|
async function extractMetadata(page) {
|
|
74
155
|
return page.evaluate(() => {
|
|
@@ -107,8 +188,6 @@ async function extractMetadata(page) {
|
|
|
107
188
|
|
|
108
189
|
/**
|
|
109
190
|
* Extract links from the page.
|
|
110
|
-
* @param {import('playwright').Page} page - The Playwright page object
|
|
111
|
-
* @returns {Promise<Array>} Array of link objects
|
|
112
191
|
*/
|
|
113
192
|
async function extractLinks(page) {
|
|
114
193
|
return page.evaluate(() => {
|
|
@@ -122,7 +201,17 @@ async function extractLinks(page) {
|
|
|
122
201
|
});
|
|
123
202
|
}
|
|
124
203
|
|
|
204
|
+
/**
|
|
205
|
+
* Extract plain text content from the page using innerText.
|
|
206
|
+
*/
|
|
207
|
+
async function extractText(page) {
|
|
208
|
+
return page.evaluate(() => (document.body?.innerText || "").trim());
|
|
209
|
+
}
|
|
210
|
+
|
|
125
211
|
async function handleCrawl(req, res) {
|
|
212
|
+
let context = null;
|
|
213
|
+
let isSession = false;
|
|
214
|
+
|
|
126
215
|
try {
|
|
127
216
|
const body = await readJson(req);
|
|
128
217
|
const validation = validateRequest(body);
|
|
@@ -138,9 +227,15 @@ async function handleCrawl(req, res) {
|
|
|
138
227
|
|
|
139
228
|
const start = Date.now();
|
|
140
229
|
// eslint-disable-next-line no-console
|
|
141
|
-
console.log(
|
|
230
|
+
console.log(
|
|
231
|
+
`[rubycrawl] crawl start ${body.url}${body.session_id ? ` (session=${body.session_id})` : ""}`,
|
|
232
|
+
);
|
|
233
|
+
|
|
234
|
+
// Get context (reuse if session_id provided)
|
|
235
|
+
const ctxResult = await getContext(body.session_id);
|
|
236
|
+
context = ctxResult.context;
|
|
237
|
+
isSession = ctxResult.isSession;
|
|
142
238
|
|
|
143
|
-
const context = await getContext();
|
|
144
239
|
const page = await context.newPage();
|
|
145
240
|
|
|
146
241
|
try {
|
|
@@ -164,6 +259,7 @@ async function handleCrawl(req, res) {
|
|
|
164
259
|
const status = response ? response.status() : null;
|
|
165
260
|
const htmlMetadata = await extractMetadata(page);
|
|
166
261
|
const links = await extractLinks(page);
|
|
262
|
+
const text = await extractText(page);
|
|
167
263
|
|
|
168
264
|
// eslint-disable-next-line no-console
|
|
169
265
|
console.log(
|
|
@@ -174,8 +270,7 @@ async function handleCrawl(req, res) {
|
|
|
174
270
|
ok: true,
|
|
175
271
|
url: body.url,
|
|
176
272
|
html,
|
|
177
|
-
text
|
|
178
|
-
markdown: "",
|
|
273
|
+
text,
|
|
179
274
|
links,
|
|
180
275
|
metadata: {
|
|
181
276
|
status,
|
|
@@ -192,16 +287,95 @@ async function handleCrawl(req, res) {
|
|
|
192
287
|
// eslint-disable-next-line no-console
|
|
193
288
|
console.log(`[rubycrawl] crawl error ${code} ${error?.message || ""}`);
|
|
194
289
|
return json(res, 400, { error: code, message: error?.message });
|
|
290
|
+
} finally {
|
|
291
|
+
// Only close context if not a session (sessions are managed separately)
|
|
292
|
+
if (context && !isSession) {
|
|
293
|
+
await context.close().catch(() => {});
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
/**
 * Create a new session backed by a reusable browser context.
 *
 * A fresh id is generated, a context is obtained from `createContext()`, and
 * both are recorded in the `sessions` map together with creation and
 * last-use timestamps (initially identical).
 *
 * Responds 200 with `{ ok: true, session_id }` on success, or 400 with
 * `session_create_failed` when any step throws.
 */
async function handleSessionCreate(req, res) {
  try {
    const id = generateSessionId();
    const browserContext = await createContext();
    const timestamp = Date.now();
    const entry = {
      context: browserContext,
      createdAt: timestamp,
      lastUsedAt: timestamp,
    };
    sessions.set(id, entry);

    // eslint-disable-next-line no-console
    console.log(`[rubycrawl] session created ${id} (active=${sessions.size})`);

    return json(res, 200, { ok: true, session_id: id });
  } catch (error) {
    const detail = error?.message;
    // eslint-disable-next-line no-console
    console.log(`[rubycrawl] session create error ${detail || ""}`);
    return json(res, 400, { error: "session_create_failed", message: detail });
  }
}
|
|
323
|
+
|
|
324
|
+
/**
 * Destroy a session and close its browser context.
 *
 * Returns success even if the session doesn't exist, so the call is
 * idempotent and safe for clients to retry.
 *
 * Responses:
 *   - 422 `{ error: "session_id required" }` when the body has no session_id
 *   - 200 `{ ok: true, message }` when the session is unknown
 *   - 200 `{ ok: true }` once the session has been removed
 *   - 400 `{ error: "session_destroy_failed", message }` on unexpected errors
 *
 * @param {http.IncomingMessage} req - Request whose JSON body carries `session_id`.
 * @param {http.ServerResponse} res - Response, written through `json`.
 * @returns {Promise<*>} Whatever `json` returns.
 */
async function handleSessionDestroy(req, res) {
  try {
    const body = await readJson(req);
    // Optional chaining: a literal `null` JSON body is reported as a missing
    // session_id (422) rather than surfacing as a TypeError.
    const sessionId = body?.session_id;

    if (!sessionId) {
      return json(res, 422, { error: "session_id required" });
    }

    const session = sessions.get(sessionId);

    // Idempotent: if session doesn't exist, still return success
    if (!session) {
      return json(res, 200, {
        ok: true,
        message: "session already destroyed or expired",
      });
    }

    // Unregister BEFORE awaiting close(): while close() is pending, a
    // concurrent request must not be able to look up a context that is
    // already shutting down (or try to close it a second time).
    sessions.delete(sessionId);
    // Best-effort close: a failure here must not fail the destroy request.
    await session.context.close().catch(() => {});

    // eslint-disable-next-line no-console
    console.log(`[rubycrawl] session destroyed ${sessionId}`);

    return json(res, 200, { ok: true });
  } catch (error) {
    // eslint-disable-next-line no-console
    console.log(`[rubycrawl] session destroy error ${error?.message || ""}`);
    return json(res, 400, {
      error: "session_destroy_failed",
      message: error?.message,
    });
  }
}
|
|
197
362
|
|
|
198
363
|
const server = http.createServer((req, res) => {
|
|
199
364
|
// eslint-disable-next-line no-console
|
|
200
365
|
console.log(`[rubycrawl] request ${req.method} ${req.url}`);
|
|
366
|
+
|
|
201
367
|
if (req.method === "POST" && req.url === "/crawl") {
|
|
202
368
|
return handleCrawl(req, res);
|
|
203
369
|
}
|
|
204
370
|
|
|
371
|
+
if (req.method === "POST" && req.url === "/session/create") {
|
|
372
|
+
return handleSessionCreate(req, res);
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
if (req.method === "POST" && req.url === "/session/destroy") {
|
|
376
|
+
return handleSessionDestroy(req, res);
|
|
377
|
+
}
|
|
378
|
+
|
|
205
379
|
if (req.method === "GET" && req.url === "/health") {
|
|
206
380
|
return json(res, 200, { ok: true });
|
|
207
381
|
}
|
data/rubycrawl.gemspec
CHANGED
|
@@ -15,8 +15,9 @@ Gem::Specification.new do |spec|
|
|
|
15
15
|
|
|
16
16
|
spec.required_ruby_version = '>= 3.0'
|
|
17
17
|
|
|
18
|
-
spec.files
|
|
19
|
-
spec.files +=
|
|
18
|
+
spec.files = Dir.glob('{lib}/**/*', File::FNM_DOTMATCH).reject { |f| File.directory?(f) }
|
|
19
|
+
spec.files += Dir.glob('node/**/*', File::FNM_DOTMATCH).reject { |f| File.directory?(f) || f.include?('node_modules') }
|
|
20
|
+
spec.files += %w[README.md LICENSE Rakefile rubycrawl.gemspec .rspec]
|
|
20
21
|
|
|
21
22
|
spec.bindir = 'bin'
|
|
22
23
|
spec.executables = []
|