rubycrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/Gemfile +11 -0
- data/LICENSE +21 -0
- data/README.md +585 -0
- data/Rakefile +8 -0
- data/bin/console +9 -0
- data/bin/setup +4 -0
- data/lib/rubycrawl/errors.rb +18 -0
- data/lib/rubycrawl/helpers.rb +66 -0
- data/lib/rubycrawl/markdown_converter.rb +37 -0
- data/lib/rubycrawl/railtie.rb +12 -0
- data/lib/rubycrawl/result.rb +40 -0
- data/lib/rubycrawl/service_client.rb +86 -0
- data/lib/rubycrawl/site_crawler.rb +113 -0
- data/lib/rubycrawl/tasks/install.rake +85 -0
- data/lib/rubycrawl/url_normalizer.rb +68 -0
- data/lib/rubycrawl/version.rb +5 -0
- data/lib/rubycrawl.rb +141 -0
- data/node/.gitignore +2 -0
- data/node/.npmrc +1 -0
- data/node/README.md +19 -0
- data/node/node_modules/.bin/playwright +1 -0
- data/node/node_modules/.bin/playwright-core +1 -0
- data/node/node_modules/.package-lock.json +65 -0
- data/node/node_modules/dotenv/CHANGELOG.md +520 -0
- data/node/node_modules/dotenv/LICENSE +23 -0
- data/node/node_modules/dotenv/README-es.md +411 -0
- data/node/node_modules/dotenv/README.md +645 -0
- data/node/node_modules/dotenv/SECURITY.md +1 -0
- data/node/node_modules/dotenv/config.d.ts +1 -0
- data/node/node_modules/dotenv/config.js +9 -0
- data/node/node_modules/dotenv/lib/cli-options.js +17 -0
- data/node/node_modules/dotenv/lib/env-options.js +28 -0
- data/node/node_modules/dotenv/lib/main.d.ts +162 -0
- data/node/node_modules/dotenv/lib/main.js +386 -0
- data/node/node_modules/dotenv/package.json +62 -0
- data/node/node_modules/playwright/LICENSE +202 -0
- data/node/node_modules/playwright/NOTICE +5 -0
- data/node/node_modules/playwright/README.md +168 -0
- data/node/node_modules/playwright/ThirdPartyNotices.txt +5042 -0
- data/node/node_modules/playwright/cli.js +19 -0
- data/node/node_modules/playwright/index.d.ts +17 -0
- data/node/node_modules/playwright/index.js +17 -0
- data/node/node_modules/playwright/index.mjs +18 -0
- data/node/node_modules/playwright/jsx-runtime.js +42 -0
- data/node/node_modules/playwright/jsx-runtime.mjs +21 -0
- data/node/node_modules/playwright/lib/agents/agentParser.js +89 -0
- data/node/node_modules/playwright/lib/agents/copilot-setup-steps.yml +34 -0
- data/node/node_modules/playwright/lib/agents/generateAgents.js +348 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-coverage.prompt.md +31 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-generate.prompt.md +8 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-generator.agent.md +88 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-heal.prompt.md +6 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-healer.agent.md +55 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-plan.prompt.md +9 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-planner.agent.md +73 -0
- data/node/node_modules/playwright/lib/common/config.js +282 -0
- data/node/node_modules/playwright/lib/common/configLoader.js +344 -0
- data/node/node_modules/playwright/lib/common/esmLoaderHost.js +104 -0
- data/node/node_modules/playwright/lib/common/expectBundle.js +28 -0
- data/node/node_modules/playwright/lib/common/expectBundleImpl.js +407 -0
- data/node/node_modules/playwright/lib/common/fixtures.js +302 -0
- data/node/node_modules/playwright/lib/common/globals.js +58 -0
- data/node/node_modules/playwright/lib/common/ipc.js +60 -0
- data/node/node_modules/playwright/lib/common/poolBuilder.js +85 -0
- data/node/node_modules/playwright/lib/common/process.js +132 -0
- data/node/node_modules/playwright/lib/common/suiteUtils.js +140 -0
- data/node/node_modules/playwright/lib/common/test.js +321 -0
- data/node/node_modules/playwright/lib/common/testLoader.js +101 -0
- data/node/node_modules/playwright/lib/common/testType.js +298 -0
- data/node/node_modules/playwright/lib/common/validators.js +68 -0
- data/node/node_modules/playwright/lib/fsWatcher.js +67 -0
- data/node/node_modules/playwright/lib/index.js +726 -0
- data/node/node_modules/playwright/lib/internalsForTest.js +42 -0
- data/node/node_modules/playwright/lib/isomorphic/events.js +77 -0
- data/node/node_modules/playwright/lib/isomorphic/folders.js +30 -0
- data/node/node_modules/playwright/lib/isomorphic/stringInternPool.js +69 -0
- data/node/node_modules/playwright/lib/isomorphic/teleReceiver.js +521 -0
- data/node/node_modules/playwright/lib/isomorphic/teleSuiteUpdater.js +157 -0
- data/node/node_modules/playwright/lib/isomorphic/testServerConnection.js +225 -0
- data/node/node_modules/playwright/lib/isomorphic/testServerInterface.js +16 -0
- data/node/node_modules/playwright/lib/isomorphic/testTree.js +329 -0
- data/node/node_modules/playwright/lib/isomorphic/types.d.js +16 -0
- data/node/node_modules/playwright/lib/loader/loaderMain.js +59 -0
- data/node/node_modules/playwright/lib/matchers/expect.js +311 -0
- data/node/node_modules/playwright/lib/matchers/matcherHint.js +44 -0
- data/node/node_modules/playwright/lib/matchers/matchers.js +383 -0
- data/node/node_modules/playwright/lib/matchers/toBeTruthy.js +75 -0
- data/node/node_modules/playwright/lib/matchers/toEqual.js +100 -0
- data/node/node_modules/playwright/lib/matchers/toHaveURL.js +101 -0
- data/node/node_modules/playwright/lib/matchers/toMatchAriaSnapshot.js +159 -0
- data/node/node_modules/playwright/lib/matchers/toMatchSnapshot.js +342 -0
- data/node/node_modules/playwright/lib/matchers/toMatchText.js +99 -0
- data/node/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +329 -0
- data/node/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +84 -0
- data/node/node_modules/playwright/lib/mcp/browser/config.js +421 -0
- data/node/node_modules/playwright/lib/mcp/browser/context.js +244 -0
- data/node/node_modules/playwright/lib/mcp/browser/response.js +278 -0
- data/node/node_modules/playwright/lib/mcp/browser/sessionLog.js +75 -0
- data/node/node_modules/playwright/lib/mcp/browser/tab.js +343 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/common.js +65 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/console.js +46 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/dialogs.js +60 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/evaluate.js +61 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/files.js +58 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/form.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/install.js +72 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +107 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/mouse.js +107 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/navigate.js +71 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/network.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/open.js +57 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/pdf.js +49 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/runCode.js +78 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/screenshot.js +93 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +173 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tabs.js +67 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tool.js +47 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tracing.js +74 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/utils.js +94 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/verify.js +143 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/wait.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools.js +84 -0
- data/node/node_modules/playwright/lib/mcp/browser/watchdog.js +44 -0
- data/node/node_modules/playwright/lib/mcp/config.d.js +16 -0
- data/node/node_modules/playwright/lib/mcp/extension/cdpRelay.js +351 -0
- data/node/node_modules/playwright/lib/mcp/extension/extensionContextFactory.js +76 -0
- data/node/node_modules/playwright/lib/mcp/extension/protocol.js +28 -0
- data/node/node_modules/playwright/lib/mcp/index.js +61 -0
- data/node/node_modules/playwright/lib/mcp/log.js +35 -0
- data/node/node_modules/playwright/lib/mcp/program.js +111 -0
- data/node/node_modules/playwright/lib/mcp/sdk/exports.js +28 -0
- data/node/node_modules/playwright/lib/mcp/sdk/http.js +152 -0
- data/node/node_modules/playwright/lib/mcp/sdk/inProcessTransport.js +71 -0
- data/node/node_modules/playwright/lib/mcp/sdk/server.js +223 -0
- data/node/node_modules/playwright/lib/mcp/sdk/tool.js +47 -0
- data/node/node_modules/playwright/lib/mcp/terminal/cli.js +296 -0
- data/node/node_modules/playwright/lib/mcp/terminal/command.js +56 -0
- data/node/node_modules/playwright/lib/mcp/terminal/commands.js +333 -0
- data/node/node_modules/playwright/lib/mcp/terminal/daemon.js +129 -0
- data/node/node_modules/playwright/lib/mcp/terminal/help.json +32 -0
- data/node/node_modules/playwright/lib/mcp/terminal/helpGenerator.js +88 -0
- data/node/node_modules/playwright/lib/mcp/terminal/socketConnection.js +80 -0
- data/node/node_modules/playwright/lib/mcp/test/browserBackend.js +98 -0
- data/node/node_modules/playwright/lib/mcp/test/generatorTools.js +122 -0
- data/node/node_modules/playwright/lib/mcp/test/plannerTools.js +145 -0
- data/node/node_modules/playwright/lib/mcp/test/seed.js +82 -0
- data/node/node_modules/playwright/lib/mcp/test/streams.js +44 -0
- data/node/node_modules/playwright/lib/mcp/test/testBackend.js +99 -0
- data/node/node_modules/playwright/lib/mcp/test/testContext.js +285 -0
- data/node/node_modules/playwright/lib/mcp/test/testTool.js +30 -0
- data/node/node_modules/playwright/lib/mcp/test/testTools.js +108 -0
- data/node/node_modules/playwright/lib/plugins/gitCommitInfoPlugin.js +198 -0
- data/node/node_modules/playwright/lib/plugins/index.js +28 -0
- data/node/node_modules/playwright/lib/plugins/webServerPlugin.js +237 -0
- data/node/node_modules/playwright/lib/program.js +417 -0
- data/node/node_modules/playwright/lib/reporters/base.js +634 -0
- data/node/node_modules/playwright/lib/reporters/blob.js +138 -0
- data/node/node_modules/playwright/lib/reporters/dot.js +99 -0
- data/node/node_modules/playwright/lib/reporters/empty.js +32 -0
- data/node/node_modules/playwright/lib/reporters/github.js +128 -0
- data/node/node_modules/playwright/lib/reporters/html.js +633 -0
- data/node/node_modules/playwright/lib/reporters/internalReporter.js +138 -0
- data/node/node_modules/playwright/lib/reporters/json.js +254 -0
- data/node/node_modules/playwright/lib/reporters/junit.js +232 -0
- data/node/node_modules/playwright/lib/reporters/line.js +131 -0
- data/node/node_modules/playwright/lib/reporters/list.js +253 -0
- data/node/node_modules/playwright/lib/reporters/listModeReporter.js +69 -0
- data/node/node_modules/playwright/lib/reporters/markdown.js +144 -0
- data/node/node_modules/playwright/lib/reporters/merge.js +558 -0
- data/node/node_modules/playwright/lib/reporters/multiplexer.js +112 -0
- data/node/node_modules/playwright/lib/reporters/reporterV2.js +102 -0
- data/node/node_modules/playwright/lib/reporters/teleEmitter.js +317 -0
- data/node/node_modules/playwright/lib/reporters/versions/blobV1.js +16 -0
- data/node/node_modules/playwright/lib/runner/dispatcher.js +530 -0
- data/node/node_modules/playwright/lib/runner/failureTracker.js +72 -0
- data/node/node_modules/playwright/lib/runner/lastRun.js +77 -0
- data/node/node_modules/playwright/lib/runner/loadUtils.js +334 -0
- data/node/node_modules/playwright/lib/runner/loaderHost.js +89 -0
- data/node/node_modules/playwright/lib/runner/processHost.js +180 -0
- data/node/node_modules/playwright/lib/runner/projectUtils.js +241 -0
- data/node/node_modules/playwright/lib/runner/rebase.js +189 -0
- data/node/node_modules/playwright/lib/runner/reporters.js +138 -0
- data/node/node_modules/playwright/lib/runner/sigIntWatcher.js +96 -0
- data/node/node_modules/playwright/lib/runner/storage.js +91 -0
- data/node/node_modules/playwright/lib/runner/taskRunner.js +127 -0
- data/node/node_modules/playwright/lib/runner/tasks.js +410 -0
- data/node/node_modules/playwright/lib/runner/testGroups.js +125 -0
- data/node/node_modules/playwright/lib/runner/testRunner.js +398 -0
- data/node/node_modules/playwright/lib/runner/testServer.js +269 -0
- data/node/node_modules/playwright/lib/runner/uiModeReporter.js +30 -0
- data/node/node_modules/playwright/lib/runner/vcs.js +72 -0
- data/node/node_modules/playwright/lib/runner/watchMode.js +396 -0
- data/node/node_modules/playwright/lib/runner/workerHost.js +104 -0
- data/node/node_modules/playwright/lib/third_party/pirates.js +62 -0
- data/node/node_modules/playwright/lib/third_party/tsconfig-loader.js +103 -0
- data/node/node_modules/playwright/lib/transform/babelBundle.js +46 -0
- data/node/node_modules/playwright/lib/transform/babelBundleImpl.js +461 -0
- data/node/node_modules/playwright/lib/transform/compilationCache.js +274 -0
- data/node/node_modules/playwright/lib/transform/esmLoader.js +103 -0
- data/node/node_modules/playwright/lib/transform/md.js +221 -0
- data/node/node_modules/playwright/lib/transform/portTransport.js +67 -0
- data/node/node_modules/playwright/lib/transform/transform.js +303 -0
- data/node/node_modules/playwright/lib/util.js +400 -0
- data/node/node_modules/playwright/lib/utilsBundle.js +50 -0
- data/node/node_modules/playwright/lib/utilsBundleImpl.js +103 -0
- data/node/node_modules/playwright/lib/worker/fixtureRunner.js +262 -0
- data/node/node_modules/playwright/lib/worker/testInfo.js +536 -0
- data/node/node_modules/playwright/lib/worker/testTracing.js +345 -0
- data/node/node_modules/playwright/lib/worker/timeoutManager.js +174 -0
- data/node/node_modules/playwright/lib/worker/util.js +31 -0
- data/node/node_modules/playwright/lib/worker/workerMain.js +530 -0
- data/node/node_modules/playwright/package.json +72 -0
- data/node/node_modules/playwright/test.d.ts +18 -0
- data/node/node_modules/playwright/test.js +24 -0
- data/node/node_modules/playwright/test.mjs +34 -0
- data/node/node_modules/playwright/types/test.d.ts +10251 -0
- data/node/node_modules/playwright/types/testReporter.d.ts +822 -0
- data/node/node_modules/playwright-core/LICENSE +202 -0
- data/node/node_modules/playwright-core/NOTICE +5 -0
- data/node/node_modules/playwright-core/README.md +3 -0
- data/node/node_modules/playwright-core/ThirdPartyNotices.txt +4076 -0
- data/node/node_modules/playwright-core/bin/install_media_pack.ps1 +5 -0
- data/node/node_modules/playwright-core/bin/install_webkit_wsl.ps1 +33 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_linux.sh +42 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_mac.sh +13 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_win.ps1 +24 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_linux.sh +42 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_mac.sh +12 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_win.ps1 +24 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_win.ps1 +23 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_win.ps1 +23 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_win.ps1 +24 -0
- data/node/node_modules/playwright-core/browsers.json +79 -0
- data/node/node_modules/playwright-core/cli.js +18 -0
- data/node/node_modules/playwright-core/index.d.ts +17 -0
- data/node/node_modules/playwright-core/index.js +32 -0
- data/node/node_modules/playwright-core/index.mjs +28 -0
- data/node/node_modules/playwright-core/lib/androidServerImpl.js +65 -0
- data/node/node_modules/playwright-core/lib/browserServerImpl.js +120 -0
- data/node/node_modules/playwright-core/lib/cli/driver.js +97 -0
- data/node/node_modules/playwright-core/lib/cli/program.js +589 -0
- data/node/node_modules/playwright-core/lib/cli/programWithTestStub.js +74 -0
- data/node/node_modules/playwright-core/lib/client/android.js +361 -0
- data/node/node_modules/playwright-core/lib/client/api.js +137 -0
- data/node/node_modules/playwright-core/lib/client/artifact.js +79 -0
- data/node/node_modules/playwright-core/lib/client/browser.js +161 -0
- data/node/node_modules/playwright-core/lib/client/browserContext.js +582 -0
- data/node/node_modules/playwright-core/lib/client/browserType.js +185 -0
- data/node/node_modules/playwright-core/lib/client/cdpSession.js +51 -0
- data/node/node_modules/playwright-core/lib/client/channelOwner.js +194 -0
- data/node/node_modules/playwright-core/lib/client/clientHelper.js +64 -0
- data/node/node_modules/playwright-core/lib/client/clientInstrumentation.js +55 -0
- data/node/node_modules/playwright-core/lib/client/clientStackTrace.js +69 -0
- data/node/node_modules/playwright-core/lib/client/clock.js +68 -0
- data/node/node_modules/playwright-core/lib/client/connection.js +318 -0
- data/node/node_modules/playwright-core/lib/client/consoleMessage.js +58 -0
- data/node/node_modules/playwright-core/lib/client/coverage.js +44 -0
- data/node/node_modules/playwright-core/lib/client/dialog.js +56 -0
- data/node/node_modules/playwright-core/lib/client/download.js +62 -0
- data/node/node_modules/playwright-core/lib/client/electron.js +138 -0
- data/node/node_modules/playwright-core/lib/client/elementHandle.js +284 -0
- data/node/node_modules/playwright-core/lib/client/errors.js +77 -0
- data/node/node_modules/playwright-core/lib/client/eventEmitter.js +314 -0
- data/node/node_modules/playwright-core/lib/client/events.js +103 -0
- data/node/node_modules/playwright-core/lib/client/fetch.js +368 -0
- data/node/node_modules/playwright-core/lib/client/fileChooser.js +46 -0
- data/node/node_modules/playwright-core/lib/client/fileUtils.js +34 -0
- data/node/node_modules/playwright-core/lib/client/frame.js +409 -0
- data/node/node_modules/playwright-core/lib/client/harRouter.js +87 -0
- data/node/node_modules/playwright-core/lib/client/input.js +84 -0
- data/node/node_modules/playwright-core/lib/client/jsHandle.js +109 -0
- data/node/node_modules/playwright-core/lib/client/jsonPipe.js +39 -0
- data/node/node_modules/playwright-core/lib/client/localUtils.js +60 -0
- data/node/node_modules/playwright-core/lib/client/locator.js +369 -0
- data/node/node_modules/playwright-core/lib/client/network.js +747 -0
- data/node/node_modules/playwright-core/lib/client/page.js +745 -0
- data/node/node_modules/playwright-core/lib/client/pageAgent.js +64 -0
- data/node/node_modules/playwright-core/lib/client/platform.js +77 -0
- data/node/node_modules/playwright-core/lib/client/playwright.js +71 -0
- data/node/node_modules/playwright-core/lib/client/selectors.js +55 -0
- data/node/node_modules/playwright-core/lib/client/stream.js +39 -0
- data/node/node_modules/playwright-core/lib/client/timeoutSettings.js +79 -0
- data/node/node_modules/playwright-core/lib/client/tracing.js +119 -0
- data/node/node_modules/playwright-core/lib/client/types.js +28 -0
- data/node/node_modules/playwright-core/lib/client/video.js +59 -0
- data/node/node_modules/playwright-core/lib/client/waiter.js +142 -0
- data/node/node_modules/playwright-core/lib/client/webError.js +39 -0
- data/node/node_modules/playwright-core/lib/client/webSocket.js +93 -0
- data/node/node_modules/playwright-core/lib/client/worker.js +85 -0
- data/node/node_modules/playwright-core/lib/client/writableStream.js +39 -0
- data/node/node_modules/playwright-core/lib/generated/bindingsControllerSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/clockSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/injectedScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/pollingRecorderSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/storageScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/utilityScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/webSocketMockSource.js +336 -0
- data/node/node_modules/playwright-core/lib/inProcessFactory.js +60 -0
- data/node/node_modules/playwright-core/lib/inprocess.js +3 -0
- data/node/node_modules/playwright-core/lib/mcpBundle.js +84 -0
- data/node/node_modules/playwright-core/lib/mcpBundleImpl/index.js +147 -0
- data/node/node_modules/playwright-core/lib/outofprocess.js +76 -0
- data/node/node_modules/playwright-core/lib/protocol/serializers.js +197 -0
- data/node/node_modules/playwright-core/lib/protocol/validator.js +2969 -0
- data/node/node_modules/playwright-core/lib/protocol/validatorPrimitives.js +193 -0
- data/node/node_modules/playwright-core/lib/remote/playwrightConnection.js +129 -0
- data/node/node_modules/playwright-core/lib/remote/playwrightServer.js +334 -0
- data/node/node_modules/playwright-core/lib/server/agent/actionRunner.js +335 -0
- data/node/node_modules/playwright-core/lib/server/agent/actions.js +128 -0
- data/node/node_modules/playwright-core/lib/server/agent/codegen.js +111 -0
- data/node/node_modules/playwright-core/lib/server/agent/context.js +150 -0
- data/node/node_modules/playwright-core/lib/server/agent/expectTools.js +156 -0
- data/node/node_modules/playwright-core/lib/server/agent/pageAgent.js +204 -0
- data/node/node_modules/playwright-core/lib/server/agent/performTools.js +262 -0
- data/node/node_modules/playwright-core/lib/server/agent/tool.js +109 -0
- data/node/node_modules/playwright-core/lib/server/android/android.js +465 -0
- data/node/node_modules/playwright-core/lib/server/android/backendAdb.js +177 -0
- data/node/node_modules/playwright-core/lib/server/artifact.js +127 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiBrowser.js +549 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiChromium.js +148 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiConnection.js +213 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiDeserializer.js +116 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiExecutionContext.js +267 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiFirefox.js +128 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiInput.js +146 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiNetworkManager.js +383 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiOverCdp.js +102 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPage.js +583 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPdf.js +106 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiCommands.d.js +22 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiKeyboard.js +256 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocol.js +24 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolCore.js +180 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolPermissions.js +42 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiSerializer.js +148 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/firefoxPrefs.js +259 -0
- data/node/node_modules/playwright-core/lib/server/browser.js +149 -0
- data/node/node_modules/playwright-core/lib/server/browserContext.js +702 -0
- data/node/node_modules/playwright-core/lib/server/browserType.js +336 -0
- data/node/node_modules/playwright-core/lib/server/callLog.js +82 -0
- data/node/node_modules/playwright-core/lib/server/chromium/appIcon.png +0 -0
- data/node/node_modules/playwright-core/lib/server/chromium/chromium.js +395 -0
- data/node/node_modules/playwright-core/lib/server/chromium/chromiumSwitches.js +104 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crBrowser.js +511 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crConnection.js +197 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crCoverage.js +235 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crDevTools.js +111 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crDragDrop.js +131 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crExecutionContext.js +146 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crInput.js +187 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crNetworkManager.js +707 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crPage.js +1001 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crPdf.js +121 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crProtocolHelper.js +145 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crServiceWorker.js +136 -0
- data/node/node_modules/playwright-core/lib/server/chromium/defaultFontFamilies.js +162 -0
- data/node/node_modules/playwright-core/lib/server/chromium/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/clock.js +149 -0
- data/node/node_modules/playwright-core/lib/server/codegen/csharp.js +327 -0
- data/node/node_modules/playwright-core/lib/server/codegen/java.js +274 -0
- data/node/node_modules/playwright-core/lib/server/codegen/javascript.js +247 -0
- data/node/node_modules/playwright-core/lib/server/codegen/jsonl.js +52 -0
- data/node/node_modules/playwright-core/lib/server/codegen/language.js +132 -0
- data/node/node_modules/playwright-core/lib/server/codegen/languages.js +68 -0
- data/node/node_modules/playwright-core/lib/server/codegen/python.js +279 -0
- data/node/node_modules/playwright-core/lib/server/codegen/types.js +16 -0
- data/node/node_modules/playwright-core/lib/server/console.js +57 -0
- data/node/node_modules/playwright-core/lib/server/cookieStore.js +206 -0
- data/node/node_modules/playwright-core/lib/server/debugController.js +191 -0
- data/node/node_modules/playwright-core/lib/server/debugger.js +119 -0
- data/node/node_modules/playwright-core/lib/server/deviceDescriptors.js +39 -0
- data/node/node_modules/playwright-core/lib/server/deviceDescriptorsSource.json +1779 -0
- data/node/node_modules/playwright-core/lib/server/dialog.js +116 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/androidDispatcher.js +325 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/artifactDispatcher.js +118 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserContextDispatcher.js +384 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserDispatcher.js +118 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserTypeDispatcher.js +64 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/cdpSessionDispatcher.js +44 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/debugControllerDispatcher.js +78 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/dialogDispatcher.js +47 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/dispatcher.js +364 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/electronDispatcher.js +89 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/elementHandlerDispatcher.js +181 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/frameDispatcher.js +227 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsHandleDispatcher.js +85 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsonPipeDispatcher.js +58 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/localUtilsDispatcher.js +149 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/networkDispatchers.js +213 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageAgentDispatcher.js +96 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageDispatcher.js +393 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/playwrightDispatcher.js +108 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/streamDispatcher.js +67 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/tracingDispatcher.js +68 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/webSocketRouteDispatcher.js +165 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/writableStreamDispatcher.js +79 -0
- data/node/node_modules/playwright-core/lib/server/dom.js +815 -0
- data/node/node_modules/playwright-core/lib/server/download.js +70 -0
- data/node/node_modules/playwright-core/lib/server/electron/electron.js +273 -0
- data/node/node_modules/playwright-core/lib/server/electron/loader.js +29 -0
- data/node/node_modules/playwright-core/lib/server/errors.js +69 -0
- data/node/node_modules/playwright-core/lib/server/fetch.js +621 -0
- data/node/node_modules/playwright-core/lib/server/fileChooser.js +43 -0
- data/node/node_modules/playwright-core/lib/server/fileUploadUtils.js +84 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffBrowser.js +418 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffConnection.js +142 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffExecutionContext.js +150 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffInput.js +159 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffNetworkManager.js +256 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffPage.js +497 -0
- data/node/node_modules/playwright-core/lib/server/firefox/firefox.js +114 -0
- data/node/node_modules/playwright-core/lib/server/firefox/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/formData.js +147 -0
- data/node/node_modules/playwright-core/lib/server/frameSelectors.js +160 -0
- data/node/node_modules/playwright-core/lib/server/frames.js +1471 -0
- data/node/node_modules/playwright-core/lib/server/har/harRecorder.js +147 -0
- data/node/node_modules/playwright-core/lib/server/har/harTracer.js +607 -0
- data/node/node_modules/playwright-core/lib/server/harBackend.js +157 -0
- data/node/node_modules/playwright-core/lib/server/helper.js +96 -0
- data/node/node_modules/playwright-core/lib/server/index.js +58 -0
- data/node/node_modules/playwright-core/lib/server/input.js +277 -0
- data/node/node_modules/playwright-core/lib/server/instrumentation.js +72 -0
- data/node/node_modules/playwright-core/lib/server/javascript.js +291 -0
- data/node/node_modules/playwright-core/lib/server/launchApp.js +128 -0
- data/node/node_modules/playwright-core/lib/server/localUtils.js +214 -0
- data/node/node_modules/playwright-core/lib/server/macEditingCommands.js +143 -0
- data/node/node_modules/playwright-core/lib/server/network.js +667 -0
- data/node/node_modules/playwright-core/lib/server/page.js +830 -0
- data/node/node_modules/playwright-core/lib/server/pipeTransport.js +89 -0
- data/node/node_modules/playwright-core/lib/server/playwright.js +69 -0
- data/node/node_modules/playwright-core/lib/server/progress.js +132 -0
- data/node/node_modules/playwright-core/lib/server/protocolError.js +52 -0
- data/node/node_modules/playwright-core/lib/server/recorder/chat.js +161 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderApp.js +366 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderRunner.js +138 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderSignalProcessor.js +83 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderUtils.js +157 -0
- data/node/node_modules/playwright-core/lib/server/recorder/throttledFile.js +57 -0
- data/node/node_modules/playwright-core/lib/server/recorder.js +499 -0
- data/node/node_modules/playwright-core/lib/server/registry/browserFetcher.js +177 -0
- data/node/node_modules/playwright-core/lib/server/registry/dependencies.js +371 -0
- data/node/node_modules/playwright-core/lib/server/registry/index.js +1422 -0
- data/node/node_modules/playwright-core/lib/server/registry/nativeDeps.js +1280 -0
- data/node/node_modules/playwright-core/lib/server/registry/oopDownloadBrowserMain.js +127 -0
- data/node/node_modules/playwright-core/lib/server/screencast.js +190 -0
- data/node/node_modules/playwright-core/lib/server/screenshotter.js +333 -0
- data/node/node_modules/playwright-core/lib/server/selectors.js +112 -0
- data/node/node_modules/playwright-core/lib/server/socksClientCertificatesInterceptor.js +383 -0
- data/node/node_modules/playwright-core/lib/server/socksInterceptor.js +95 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotter.js +147 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotterInjected.js +561 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/tracing.js +604 -0
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceParser.js +72 -0
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceViewer.js +245 -0
- data/node/node_modules/playwright-core/lib/server/transport.js +181 -0
- data/node/node_modules/playwright-core/lib/server/types.js +28 -0
- data/node/node_modules/playwright-core/lib/server/usKeyboardLayout.js +145 -0
- data/node/node_modules/playwright-core/lib/server/utils/ascii.js +44 -0
- data/node/node_modules/playwright-core/lib/server/utils/comparators.js +139 -0
- data/node/node_modules/playwright-core/lib/server/utils/crypto.js +216 -0
- data/node/node_modules/playwright-core/lib/server/utils/debug.js +42 -0
- data/node/node_modules/playwright-core/lib/server/utils/debugLogger.js +122 -0
- data/node/node_modules/playwright-core/lib/server/utils/env.js +73 -0
- data/node/node_modules/playwright-core/lib/server/utils/eventsHelper.js +39 -0
- data/node/node_modules/playwright-core/lib/server/utils/expectUtils.js +123 -0
- data/node/node_modules/playwright-core/lib/server/utils/fileUtils.js +191 -0
- data/node/node_modules/playwright-core/lib/server/utils/happyEyeballs.js +207 -0
- data/node/node_modules/playwright-core/lib/server/utils/hostPlatform.js +123 -0
- data/node/node_modules/playwright-core/lib/server/utils/httpServer.js +203 -0
- data/node/node_modules/playwright-core/lib/server/utils/imageUtils.js +141 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/colorUtils.js +89 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/compare.js +109 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/imageChannel.js +78 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/stats.js +102 -0
- data/node/node_modules/playwright-core/lib/server/utils/linuxUtils.js +71 -0
- data/node/node_modules/playwright-core/lib/server/utils/network.js +242 -0
- data/node/node_modules/playwright-core/lib/server/utils/nodePlatform.js +154 -0
- data/node/node_modules/playwright-core/lib/server/utils/pipeTransport.js +84 -0
- data/node/node_modules/playwright-core/lib/server/utils/processLauncher.js +241 -0
- data/node/node_modules/playwright-core/lib/server/utils/profiler.js +65 -0
- data/node/node_modules/playwright-core/lib/server/utils/socksProxy.js +511 -0
- data/node/node_modules/playwright-core/lib/server/utils/spawnAsync.js +41 -0
- data/node/node_modules/playwright-core/lib/server/utils/task.js +51 -0
- data/node/node_modules/playwright-core/lib/server/utils/userAgent.js +98 -0
- data/node/node_modules/playwright-core/lib/server/utils/wsServer.js +121 -0
- data/node/node_modules/playwright-core/lib/server/utils/zipFile.js +74 -0
- data/node/node_modules/playwright-core/lib/server/utils/zones.js +57 -0
- data/node/node_modules/playwright-core/lib/server/videoRecorder.js +124 -0
- data/node/node_modules/playwright-core/lib/server/webkit/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/webkit/webkit.js +108 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkBrowser.js +335 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkConnection.js +144 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkExecutionContext.js +154 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkInput.js +181 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkInterceptableRequest.js +197 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkPage.js +1158 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkProvisionalPage.js +83 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkWorkers.js +105 -0
- data/node/node_modules/playwright-core/lib/third_party/pixelmatch.js +255 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/ariaSnapshot.js +455 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/assert.js +31 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/colors.js +72 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssParser.js +245 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssTokenizer.js +1051 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/headers.js +53 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorGenerators.js +689 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorParser.js +176 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorUtils.js +81 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/lruCache.js +51 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/manualPromise.js +114 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/mimeType.js +459 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/multimap.js +80 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolFormatter.js +81 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolMetainfo.js +330 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/rtti.js +43 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/selectorParser.js +386 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/semaphore.js +54 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stackTrace.js +158 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stringUtils.js +204 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/time.js +49 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/timeoutRunner.js +66 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/entries.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotRenderer.js +499 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotServer.js +120 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotStorage.js +89 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceLoader.js +131 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModel.js +365 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModernizer.js +400 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV3.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV4.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV5.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV6.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV7.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV8.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/traceUtils.js +58 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/types.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/urlMatch.js +190 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/utilityScriptSerializers.js +251 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/yaml.js +84 -0
- data/node/node_modules/playwright-core/lib/utils.js +111 -0
- data/node/node_modules/playwright-core/lib/utilsBundle.js +109 -0
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/index.js +218 -0
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/xdg-open +1066 -0
- data/node/node_modules/playwright-core/lib/vite/htmlReport/index.html +84 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DYBRYzYX.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DadYNm1I.js +32 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codicon-DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BSjZa4pk.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BhTWtUlo.js +193 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/index.html +29 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/playwright-logo.svg +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/codeMirrorModule-a5XoALAZ.js +32 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/defaultSettingsView-CJSZINFr.js +266 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/xtermModule-CsJ4vdCR.js +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codeMirrorModule.DYBRYzYX.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codicon.DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/defaultSettingsView.7ch9cixO.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.BVu7tZDe.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.Bk2uYQRV.js +2 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.html +43 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/manifest.webmanifest +16 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/playwright-logo.svg +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/snapshot.html +21 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/sw.bundle.js +5 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.Btcz36p_.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.CQJ9SCIQ.js +5 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.html +17 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/xtermModule.DYP7pi_n.css +32 -0
- data/node/node_modules/playwright-core/lib/zipBundle.js +34 -0
- data/node/node_modules/playwright-core/lib/zipBundleImpl.js +5 -0
- data/node/node_modules/playwright-core/package.json +43 -0
- data/node/node_modules/playwright-core/types/protocol.d.ts +23824 -0
- data/node/node_modules/playwright-core/types/structs.d.ts +45 -0
- data/node/node_modules/playwright-core/types/types.d.ts +22843 -0
- data/node/package-lock.json +72 -0
- data/node/package.json +14 -0
- data/node/src/index.js +215 -0
- data/rubycrawl.gemspec +29 -0
- data/spec/rubycrawl_spec.rb +51 -0
- data/spec/spec_helper.rb +11 -0
- metadata +645 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'uri'
|
|
4
|
+
|
|
5
|
+
class RubyCrawl
  # Helper methods for payloads, validation, and errors.
  #
  # Every method below the `private` marker is private in whatever class
  # includes this module.
  module Helpers
    # Prefix pattern for hosts that trigger the "internal/private" warning:
    # localhost, 127.x, 192.168.x, 10.x and 172.16.x through 172.31.x.
    # The dot after the second octet of the 172 range keeps public addresses
    # such as 172.160.0.1 from matching, and \A anchors at the real start of
    # the string rather than at any line start.
    PRIVATE_HOST_PATTERN = /\A(?:localhost|127\.|192\.168\.|10\.|172\.(?:1[6-9]|2[0-9]|3[01])\.)/.freeze

    private

    # Ensure +url+ parses as an HTTP(S) URL and warn when its host looks
    # internal/private.
    #
    # @param url [String] URL to validate
    # @return [nil]
    # @raise [ConfigurationError] if the URL cannot be parsed or is not HTTP(S)
    def validate_url!(url)
      uri = URI.parse(url)

      # URI::HTTPS inherits from URI::HTTP, so one check covers both schemes.
      raise ConfigurationError, "Only HTTP(S) URLs are supported, got: #{url}" unless uri.is_a?(URI::HTTP)

      warn '[rubycrawl] Warning: Crawling internal/private IP addresses' if uri.host&.match?(PRIVATE_HOST_PATTERN)
    rescue URI::InvalidURIError => e
      raise ConfigurationError, "Invalid URL: #{e.message}"
    end

    # Build the request payload for a crawl, leaving out options that were
    # not given.
    #
    # @param url [String] page to crawl
    # @param wait_until [Object, nil] included only when truthy
    # @param block_resources [Object, nil] included unless nil, so an explicit
    #   +false+ is still sent
    # @return [Hash] payload with symbol keys
    def build_payload(url, wait_until, block_resources)
      payload = { url: url }
      payload[:wait_until] = wait_until if wait_until
      payload[:block_resources] = block_resources unless block_resources.nil?
      payload
    end

    # Wrap a parsed response in a Result, coercing missing fields to empty
    # values.
    #
    # @param response [Hash] response with string keys 'text', 'html',
    #   'links' and 'metadata'
    # @return [Result]
    def build_result(response)
      Result.new(
        text: response['text'].to_s,
        html: response['html'].to_s,
        links: Array(response['links']),
        metadata: response['metadata'].is_a?(Hash) ? response['metadata'] : {}
      )
    end

    # Pick the exception class for an error code.
    #
    # @param error_code [String] error code from the response
    # @return [Class] TimeoutError, NavigationError, ServiceError, or Error
    #   for any unrecognised code
    def error_class_for(error_code)
      case error_code
      when 'navigation_timeout', 'crawl_timeout'
        TimeoutError
      when 'navigation_failed', 'crawl_failed'
        NavigationError
      when 'invalid_json', 'invalid_json_response'
        ServiceError
      else
        Error
      end
    end

    # Build the human-readable message for an error code.
    #
    # @param error_code [String] error code from the response
    # @param error_message [String] detail text to append
    # @return [String]
    def error_message_for(error_code, error_message)
      case error_code
      when 'navigation_timeout', 'crawl_timeout'
        "Crawl timeout: #{error_message}"
      when 'navigation_failed', 'crawl_failed'
        "Navigation failed: #{error_message}"
      when 'invalid_json', 'invalid_json_response'
        "Node service returned invalid JSON: #{error_message}"
      else
        "Crawl error [#{error_code}]: #{error_message}"
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class RubyCrawl
  # HTML-to-Markdown conversion backed by the reverse_markdown gem, which is
  # required lazily on first use.
  module MarkdownConverter
    module_function

    # Turn an HTML string into Markdown.
    #
    # When reverse_markdown cannot be loaded, a warning is printed and an
    # empty string is returned instead of raising.
    #
    # @param html [String, nil] HTML to convert
    # @param options [Hash] overrides merged on top of {default_options}
    # @option options [Symbol] :unknown_tags (:bypass) How to handle unknown tags
    # @option options [Boolean] :github_flavored (true) Use GitHub-flavored markdown
    # @return [String] Markdown, or '' for nil/empty input or a missing gem
    def convert(html, options = {})
      nothing_to_convert = html.nil? || html.empty?
      return '' if nothing_to_convert

      require_reverse_markdown
      merged_options = default_options.merge(options)
      ReverseMarkdown.convert(html, merged_options)
    rescue LoadError
      warn '[rubycrawl] reverse_markdown gem not installed. Add it to your Gemfile for markdown support.'
      ''
    end

    # Load the reverse_markdown gem.
    #
    # @raise [LoadError] if the gem is not available
    def require_reverse_markdown
      require 'reverse_markdown'
    end

    # Options passed to ReverseMarkdown unless the caller overrides them.
    #
    # @return [Hash]
    def default_options
      { unknown_tags: :bypass, github_flavored: true, tag_border: '' }
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class RubyCrawl
  # Value object for one crawled page; Markdown is produced on demand.
  class Result
    attr_reader :text, :html, :links, :metadata

    # @param text [String] page text
    # @param html [String] page HTML
    # @param links [Array] links found on the page
    # @param metadata [Hash] page metadata
    # @param markdown [String, nil] precomputed Markdown; nil or empty means
    #   "convert from the HTML on first access"
    def initialize(text:, html:, links:, metadata:, markdown: nil)
      @text = text
      @html = html
      @links = links
      @metadata = metadata
      return if markdown.to_s.empty?

      @markdown = markdown
    end

    # Markdown for the page, converted from the HTML on first call and
    # memoized afterwards.
    #
    # @return [String] Markdown content
    def markdown
      return @markdown if @markdown

      @markdown = MarkdownConverter.convert(html)
    end

    # Whether Markdown is already available without running a conversion.
    #
    # @return [Boolean]
    def markdown?
      !@markdown.nil?
    end

    # Hash form of the result. Reading the :markdown entry triggers the
    # lazy conversion.
    #
    # @return [Hash]
    def to_h
      %i[text html links metadata markdown].to_h { |field| [field, public_send(field)] }
    end
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'uri'
|
|
6
|
+
|
|
7
|
+
class RubyCrawl
  # Handles node service lifecycle and HTTP requests.
  class ServiceClient
    # @param host [String] host the node service listens on
    # @param port [Integer, String] port of the node service (coerced with Integer())
    # @param node_dir [String] directory the service is started from
    # @param node_bin [String] executable used to run the service
    # @param node_log [String, nil] file that receives the service's output;
    #   nil discards it
    def initialize(host:, port:, node_dir:, node_bin:, node_log:)
      @host = host
      @port = Integer(port)
      @node_dir = node_dir
      @node_bin = node_bin
      @node_log = node_log
      @node_pid = nil
    end

    # Start the node service unless its health endpoint already answers,
    # then wait for it to become healthy.
    #
    # @raise [ServiceError] if the service cannot be started in time
    def ensure_running
      return if healthy?

      start_service
      wait_until_healthy
    end

    # POST +body+ as JSON to +path+ on the node service and parse the reply.
    #
    # @param path [String] request path, e.g. '/crawl'
    # @param body [Object] value serialised with JSON.generate
    # @return [Object] parsed JSON response
    # @raise [ServiceError] on invalid JSON or a refused/reset connection
    # @raise [TimeoutError] when opening the connection or reading times out
    def post_json(path, body)
      uri = URI("http://#{@host}:#{@port}#{path}")
      request = build_request(uri, body)
      response = perform_request(uri, request)
      JSON.parse(response.body)
    rescue JSON::ParserError => e
      raise ServiceError, "Node service returned invalid JSON: #{e.message}"
    rescue Errno::ECONNREFUSED, Errno::ECONNRESET => e
      raise ServiceError, "Cannot connect to node service at #{uri}: #{e.message}"
    rescue Net::OpenTimeout, Net::ReadTimeout => e
      raise TimeoutError, "Request to node service timed out: #{e.message}"
    end

    private

    # Build a JSON POST request for +uri+.
    def build_request(uri, body)
      request = Net::HTTP::Post.new(uri)
      request['Content-Type'] = 'application/json'
      request.body = JSON.generate(body)
      request
    end

    # Send +request+ with a 5s connect timeout and a 30s read timeout.
    def perform_request(uri, request)
      Net::HTTP.start(uri.host, uri.port, open_timeout: 5, read_timeout: 30) do |http|
        http.request(request)
      end
    end

    # Spawn the node service as a detached child process running
    # src/index.js inside the node directory.
    #
    # @raise [ServiceError] if the node directory does not exist
    def start_service
      raise ServiceError, "rubycrawl node service directory not found: #{@node_dir}" unless Dir.exist?(@node_dir)

      env = { 'RUBYCRAWL_NODE_PORT' => @port.to_s }
      log_file = File.open(@node_log, 'a') if @node_log
      sink = log_file || File::NULL
      @node_pid = Process.spawn(env, @node_bin, 'src/index.js', chdir: @node_dir, out: sink, err: sink)
      Process.detach(@node_pid)
    ensure
      # The child has its own copy of the descriptor once spawned; close the
      # parent's handle so each start does not leak an open file.
      log_file&.close
    end

    # Poll the health endpoint every 0.2s until it answers or +timeout+
    # seconds have elapsed.
    #
    # @param timeout [Numeric] seconds to wait
    # @return [true] once the service is healthy
    # @raise [ServiceError] if the service is still unhealthy at the deadline
    def wait_until_healthy(timeout: 5)
      deadline = monotonic_now + timeout
      until monotonic_now > deadline
        return true if healthy?

        sleep 0.2
      end

      raise ServiceError, "rubycrawl node service failed to start within #{timeout}s. " \
                          "Check logs at #{@node_log || 'RUBYCRAWL_NODE_LOG'}"
    end

    # Monotonic timestamp in seconds, unaffected by wall-clock adjustments.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # True when GET /health returns a 2xx response within one second.
    # Any StandardError (refused connection, timeout, ...) counts as unhealthy.
    def healthy?
      uri = URI("http://#{@host}:#{@port}/health")
      response = Net::HTTP.start(uri.host, uri.port, open_timeout: 1, read_timeout: 1) do |http|
        http.get(uri.request_uri)
      end
      response.is_a?(Net::HTTPSuccess)
    rescue StandardError
      false
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
class RubyCrawl
  # BFS crawler that follows links with deduplication.
  class SiteCrawler
    # Page result yielded to the block with lazy markdown.
    class PageResult
      attr_reader :url, :html, :links, :metadata, :depth

      # @param url [String] normalized URL of the page
      # @param html [String] page HTML
      # @param links [Array<String>] URLs extracted from the page's links
      # @param metadata [Hash] page metadata
      # @param depth [Integer] link distance from the start URL
      def initialize(url:, html:, links:, metadata:, depth:)
        @url = url
        @html = html
        @links = links
        @metadata = metadata
        @depth = depth
      end

      # Lazy markdown conversion, memoized after the first call.
      def markdown
        @markdown ||= MarkdownConverter.convert(html)
      end
    end

    # @param client [#crawl] object used to fetch a single page
    # @param options [Hash] :max_pages (50), :max_depth (3),
    #   :same_host_only (true), :wait_until (nil), :block_resources (nil)
    def initialize(client, options = {})
      @client = client
      @max_pages = options.fetch(:max_pages, 50)
      @max_depth = options.fetch(:max_depth, 3)
      @same_host_only = options.fetch(:same_host_only, true)
      @wait_until = options.fetch(:wait_until, nil)
      @block_resources = options.fetch(:block_resources, nil)
      @visited = Set.new
      # URLs that have ever been queued, so each URL enters the frontier once.
      @enqueued = Set.new
      @queue = []
    end

    # Crawl breadth-first from +start_url+, yielding a PageResult for every
    # page fetched successfully.
    #
    # @param start_url [String]
    # @yieldparam page [PageResult]
    # @return [Integer] number of pages yielded
    # @raise [ArgumentError] if no block is given
    # @raise [ConfigurationError] if the start URL cannot be normalized
    def crawl(start_url, &block)
      raise ArgumentError, 'Block required for site crawl' unless block_given?

      normalized = UrlNormalizer.normalize(start_url)
      raise ConfigurationError, "Invalid start URL: #{start_url}" unless normalized

      @base_url = normalized
      enqueue(normalized, 0)
      process_queue(&block)
    end

    private

    # Drain the queue until it is empty or max_pages pages have been yielded.
    # Pages that fail to crawl are skipped and do not count toward the limit.
    def process_queue
      pages_crawled = 0

      while (item = @queue.shift) && pages_crawled < @max_pages
        url, depth = item
        next if @visited.include?(url)

        result = process_page(url, depth)
        next unless result

        yield result
        pages_crawled += 1
      end

      pages_crawled
    end

    # Mark +url+ visited, fetch it, and queue its links when still below
    # max_depth. The URL is marked before fetching, so a failed page is not
    # attempted again.
    def process_page(url, depth)
      @visited.add(url)
      result = crawl_page(url, depth)
      enqueue_links(result.links, depth + 1) if result && depth < @max_depth
      result
    end

    # Fetch one page through the client; crawl errors are reported with a
    # warning and turned into nil.
    def crawl_page(url, depth)
      result = @client.crawl(url, wait_until: @wait_until, block_resources: @block_resources)
      build_page_result(url, depth, result)
    rescue Error => e
      warn "[rubycrawl] Failed to crawl #{url}: #{e.message}"
      nil
    end

    # Convert the client's result into a PageResult.
    def build_page_result(url, depth, result)
      PageResult.new(
        url: url,
        html: result.html,
        links: extract_urls(result.links),
        metadata: result.metadata,
        depth: depth
      )
    end

    # Pull the URL out of each link entry, accepting string or symbol keys.
    def extract_urls(links)
      links.map { |link| link['url'] || link[:url] }.compact
    end

    # Normalize and queue each link that passes the host filter.
    #
    # NOTE(review): relative links are resolved against the start URL, not
    # the page they were found on — confirm the client returns absolute URLs.
    def enqueue_links(links, depth)
      links.each do |link|
        normalized = UrlNormalizer.normalize(link, @base_url)
        next unless normalized
        next if @visited.include?(normalized)
        next if @same_host_only && !UrlNormalizer.same_host?(normalized, @base_url)

        enqueue(normalized, depth)
      end
    end

    # Queue +url+ unless it was already visited or queued. BFS order means
    # the first time a URL is queued is also its shallowest depth, so later
    # duplicates can be dropped without changing what gets crawled.
    def enqueue(url, depth)
      return if @visited.include?(url)
      # Set#add? returns nil when the URL is already in the set.
      return unless @enqueued.add?(url)

      @queue.push([url, depth])
    end
  end
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rubocop:disable Metrics/BlockLength
|
|
4
|
+
namespace :rubycrawl do
  desc 'Install Node dependencies and create initializer'
  task :install do
    require 'fileutils'

    # Shown when the `node` executable cannot be run.
    node_missing_message = <<~MSG
      [rubycrawl] ERROR: Node.js is not installed or not in PATH.

      RubyCrawl requires Node.js (v18+ recommended) for browser automation.

      Install Node.js:
      - macOS: brew install node
      - Ubuntu: curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash - && sudo apt-get install -y nodejs
      - Windows: https://nodejs.org/en/download/

      After installing, run this task again:
      bundle exec rake rubycrawl:install
    MSG

    # Initializer written into a Rails app when none exists yet.
    initializer_template = <<~RUBY
      # frozen_string_literal: true

      # RubyCrawl Configuration
      # =======================
      # Uncomment and modify options as needed.

      RubyCrawl.configure(
      # wait_until - Page load strategy:
      # "load" - Wait for load event (fastest, good for static sites)
      # "domcontentloaded" - Wait for DOM ready (medium speed)
      # "networkidle" - Wait until no network requests for 500ms (best for SPAs)
      # wait_until: "load",

      # Block images, fonts, CSS, media for faster crawls (2-3x speedup)
      # block_resources: true,

      # Maximum retry attempts for transient failures (with exponential backoff)
      # max_retries: 3,

      # Node service settings (usually no need to change)
      # host: "127.0.0.1",
      # port: 3344,

      # Custom Node.js binary path (if not in PATH)
      # node_bin: "/usr/local/bin/node",

      # Log file for Node service output (useful for debugging)
      # node_log: Rails.root.join("log", "rubycrawl.log").to_s
      )
    RUBY

    # Step 1: Node.js must be runnable before anything is installed.
    node_available = system('node', '--version', out: File::NULL, err: File::NULL)
    abort(node_missing_message) unless node_available

    # Step 2: locate the gem's bundled node directory (three levels above
    # this file's directory).
    node_dir = File.join(File.expand_path('../../../', __dir__), 'node')
    abort("[rubycrawl] ERROR: node directory not found at #{node_dir}") unless Dir.exist?(node_dir)

    # Step 3: install npm packages, then the Playwright browsers.
    Dir.chdir(node_dir) do
      puts('[rubycrawl] Installing Node dependencies...')
      abort('[rubycrawl] ERROR: npm install failed') unless system('npm', 'install')

      puts('[rubycrawl] Installing Playwright browsers...')
      abort('[rubycrawl] ERROR: playwright install failed') unless system('npx', 'playwright', 'install')
    end

    # Step 4: create the Rails initializer, but only inside a Rails app and
    # only when the file is not already there.
    unless defined?(Rails)
      puts('[rubycrawl] Rails not detected. Skipping initializer creation.')
      next
    end

    initializer_path = Rails.root.join('config', 'initializers', 'rubycrawl.rb')
    if File.exist?(initializer_path)
      puts("[rubycrawl] Initializer already exists at #{initializer_path}")
      next
    end

    FileUtils.mkdir_p(File.dirname(initializer_path))
    File.write(initializer_path, initializer_template)
    puts("[rubycrawl] Created initializer at #{initializer_path}")
  end
end
|
|
85
|
+
# rubocop:enable Metrics/BlockLength
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'uri'
|
|
4
|
+
require 'set'
|
|
5
|
+
|
|
6
|
+
class RubyCrawl
  # Normalizes URLs for deduplication.
  module UrlNormalizer
    # Query parameters dropped during normalization (compared case-insensitively).
    TRACKING_PARAMS = %w[utm_source utm_medium utm_campaign utm_term utm_content fbclid gclid].freeze

    module_function

    # Produce a canonical string for +url+: lower-cased scheme and host, no
    # trailing slash (except the root path), no fragment, tracking params
    # removed and the remaining query params sorted.
    #
    # @param url [String] absolute URL, or a relative one when +base_url+ is given
    # @param base_url [String, nil] base used to resolve a relative +url+
    # @return [String, nil] normalized URL, or nil when it cannot be parsed,
    #   cannot be resolved, or has no host
    def normalize(url, base_url = nil)
      uri = parse_uri(url, base_url)
      return nil unless uri&.host

      normalize_uri_parts(uri)
      uri.to_s
    rescue URI::Error
      # URI::Error is the parent of InvalidURIError, BadURIError and
      # InvalidComponentError, so any URI failure yields nil.
      nil
    end

    # Apply the individual normalization steps to +uri+ in place.
    def normalize_uri_parts(uri)
      uri.scheme = uri.scheme&.downcase
      uri.host = uri.host&.downcase
      uri.path = normalize_path(uri.path)
      uri.fragment = nil
      uri.query = normalize_query(uri.query)
    end

    # Compare the hosts of two URLs case-insensitively.
    #
    # @return [Boolean] false when either URL cannot be parsed
    def same_host?(url, base_url)
      uri = URI.parse(url)
      base_uri = URI.parse(base_url)
      uri.host&.downcase == base_uri.host&.downcase
    rescue URI::Error
      false
    end

    # Parse +url+, resolving it against +base_url+ when it is relative.
    #
    # @return [URI::Generic, nil] nil when +url+ is relative without a base,
    #   or when parsing/joining fails (URI.join raises URI::BadURIError if
    #   the base itself is relative)
    def parse_uri(url, base_url)
      uri = URI.parse(url)
      return uri if uri.absolute?
      return nil unless base_url

      URI.join(base_url, url)
    rescue URI::Error
      nil
    end

    # Empty paths become '/'; one trailing slash is removed from longer paths.
    def normalize_path(path)
      return '/' if path.nil? || path.empty?

      # Remove trailing slash except for root
      path = path.chomp('/') if path.length > 1
      path
    end

    # Drop tracking params and sort what remains so equivalent queries
    # compare equal. A query that cannot be decoded is returned unchanged.
    #
    # @return [String, nil] nil when no params remain
    def normalize_query(query)
      return nil if query.nil? || query.empty?

      params = URI.decode_www_form(query).reject { |key, _| TRACKING_PARAMS.include?(key.downcase) }
      return nil if params.empty?

      URI.encode_www_form(params.sort)
    rescue ArgumentError
      query
    end
  end
end
|
data/lib/rubycrawl.rb
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'rubycrawl/version'
|
|
4
|
+
require_relative 'rubycrawl/errors'
|
|
5
|
+
require_relative 'rubycrawl/helpers'
|
|
6
|
+
require_relative 'rubycrawl/service_client'
|
|
7
|
+
require_relative 'rubycrawl/url_normalizer'
|
|
8
|
+
require_relative 'rubycrawl/markdown_converter'
|
|
9
|
+
require_relative 'rubycrawl/result'
|
|
10
|
+
require_relative 'rubycrawl/site_crawler'
|
|
11
|
+
require_relative 'rubycrawl/railtie' if defined?(Rails)
|
|
12
|
+
|
|
13
|
+
# RubyCrawl provides a simple interface for crawling pages via a local Playwright service.
|
|
14
|
+
class RubyCrawl
|
|
15
|
+
include Helpers
|
|
16
|
+
|
|
17
|
+
DEFAULT_HOST = '127.0.0.1'
|
|
18
|
+
DEFAULT_PORT = 3344
|
|
19
|
+
|
|
20
|
+
class << self
  # Process-wide default client, built with default options on first use
  # unless {configure} has replaced it.
  #
  # @return [RubyCrawl]
  def client
    return @client if @client

    @client = new
  end

  # Crawl a single URL with the default client.
  #
  # @param url [String] page to crawl
  # @param options [Hash] keyword options forwarded to the instance-level crawl
  def crawl(url, **options)
    client.crawl(url, **options)
  end

  # Crawl multiple pages starting from a URL, following links.
  # Each page result is yielded to the block as soon as it is crawled.
  #
  # @param url [String] The starting URL
  # @param max_pages [Integer] Maximum number of pages to crawl (default: 50)
  # @param max_depth [Integer] Maximum link depth from start URL (default: 3)
  # @param same_host_only [Boolean] Only follow links on the same host (default: true)
  # @yield [page] Yields each page result as it is crawled
  # @yieldparam page [SiteCrawler::PageResult] The crawled page result
  # @return [Integer] Number of pages crawled
  #
  # @example Save pages to database
  #   RubyCrawl.crawl_site("https://example.com", max_pages: 100) do |page|
  #     Page.create!(url: page.url, html: page.html, depth: page.depth)
  #   end
  def crawl_site(url, ...)
    client.crawl_site(url, ...)
  end

  # Replace the default client with one built from +options+.
  #
  # @param options [Hash] keyword options passed to the constructor
  # @return [RubyCrawl] the new default client
  def configure(**options)
    @client = new(**options)
  end
end
|
|
52
|
+
|
|
53
|
+
# Build a client: read the settings out of +options+ first, then create the
# service client from them.
#
# @param options [Hash] keyword settings; unspecified ones fall back to defaults
def initialize(**options)
  load_options(options)
  build_service_client
end
|
|
57
|
+
|
|
58
|
+
# Crawl a single page through the node service.
#
# The URL is validated and the service is checked once up front; only the
# request itself runs inside the retry loop.
#
# @param url [String] page to crawl
# @param wait_until [Object, nil] defaults to the client's configured value
# @param block_resources [Object, nil] defaults to the client's configured value
# @param retries [Integer] attempt limit passed to with_retries
# @return [Result]
def crawl(url, wait_until: @wait_until, block_resources: @block_resources, retries: @max_retries)
  validate_url!(url)
  @service_client.ensure_running

  with_retries(retries) do
    request_body = build_payload(url, wait_until, block_resources)
    reply = @service_client.post_json('/crawl', request_body)
    raise_node_error!(reply)
    build_result(reply)
  end
end
|
|
68
|
+
|
|
69
|
+
# Crawl multiple pages starting from a URL, following links.
|
|
70
|
+
# @see RubyCrawl.crawl_site
|
|
71
|
+
def crawl_site(url, **options, &block)
  @service_client.ensure_running

  # The crawler receives this client plus the normalized option set and
  # yields pages to the caller's block.
  SiteCrawler.new(self, build_crawler_options(options)).crawl(url, &block)
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
# Raises a mapped exception when the service response reports an error.
#
# Does nothing (returns nil) unless +response+ is a Hash with a truthy
# 'error' entry. The exception class comes from error_class_for and the
# message from error_message_for; 'message' falls back to the error code.
#
# @param response [Object] Parsed response from the Node service
# @return [nil] when the response carries no error
def raise_node_error!(response)
  code = response['error'] if response.is_a?(Hash)
  return unless code

  detail = response['message'] || code
  raise error_class_for(code), error_message_for(code, detail)
end
|
|
87
|
+
|
|
88
|
+
# Runs the block, re-running it after a backoff when it raises ServiceError
# or TimeoutError.
#
# +retries+ is the total number of attempts: once that many failures have
# occurred the last exception is re-raised. Other exception types propagate
# immediately.
#
# @param retries [Integer] Maximum number of attempts
# @return the block's return value
def with_retries(retries)
  # `retry` restarts the method body; `||=` keeps the running count across restarts.
  failures ||= 0
  yield
rescue ServiceError, TimeoutError => e
  failures += 1
  raise unless failures < retries

  retry_with_backoff(failures, retries, e)
  retry
end
|
|
100
|
+
|
|
101
|
+
# Copies settings from +options+ into instance variables, applying defaults.
#
# @param options [Hash] Symbol-keyed settings; missing keys fall back to
#   constants, environment variables, or literals as shown below
def load_options(options)
  # Where the Node service listens; the port is coerced with Integer().
  @host = options.fetch(:host, DEFAULT_HOST)
  @port = Integer(options.fetch(:port, DEFAULT_PORT))

  # How the Node process is located and logged. The environment is consulted
  # only when the key is absent; an explicit nil node_bin still becomes 'node'.
  @node_dir = options.fetch(:node_dir) { default_node_dir }
  @node_bin = options.fetch(:node_bin) { ENV.fetch('RUBYCRAWL_NODE_BIN', nil) } || 'node'
  @node_log = options.fetch(:node_log) { ENV.fetch('RUBYCRAWL_NODE_LOG', nil) }

  # Per-crawl defaults.
  @wait_until = options.fetch(:wait_until, nil)
  @block_resources = options.fetch(:block_resources, nil)
  @max_retries = options.fetch(:max_retries, 3)
end
|
|
111
|
+
|
|
112
|
+
# Creates the ServiceClient from the connection and process settings
# previously stored by #load_options.
def build_service_client
  settings = {
    host: @host,
    port: @port,
    node_dir: @node_dir,
    node_bin: @node_bin,
    node_log: @node_log
  }
  @service_client = ServiceClient.new(**settings)
end
|
|
121
|
+
|
|
122
|
+
# Logs a retry notice and sleeps before the next attempt.
#
# The pause is 2**attempt seconds, so it doubles with each failed attempt.
#
# @param attempt [Integer] Number of failures so far
# @param retries [Integer] Attempt limit; shown as retries - 1 in the log line
# @param error [Exception] The failure that triggered this retry
def retry_with_backoff(attempt, retries, error)
  backoff_seconds = 2**attempt
  notice = "[rubycrawl] Retry #{attempt}/#{retries - 1} after #{backoff_seconds}s: #{error.message}"
  warn notice
  sleep backoff_seconds
end
|
|
127
|
+
|
|
128
|
+
# Normalizes site-crawl options into the fixed set of five keys.
#
# Keys present in +options+ win (even when nil or false); missing keys get
# the defaults below. Any other keys in +options+ are dropped.
#
# @param options [Hash] Symbol-keyed caller options
# @return [Hash] max_pages, max_depth, same_host_only, wait_until, block_resources
def build_crawler_options(options)
  defaults = {
    max_pages: 50,
    max_depth: 3,
    same_host_only: true,
    wait_until: @wait_until,
    block_resources: @block_resources
  }
  defaults.merge(options.slice(*defaults.keys))
end
|
|
137
|
+
|
|
138
|
+
# Absolute path of the "node" directory that sits beside this file's directory.
#
# @return [String]
def default_node_dir
  relative_location = '../node'
  File.expand_path(relative_location, __dir__)
end
|
|
141
|
+
end
|
data/node/.gitignore
ADDED
data/node/.npmrc
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fund=false
|
data/node/README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# rubycrawl Node Service
|
|
2
|
+
|
|
3
|
+
Local Playwright-backed HTTP service used by the Ruby gem.
|
|
4
|
+
|
|
5
|
+
## Run
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
npm install
|
|
9
|
+
npm start
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Environment
|
|
13
|
+
|
|
14
|
+
Create a `.env` file (or copy from `.env.example`) if you need custom settings.
|
|
15
|
+
|
|
16
|
+
## Endpoints
|
|
17
|
+
|
|
18
|
+
- `POST /crawl` JSON body: `{ "url": "https://example.com" }`
|
|
19
|
+
- `GET /health`
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../playwright/cli.js
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../playwright-core/cli.js
|