rubycrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/Gemfile +11 -0
- data/LICENSE +21 -0
- data/README.md +585 -0
- data/Rakefile +8 -0
- data/bin/console +9 -0
- data/bin/setup +4 -0
- data/lib/rubycrawl/errors.rb +18 -0
- data/lib/rubycrawl/helpers.rb +66 -0
- data/lib/rubycrawl/markdown_converter.rb +37 -0
- data/lib/rubycrawl/railtie.rb +12 -0
- data/lib/rubycrawl/result.rb +40 -0
- data/lib/rubycrawl/service_client.rb +86 -0
- data/lib/rubycrawl/site_crawler.rb +113 -0
- data/lib/rubycrawl/tasks/install.rake +85 -0
- data/lib/rubycrawl/url_normalizer.rb +68 -0
- data/lib/rubycrawl/version.rb +5 -0
- data/lib/rubycrawl.rb +141 -0
- data/node/.gitignore +2 -0
- data/node/.npmrc +1 -0
- data/node/README.md +19 -0
- data/node/node_modules/.bin/playwright +1 -0
- data/node/node_modules/.bin/playwright-core +1 -0
- data/node/node_modules/.package-lock.json +65 -0
- data/node/node_modules/dotenv/CHANGELOG.md +520 -0
- data/node/node_modules/dotenv/LICENSE +23 -0
- data/node/node_modules/dotenv/README-es.md +411 -0
- data/node/node_modules/dotenv/README.md +645 -0
- data/node/node_modules/dotenv/SECURITY.md +1 -0
- data/node/node_modules/dotenv/config.d.ts +1 -0
- data/node/node_modules/dotenv/config.js +9 -0
- data/node/node_modules/dotenv/lib/cli-options.js +17 -0
- data/node/node_modules/dotenv/lib/env-options.js +28 -0
- data/node/node_modules/dotenv/lib/main.d.ts +162 -0
- data/node/node_modules/dotenv/lib/main.js +386 -0
- data/node/node_modules/dotenv/package.json +62 -0
- data/node/node_modules/playwright/LICENSE +202 -0
- data/node/node_modules/playwright/NOTICE +5 -0
- data/node/node_modules/playwright/README.md +168 -0
- data/node/node_modules/playwright/ThirdPartyNotices.txt +5042 -0
- data/node/node_modules/playwright/cli.js +19 -0
- data/node/node_modules/playwright/index.d.ts +17 -0
- data/node/node_modules/playwright/index.js +17 -0
- data/node/node_modules/playwright/index.mjs +18 -0
- data/node/node_modules/playwright/jsx-runtime.js +42 -0
- data/node/node_modules/playwright/jsx-runtime.mjs +21 -0
- data/node/node_modules/playwright/lib/agents/agentParser.js +89 -0
- data/node/node_modules/playwright/lib/agents/copilot-setup-steps.yml +34 -0
- data/node/node_modules/playwright/lib/agents/generateAgents.js +348 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-coverage.prompt.md +31 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-generate.prompt.md +8 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-generator.agent.md +88 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-heal.prompt.md +6 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-healer.agent.md +55 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-plan.prompt.md +9 -0
- data/node/node_modules/playwright/lib/agents/playwright-test-planner.agent.md +73 -0
- data/node/node_modules/playwright/lib/common/config.js +282 -0
- data/node/node_modules/playwright/lib/common/configLoader.js +344 -0
- data/node/node_modules/playwright/lib/common/esmLoaderHost.js +104 -0
- data/node/node_modules/playwright/lib/common/expectBundle.js +28 -0
- data/node/node_modules/playwright/lib/common/expectBundleImpl.js +407 -0
- data/node/node_modules/playwright/lib/common/fixtures.js +302 -0
- data/node/node_modules/playwright/lib/common/globals.js +58 -0
- data/node/node_modules/playwright/lib/common/ipc.js +60 -0
- data/node/node_modules/playwright/lib/common/poolBuilder.js +85 -0
- data/node/node_modules/playwright/lib/common/process.js +132 -0
- data/node/node_modules/playwright/lib/common/suiteUtils.js +140 -0
- data/node/node_modules/playwright/lib/common/test.js +321 -0
- data/node/node_modules/playwright/lib/common/testLoader.js +101 -0
- data/node/node_modules/playwright/lib/common/testType.js +298 -0
- data/node/node_modules/playwright/lib/common/validators.js +68 -0
- data/node/node_modules/playwright/lib/fsWatcher.js +67 -0
- data/node/node_modules/playwright/lib/index.js +726 -0
- data/node/node_modules/playwright/lib/internalsForTest.js +42 -0
- data/node/node_modules/playwright/lib/isomorphic/events.js +77 -0
- data/node/node_modules/playwright/lib/isomorphic/folders.js +30 -0
- data/node/node_modules/playwright/lib/isomorphic/stringInternPool.js +69 -0
- data/node/node_modules/playwright/lib/isomorphic/teleReceiver.js +521 -0
- data/node/node_modules/playwright/lib/isomorphic/teleSuiteUpdater.js +157 -0
- data/node/node_modules/playwright/lib/isomorphic/testServerConnection.js +225 -0
- data/node/node_modules/playwright/lib/isomorphic/testServerInterface.js +16 -0
- data/node/node_modules/playwright/lib/isomorphic/testTree.js +329 -0
- data/node/node_modules/playwright/lib/isomorphic/types.d.js +16 -0
- data/node/node_modules/playwright/lib/loader/loaderMain.js +59 -0
- data/node/node_modules/playwright/lib/matchers/expect.js +311 -0
- data/node/node_modules/playwright/lib/matchers/matcherHint.js +44 -0
- data/node/node_modules/playwright/lib/matchers/matchers.js +383 -0
- data/node/node_modules/playwright/lib/matchers/toBeTruthy.js +75 -0
- data/node/node_modules/playwright/lib/matchers/toEqual.js +100 -0
- data/node/node_modules/playwright/lib/matchers/toHaveURL.js +101 -0
- data/node/node_modules/playwright/lib/matchers/toMatchAriaSnapshot.js +159 -0
- data/node/node_modules/playwright/lib/matchers/toMatchSnapshot.js +342 -0
- data/node/node_modules/playwright/lib/matchers/toMatchText.js +99 -0
- data/node/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +329 -0
- data/node/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +84 -0
- data/node/node_modules/playwright/lib/mcp/browser/config.js +421 -0
- data/node/node_modules/playwright/lib/mcp/browser/context.js +244 -0
- data/node/node_modules/playwright/lib/mcp/browser/response.js +278 -0
- data/node/node_modules/playwright/lib/mcp/browser/sessionLog.js +75 -0
- data/node/node_modules/playwright/lib/mcp/browser/tab.js +343 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/common.js +65 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/console.js +46 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/dialogs.js +60 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/evaluate.js +61 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/files.js +58 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/form.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/install.js +72 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +107 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/mouse.js +107 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/navigate.js +71 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/network.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/open.js +57 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/pdf.js +49 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/runCode.js +78 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/screenshot.js +93 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +173 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tabs.js +67 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tool.js +47 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/tracing.js +74 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/utils.js +94 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/verify.js +143 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools/wait.js +63 -0
- data/node/node_modules/playwright/lib/mcp/browser/tools.js +84 -0
- data/node/node_modules/playwright/lib/mcp/browser/watchdog.js +44 -0
- data/node/node_modules/playwright/lib/mcp/config.d.js +16 -0
- data/node/node_modules/playwright/lib/mcp/extension/cdpRelay.js +351 -0
- data/node/node_modules/playwright/lib/mcp/extension/extensionContextFactory.js +76 -0
- data/node/node_modules/playwright/lib/mcp/extension/protocol.js +28 -0
- data/node/node_modules/playwright/lib/mcp/index.js +61 -0
- data/node/node_modules/playwright/lib/mcp/log.js +35 -0
- data/node/node_modules/playwright/lib/mcp/program.js +111 -0
- data/node/node_modules/playwright/lib/mcp/sdk/exports.js +28 -0
- data/node/node_modules/playwright/lib/mcp/sdk/http.js +152 -0
- data/node/node_modules/playwright/lib/mcp/sdk/inProcessTransport.js +71 -0
- data/node/node_modules/playwright/lib/mcp/sdk/server.js +223 -0
- data/node/node_modules/playwright/lib/mcp/sdk/tool.js +47 -0
- data/node/node_modules/playwright/lib/mcp/terminal/cli.js +296 -0
- data/node/node_modules/playwright/lib/mcp/terminal/command.js +56 -0
- data/node/node_modules/playwright/lib/mcp/terminal/commands.js +333 -0
- data/node/node_modules/playwright/lib/mcp/terminal/daemon.js +129 -0
- data/node/node_modules/playwright/lib/mcp/terminal/help.json +32 -0
- data/node/node_modules/playwright/lib/mcp/terminal/helpGenerator.js +88 -0
- data/node/node_modules/playwright/lib/mcp/terminal/socketConnection.js +80 -0
- data/node/node_modules/playwright/lib/mcp/test/browserBackend.js +98 -0
- data/node/node_modules/playwright/lib/mcp/test/generatorTools.js +122 -0
- data/node/node_modules/playwright/lib/mcp/test/plannerTools.js +145 -0
- data/node/node_modules/playwright/lib/mcp/test/seed.js +82 -0
- data/node/node_modules/playwright/lib/mcp/test/streams.js +44 -0
- data/node/node_modules/playwright/lib/mcp/test/testBackend.js +99 -0
- data/node/node_modules/playwright/lib/mcp/test/testContext.js +285 -0
- data/node/node_modules/playwright/lib/mcp/test/testTool.js +30 -0
- data/node/node_modules/playwright/lib/mcp/test/testTools.js +108 -0
- data/node/node_modules/playwright/lib/plugins/gitCommitInfoPlugin.js +198 -0
- data/node/node_modules/playwright/lib/plugins/index.js +28 -0
- data/node/node_modules/playwright/lib/plugins/webServerPlugin.js +237 -0
- data/node/node_modules/playwright/lib/program.js +417 -0
- data/node/node_modules/playwright/lib/reporters/base.js +634 -0
- data/node/node_modules/playwright/lib/reporters/blob.js +138 -0
- data/node/node_modules/playwright/lib/reporters/dot.js +99 -0
- data/node/node_modules/playwright/lib/reporters/empty.js +32 -0
- data/node/node_modules/playwright/lib/reporters/github.js +128 -0
- data/node/node_modules/playwright/lib/reporters/html.js +633 -0
- data/node/node_modules/playwright/lib/reporters/internalReporter.js +138 -0
- data/node/node_modules/playwright/lib/reporters/json.js +254 -0
- data/node/node_modules/playwright/lib/reporters/junit.js +232 -0
- data/node/node_modules/playwright/lib/reporters/line.js +131 -0
- data/node/node_modules/playwright/lib/reporters/list.js +253 -0
- data/node/node_modules/playwright/lib/reporters/listModeReporter.js +69 -0
- data/node/node_modules/playwright/lib/reporters/markdown.js +144 -0
- data/node/node_modules/playwright/lib/reporters/merge.js +558 -0
- data/node/node_modules/playwright/lib/reporters/multiplexer.js +112 -0
- data/node/node_modules/playwright/lib/reporters/reporterV2.js +102 -0
- data/node/node_modules/playwright/lib/reporters/teleEmitter.js +317 -0
- data/node/node_modules/playwright/lib/reporters/versions/blobV1.js +16 -0
- data/node/node_modules/playwright/lib/runner/dispatcher.js +530 -0
- data/node/node_modules/playwright/lib/runner/failureTracker.js +72 -0
- data/node/node_modules/playwright/lib/runner/lastRun.js +77 -0
- data/node/node_modules/playwright/lib/runner/loadUtils.js +334 -0
- data/node/node_modules/playwright/lib/runner/loaderHost.js +89 -0
- data/node/node_modules/playwright/lib/runner/processHost.js +180 -0
- data/node/node_modules/playwright/lib/runner/projectUtils.js +241 -0
- data/node/node_modules/playwright/lib/runner/rebase.js +189 -0
- data/node/node_modules/playwright/lib/runner/reporters.js +138 -0
- data/node/node_modules/playwright/lib/runner/sigIntWatcher.js +96 -0
- data/node/node_modules/playwright/lib/runner/storage.js +91 -0
- data/node/node_modules/playwright/lib/runner/taskRunner.js +127 -0
- data/node/node_modules/playwright/lib/runner/tasks.js +410 -0
- data/node/node_modules/playwright/lib/runner/testGroups.js +125 -0
- data/node/node_modules/playwright/lib/runner/testRunner.js +398 -0
- data/node/node_modules/playwright/lib/runner/testServer.js +269 -0
- data/node/node_modules/playwright/lib/runner/uiModeReporter.js +30 -0
- data/node/node_modules/playwright/lib/runner/vcs.js +72 -0
- data/node/node_modules/playwright/lib/runner/watchMode.js +396 -0
- data/node/node_modules/playwright/lib/runner/workerHost.js +104 -0
- data/node/node_modules/playwright/lib/third_party/pirates.js +62 -0
- data/node/node_modules/playwright/lib/third_party/tsconfig-loader.js +103 -0
- data/node/node_modules/playwright/lib/transform/babelBundle.js +46 -0
- data/node/node_modules/playwright/lib/transform/babelBundleImpl.js +461 -0
- data/node/node_modules/playwright/lib/transform/compilationCache.js +274 -0
- data/node/node_modules/playwright/lib/transform/esmLoader.js +103 -0
- data/node/node_modules/playwright/lib/transform/md.js +221 -0
- data/node/node_modules/playwright/lib/transform/portTransport.js +67 -0
- data/node/node_modules/playwright/lib/transform/transform.js +303 -0
- data/node/node_modules/playwright/lib/util.js +400 -0
- data/node/node_modules/playwright/lib/utilsBundle.js +50 -0
- data/node/node_modules/playwright/lib/utilsBundleImpl.js +103 -0
- data/node/node_modules/playwright/lib/worker/fixtureRunner.js +262 -0
- data/node/node_modules/playwright/lib/worker/testInfo.js +536 -0
- data/node/node_modules/playwright/lib/worker/testTracing.js +345 -0
- data/node/node_modules/playwright/lib/worker/timeoutManager.js +174 -0
- data/node/node_modules/playwright/lib/worker/util.js +31 -0
- data/node/node_modules/playwright/lib/worker/workerMain.js +530 -0
- data/node/node_modules/playwright/package.json +72 -0
- data/node/node_modules/playwright/test.d.ts +18 -0
- data/node/node_modules/playwright/test.js +24 -0
- data/node/node_modules/playwright/test.mjs +34 -0
- data/node/node_modules/playwright/types/test.d.ts +10251 -0
- data/node/node_modules/playwright/types/testReporter.d.ts +822 -0
- data/node/node_modules/playwright-core/LICENSE +202 -0
- data/node/node_modules/playwright-core/NOTICE +5 -0
- data/node/node_modules/playwright-core/README.md +3 -0
- data/node/node_modules/playwright-core/ThirdPartyNotices.txt +4076 -0
- data/node/node_modules/playwright-core/bin/install_media_pack.ps1 +5 -0
- data/node/node_modules/playwright-core/bin/install_webkit_wsl.ps1 +33 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_linux.sh +42 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_mac.sh +13 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_win.ps1 +24 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_linux.sh +42 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_mac.sh +12 -0
- data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_win.ps1 +24 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_win.ps1 +23 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_win.ps1 +23 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_linux.sh +48 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_mac.sh +11 -0
- data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_win.ps1 +24 -0
- data/node/node_modules/playwright-core/browsers.json +79 -0
- data/node/node_modules/playwright-core/cli.js +18 -0
- data/node/node_modules/playwright-core/index.d.ts +17 -0
- data/node/node_modules/playwright-core/index.js +32 -0
- data/node/node_modules/playwright-core/index.mjs +28 -0
- data/node/node_modules/playwright-core/lib/androidServerImpl.js +65 -0
- data/node/node_modules/playwright-core/lib/browserServerImpl.js +120 -0
- data/node/node_modules/playwright-core/lib/cli/driver.js +97 -0
- data/node/node_modules/playwright-core/lib/cli/program.js +589 -0
- data/node/node_modules/playwright-core/lib/cli/programWithTestStub.js +74 -0
- data/node/node_modules/playwright-core/lib/client/android.js +361 -0
- data/node/node_modules/playwright-core/lib/client/api.js +137 -0
- data/node/node_modules/playwright-core/lib/client/artifact.js +79 -0
- data/node/node_modules/playwright-core/lib/client/browser.js +161 -0
- data/node/node_modules/playwright-core/lib/client/browserContext.js +582 -0
- data/node/node_modules/playwright-core/lib/client/browserType.js +185 -0
- data/node/node_modules/playwright-core/lib/client/cdpSession.js +51 -0
- data/node/node_modules/playwright-core/lib/client/channelOwner.js +194 -0
- data/node/node_modules/playwright-core/lib/client/clientHelper.js +64 -0
- data/node/node_modules/playwright-core/lib/client/clientInstrumentation.js +55 -0
- data/node/node_modules/playwright-core/lib/client/clientStackTrace.js +69 -0
- data/node/node_modules/playwright-core/lib/client/clock.js +68 -0
- data/node/node_modules/playwright-core/lib/client/connection.js +318 -0
- data/node/node_modules/playwright-core/lib/client/consoleMessage.js +58 -0
- data/node/node_modules/playwright-core/lib/client/coverage.js +44 -0
- data/node/node_modules/playwright-core/lib/client/dialog.js +56 -0
- data/node/node_modules/playwright-core/lib/client/download.js +62 -0
- data/node/node_modules/playwright-core/lib/client/electron.js +138 -0
- data/node/node_modules/playwright-core/lib/client/elementHandle.js +284 -0
- data/node/node_modules/playwright-core/lib/client/errors.js +77 -0
- data/node/node_modules/playwright-core/lib/client/eventEmitter.js +314 -0
- data/node/node_modules/playwright-core/lib/client/events.js +103 -0
- data/node/node_modules/playwright-core/lib/client/fetch.js +368 -0
- data/node/node_modules/playwright-core/lib/client/fileChooser.js +46 -0
- data/node/node_modules/playwright-core/lib/client/fileUtils.js +34 -0
- data/node/node_modules/playwright-core/lib/client/frame.js +409 -0
- data/node/node_modules/playwright-core/lib/client/harRouter.js +87 -0
- data/node/node_modules/playwright-core/lib/client/input.js +84 -0
- data/node/node_modules/playwright-core/lib/client/jsHandle.js +109 -0
- data/node/node_modules/playwright-core/lib/client/jsonPipe.js +39 -0
- data/node/node_modules/playwright-core/lib/client/localUtils.js +60 -0
- data/node/node_modules/playwright-core/lib/client/locator.js +369 -0
- data/node/node_modules/playwright-core/lib/client/network.js +747 -0
- data/node/node_modules/playwright-core/lib/client/page.js +745 -0
- data/node/node_modules/playwright-core/lib/client/pageAgent.js +64 -0
- data/node/node_modules/playwright-core/lib/client/platform.js +77 -0
- data/node/node_modules/playwright-core/lib/client/playwright.js +71 -0
- data/node/node_modules/playwright-core/lib/client/selectors.js +55 -0
- data/node/node_modules/playwright-core/lib/client/stream.js +39 -0
- data/node/node_modules/playwright-core/lib/client/timeoutSettings.js +79 -0
- data/node/node_modules/playwright-core/lib/client/tracing.js +119 -0
- data/node/node_modules/playwright-core/lib/client/types.js +28 -0
- data/node/node_modules/playwright-core/lib/client/video.js +59 -0
- data/node/node_modules/playwright-core/lib/client/waiter.js +142 -0
- data/node/node_modules/playwright-core/lib/client/webError.js +39 -0
- data/node/node_modules/playwright-core/lib/client/webSocket.js +93 -0
- data/node/node_modules/playwright-core/lib/client/worker.js +85 -0
- data/node/node_modules/playwright-core/lib/client/writableStream.js +39 -0
- data/node/node_modules/playwright-core/lib/generated/bindingsControllerSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/clockSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/injectedScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/pollingRecorderSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/storageScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/utilityScriptSource.js +28 -0
- data/node/node_modules/playwright-core/lib/generated/webSocketMockSource.js +336 -0
- data/node/node_modules/playwright-core/lib/inProcessFactory.js +60 -0
- data/node/node_modules/playwright-core/lib/inprocess.js +3 -0
- data/node/node_modules/playwright-core/lib/mcpBundle.js +84 -0
- data/node/node_modules/playwright-core/lib/mcpBundleImpl/index.js +147 -0
- data/node/node_modules/playwright-core/lib/outofprocess.js +76 -0
- data/node/node_modules/playwright-core/lib/protocol/serializers.js +197 -0
- data/node/node_modules/playwright-core/lib/protocol/validator.js +2969 -0
- data/node/node_modules/playwright-core/lib/protocol/validatorPrimitives.js +193 -0
- data/node/node_modules/playwright-core/lib/remote/playwrightConnection.js +129 -0
- data/node/node_modules/playwright-core/lib/remote/playwrightServer.js +334 -0
- data/node/node_modules/playwright-core/lib/server/agent/actionRunner.js +335 -0
- data/node/node_modules/playwright-core/lib/server/agent/actions.js +128 -0
- data/node/node_modules/playwright-core/lib/server/agent/codegen.js +111 -0
- data/node/node_modules/playwright-core/lib/server/agent/context.js +150 -0
- data/node/node_modules/playwright-core/lib/server/agent/expectTools.js +156 -0
- data/node/node_modules/playwright-core/lib/server/agent/pageAgent.js +204 -0
- data/node/node_modules/playwright-core/lib/server/agent/performTools.js +262 -0
- data/node/node_modules/playwright-core/lib/server/agent/tool.js +109 -0
- data/node/node_modules/playwright-core/lib/server/android/android.js +465 -0
- data/node/node_modules/playwright-core/lib/server/android/backendAdb.js +177 -0
- data/node/node_modules/playwright-core/lib/server/artifact.js +127 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiBrowser.js +549 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiChromium.js +148 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiConnection.js +213 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiDeserializer.js +116 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiExecutionContext.js +267 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiFirefox.js +128 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiInput.js +146 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiNetworkManager.js +383 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiOverCdp.js +102 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPage.js +583 -0
- data/node/node_modules/playwright-core/lib/server/bidi/bidiPdf.js +106 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiCommands.d.js +22 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiKeyboard.js +256 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocol.js +24 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolCore.js +180 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolPermissions.js +42 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiSerializer.js +148 -0
- data/node/node_modules/playwright-core/lib/server/bidi/third_party/firefoxPrefs.js +259 -0
- data/node/node_modules/playwright-core/lib/server/browser.js +149 -0
- data/node/node_modules/playwright-core/lib/server/browserContext.js +702 -0
- data/node/node_modules/playwright-core/lib/server/browserType.js +336 -0
- data/node/node_modules/playwright-core/lib/server/callLog.js +82 -0
- data/node/node_modules/playwright-core/lib/server/chromium/appIcon.png +0 -0
- data/node/node_modules/playwright-core/lib/server/chromium/chromium.js +395 -0
- data/node/node_modules/playwright-core/lib/server/chromium/chromiumSwitches.js +104 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crBrowser.js +511 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crConnection.js +197 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crCoverage.js +235 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crDevTools.js +111 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crDragDrop.js +131 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crExecutionContext.js +146 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crInput.js +187 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crNetworkManager.js +707 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crPage.js +1001 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crPdf.js +121 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crProtocolHelper.js +145 -0
- data/node/node_modules/playwright-core/lib/server/chromium/crServiceWorker.js +136 -0
- data/node/node_modules/playwright-core/lib/server/chromium/defaultFontFamilies.js +162 -0
- data/node/node_modules/playwright-core/lib/server/chromium/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/clock.js +149 -0
- data/node/node_modules/playwright-core/lib/server/codegen/csharp.js +327 -0
- data/node/node_modules/playwright-core/lib/server/codegen/java.js +274 -0
- data/node/node_modules/playwright-core/lib/server/codegen/javascript.js +247 -0
- data/node/node_modules/playwright-core/lib/server/codegen/jsonl.js +52 -0
- data/node/node_modules/playwright-core/lib/server/codegen/language.js +132 -0
- data/node/node_modules/playwright-core/lib/server/codegen/languages.js +68 -0
- data/node/node_modules/playwright-core/lib/server/codegen/python.js +279 -0
- data/node/node_modules/playwright-core/lib/server/codegen/types.js +16 -0
- data/node/node_modules/playwright-core/lib/server/console.js +57 -0
- data/node/node_modules/playwright-core/lib/server/cookieStore.js +206 -0
- data/node/node_modules/playwright-core/lib/server/debugController.js +191 -0
- data/node/node_modules/playwright-core/lib/server/debugger.js +119 -0
- data/node/node_modules/playwright-core/lib/server/deviceDescriptors.js +39 -0
- data/node/node_modules/playwright-core/lib/server/deviceDescriptorsSource.json +1779 -0
- data/node/node_modules/playwright-core/lib/server/dialog.js +116 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/androidDispatcher.js +325 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/artifactDispatcher.js +118 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserContextDispatcher.js +384 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserDispatcher.js +118 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/browserTypeDispatcher.js +64 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/cdpSessionDispatcher.js +44 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/debugControllerDispatcher.js +78 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/dialogDispatcher.js +47 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/dispatcher.js +364 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/electronDispatcher.js +89 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/elementHandlerDispatcher.js +181 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/frameDispatcher.js +227 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsHandleDispatcher.js +85 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/jsonPipeDispatcher.js +58 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/localUtilsDispatcher.js +149 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/networkDispatchers.js +213 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageAgentDispatcher.js +96 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/pageDispatcher.js +393 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/playwrightDispatcher.js +108 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/streamDispatcher.js +67 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/tracingDispatcher.js +68 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/webSocketRouteDispatcher.js +165 -0
- data/node/node_modules/playwright-core/lib/server/dispatchers/writableStreamDispatcher.js +79 -0
- data/node/node_modules/playwright-core/lib/server/dom.js +815 -0
- data/node/node_modules/playwright-core/lib/server/download.js +70 -0
- data/node/node_modules/playwright-core/lib/server/electron/electron.js +273 -0
- data/node/node_modules/playwright-core/lib/server/electron/loader.js +29 -0
- data/node/node_modules/playwright-core/lib/server/errors.js +69 -0
- data/node/node_modules/playwright-core/lib/server/fetch.js +621 -0
- data/node/node_modules/playwright-core/lib/server/fileChooser.js +43 -0
- data/node/node_modules/playwright-core/lib/server/fileUploadUtils.js +84 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffBrowser.js +418 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffConnection.js +142 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffExecutionContext.js +150 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffInput.js +159 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffNetworkManager.js +256 -0
- data/node/node_modules/playwright-core/lib/server/firefox/ffPage.js +497 -0
- data/node/node_modules/playwright-core/lib/server/firefox/firefox.js +114 -0
- data/node/node_modules/playwright-core/lib/server/firefox/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/formData.js +147 -0
- data/node/node_modules/playwright-core/lib/server/frameSelectors.js +160 -0
- data/node/node_modules/playwright-core/lib/server/frames.js +1471 -0
- data/node/node_modules/playwright-core/lib/server/har/harRecorder.js +147 -0
- data/node/node_modules/playwright-core/lib/server/har/harTracer.js +607 -0
- data/node/node_modules/playwright-core/lib/server/harBackend.js +157 -0
- data/node/node_modules/playwright-core/lib/server/helper.js +96 -0
- data/node/node_modules/playwright-core/lib/server/index.js +58 -0
- data/node/node_modules/playwright-core/lib/server/input.js +277 -0
- data/node/node_modules/playwright-core/lib/server/instrumentation.js +72 -0
- data/node/node_modules/playwright-core/lib/server/javascript.js +291 -0
- data/node/node_modules/playwright-core/lib/server/launchApp.js +128 -0
- data/node/node_modules/playwright-core/lib/server/localUtils.js +214 -0
- data/node/node_modules/playwright-core/lib/server/macEditingCommands.js +143 -0
- data/node/node_modules/playwright-core/lib/server/network.js +667 -0
- data/node/node_modules/playwright-core/lib/server/page.js +830 -0
- data/node/node_modules/playwright-core/lib/server/pipeTransport.js +89 -0
- data/node/node_modules/playwright-core/lib/server/playwright.js +69 -0
- data/node/node_modules/playwright-core/lib/server/progress.js +132 -0
- data/node/node_modules/playwright-core/lib/server/protocolError.js +52 -0
- data/node/node_modules/playwright-core/lib/server/recorder/chat.js +161 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderApp.js +366 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderRunner.js +138 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderSignalProcessor.js +83 -0
- data/node/node_modules/playwright-core/lib/server/recorder/recorderUtils.js +157 -0
- data/node/node_modules/playwright-core/lib/server/recorder/throttledFile.js +57 -0
- data/node/node_modules/playwright-core/lib/server/recorder.js +499 -0
- data/node/node_modules/playwright-core/lib/server/registry/browserFetcher.js +177 -0
- data/node/node_modules/playwright-core/lib/server/registry/dependencies.js +371 -0
- data/node/node_modules/playwright-core/lib/server/registry/index.js +1422 -0
- data/node/node_modules/playwright-core/lib/server/registry/nativeDeps.js +1280 -0
- data/node/node_modules/playwright-core/lib/server/registry/oopDownloadBrowserMain.js +127 -0
- data/node/node_modules/playwright-core/lib/server/screencast.js +190 -0
- data/node/node_modules/playwright-core/lib/server/screenshotter.js +333 -0
- data/node/node_modules/playwright-core/lib/server/selectors.js +112 -0
- data/node/node_modules/playwright-core/lib/server/socksClientCertificatesInterceptor.js +383 -0
- data/node/node_modules/playwright-core/lib/server/socksInterceptor.js +95 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotter.js +147 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotterInjected.js +561 -0
- data/node/node_modules/playwright-core/lib/server/trace/recorder/tracing.js +604 -0
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceParser.js +72 -0
- data/node/node_modules/playwright-core/lib/server/trace/viewer/traceViewer.js +245 -0
- data/node/node_modules/playwright-core/lib/server/transport.js +181 -0
- data/node/node_modules/playwright-core/lib/server/types.js +28 -0
- data/node/node_modules/playwright-core/lib/server/usKeyboardLayout.js +145 -0
- data/node/node_modules/playwright-core/lib/server/utils/ascii.js +44 -0
- data/node/node_modules/playwright-core/lib/server/utils/comparators.js +139 -0
- data/node/node_modules/playwright-core/lib/server/utils/crypto.js +216 -0
- data/node/node_modules/playwright-core/lib/server/utils/debug.js +42 -0
- data/node/node_modules/playwright-core/lib/server/utils/debugLogger.js +122 -0
- data/node/node_modules/playwright-core/lib/server/utils/env.js +73 -0
- data/node/node_modules/playwright-core/lib/server/utils/eventsHelper.js +39 -0
- data/node/node_modules/playwright-core/lib/server/utils/expectUtils.js +123 -0
- data/node/node_modules/playwright-core/lib/server/utils/fileUtils.js +191 -0
- data/node/node_modules/playwright-core/lib/server/utils/happyEyeballs.js +207 -0
- data/node/node_modules/playwright-core/lib/server/utils/hostPlatform.js +123 -0
- data/node/node_modules/playwright-core/lib/server/utils/httpServer.js +203 -0
- data/node/node_modules/playwright-core/lib/server/utils/imageUtils.js +141 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/colorUtils.js +89 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/compare.js +109 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/imageChannel.js +78 -0
- data/node/node_modules/playwright-core/lib/server/utils/image_tools/stats.js +102 -0
- data/node/node_modules/playwright-core/lib/server/utils/linuxUtils.js +71 -0
- data/node/node_modules/playwright-core/lib/server/utils/network.js +242 -0
- data/node/node_modules/playwright-core/lib/server/utils/nodePlatform.js +154 -0
- data/node/node_modules/playwright-core/lib/server/utils/pipeTransport.js +84 -0
- data/node/node_modules/playwright-core/lib/server/utils/processLauncher.js +241 -0
- data/node/node_modules/playwright-core/lib/server/utils/profiler.js +65 -0
- data/node/node_modules/playwright-core/lib/server/utils/socksProxy.js +511 -0
- data/node/node_modules/playwright-core/lib/server/utils/spawnAsync.js +41 -0
- data/node/node_modules/playwright-core/lib/server/utils/task.js +51 -0
- data/node/node_modules/playwright-core/lib/server/utils/userAgent.js +98 -0
- data/node/node_modules/playwright-core/lib/server/utils/wsServer.js +121 -0
- data/node/node_modules/playwright-core/lib/server/utils/zipFile.js +74 -0
- data/node/node_modules/playwright-core/lib/server/utils/zones.js +57 -0
- data/node/node_modules/playwright-core/lib/server/videoRecorder.js +124 -0
- data/node/node_modules/playwright-core/lib/server/webkit/protocol.d.js +16 -0
- data/node/node_modules/playwright-core/lib/server/webkit/webkit.js +108 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkBrowser.js +335 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkConnection.js +144 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkExecutionContext.js +154 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkInput.js +181 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkInterceptableRequest.js +197 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkPage.js +1158 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkProvisionalPage.js +83 -0
- data/node/node_modules/playwright-core/lib/server/webkit/wkWorkers.js +105 -0
- data/node/node_modules/playwright-core/lib/third_party/pixelmatch.js +255 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/ariaSnapshot.js +455 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/assert.js +31 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/colors.js +72 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssParser.js +245 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/cssTokenizer.js +1051 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/headers.js +53 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorGenerators.js +689 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorParser.js +176 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorUtils.js +81 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/lruCache.js +51 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/manualPromise.js +114 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/mimeType.js +459 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/multimap.js +80 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolFormatter.js +81 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolMetainfo.js +330 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/rtti.js +43 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/selectorParser.js +386 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/semaphore.js +54 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stackTrace.js +158 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/stringUtils.js +204 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/time.js +49 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/timeoutRunner.js +66 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/entries.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotRenderer.js +499 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotServer.js +120 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotStorage.js +89 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceLoader.js +131 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModel.js +365 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModernizer.js +400 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV3.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV4.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV5.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV6.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV7.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV8.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/traceUtils.js +58 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/types.js +16 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/urlMatch.js +190 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/utilityScriptSerializers.js +251 -0
- data/node/node_modules/playwright-core/lib/utils/isomorphic/yaml.js +84 -0
- data/node/node_modules/playwright-core/lib/utils.js +111 -0
- data/node/node_modules/playwright-core/lib/utilsBundle.js +109 -0
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/index.js +218 -0
- data/node/node_modules/playwright-core/lib/utilsBundleImpl/xdg-open +1066 -0
- data/node/node_modules/playwright-core/lib/vite/htmlReport/index.html +84 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DYBRYzYX.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DadYNm1I.js +32 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/codicon-DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BSjZa4pk.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BhTWtUlo.js +193 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/index.html +29 -0
- data/node/node_modules/playwright-core/lib/vite/recorder/playwright-logo.svg +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/codeMirrorModule-a5XoALAZ.js +32 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/defaultSettingsView-CJSZINFr.js +266 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/xtermModule-CsJ4vdCR.js +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codeMirrorModule.DYBRYzYX.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/codicon.DCmgc-ay.ttf +0 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/defaultSettingsView.7ch9cixO.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.BVu7tZDe.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.Bk2uYQRV.js +2 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/index.html +43 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/manifest.webmanifest +16 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/playwright-logo.svg +9 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/snapshot.html +21 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/sw.bundle.js +5 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.Btcz36p_.css +1 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.CQJ9SCIQ.js +5 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.html +17 -0
- data/node/node_modules/playwright-core/lib/vite/traceViewer/xtermModule.DYP7pi_n.css +32 -0
- data/node/node_modules/playwright-core/lib/zipBundle.js +34 -0
- data/node/node_modules/playwright-core/lib/zipBundleImpl.js +5 -0
- data/node/node_modules/playwright-core/package.json +43 -0
- data/node/node_modules/playwright-core/types/protocol.d.ts +23824 -0
- data/node/node_modules/playwright-core/types/structs.d.ts +45 -0
- data/node/node_modules/playwright-core/types/types.d.ts +22843 -0
- data/node/package-lock.json +72 -0
- data/node/package.json +14 -0
- data/node/src/index.js +215 -0
- data/rubycrawl.gemspec +29 -0
- data/spec/rubycrawl_spec.rb +51 -0
- data/spec/spec_helper.rb +11 -0
- metadata +645 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: f463d9ba6ffa83c283954dd411a08dc0184ed065128f57da625b9c349447b77a
|
|
4
|
+
data.tar.gz: cc8adb28596fe65e54f18ec97d83152c9a8df6d38c3cf584c359f2a2230e6048
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 98c20cc8a1ff17df7a830e93f6aa49e5c630c7d43533e516648b4c4fdc301c7e733ab9aba6502d6de7bb5b5f1afe40f037d4fad59e77051322780dba5c575fa2
|
|
7
|
+
data.tar.gz: a789dea3bfbd3c63dc8d364da49b38904675d1311dc143dacbb0cf58631a0e8d59d8b3484148dde85b6efb03f8ec3caf6026e209239486219c7f55a7c955ff5c
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 RubyCrawl
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
# rubycrawl
|
|
2
|
+
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rubycrawl)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
**Playwright-based web crawler for Ruby** — Inspired by [crawl4ai](https://github.com/unclecode/crawl4ai) (Python), designed idiomatically for Ruby with production-ready features.
|
|
7
|
+
|
|
8
|
+
RubyCrawl provides accurate, JavaScript-enabled web scraping using Playwright's battle-tested browser automation, wrapped in a clean Ruby API. Perfect for extracting content from modern SPAs and dynamic websites.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **Playwright-powered**: Real browser automation for JavaScript-heavy sites
|
|
13
|
+
- **Production-ready**: Designed for Rails apps and production environments
|
|
14
|
+
- **Simple API**: Clean, minimal Ruby interface — zero Playwright knowledge required
|
|
15
|
+
- **Resource optimization**: Built-in resource blocking for faster crawls
|
|
16
|
+
- **Auto-managed browsers**: Browser process reuse and automatic lifecycle management
|
|
17
|
+
- **Content extraction**: HTML, links, and Markdown conversion
|
|
18
|
+
- **Multi-page crawling**: BFS crawler with depth limits and deduplication
|
|
19
|
+
- **Rails integration**: First-class Rails support with a Railtie, an install rake task, and a generated initializer
|
|
20
|
+
|
|
21
|
+
## Table of Contents
|
|
22
|
+
|
|
23
|
+
- [Installation](#installation)
|
|
24
|
+
- [Quick Start](#quick-start)
|
|
25
|
+
- [Usage](#usage)
|
|
26
|
+
- [Basic Crawling](#basic-crawling)
|
|
27
|
+
- [Multi-Page Crawling](#multi-page-crawling)
|
|
28
|
+
- [Configuration](#configuration)
|
|
29
|
+
- [Result Object](#result-object)
|
|
30
|
+
- [Rails Integration](#rails-integration)
|
|
31
|
+
- [Production Deployment](#production-deployment)
|
|
32
|
+
- [Architecture](#architecture)
|
|
33
|
+
- [Performance](#performance)
|
|
34
|
+
- [Development](#development)
|
|
35
|
+
- [Contributing](#contributing)
|
|
36
|
+
- [License](#license)
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
### Requirements
|
|
41
|
+
|
|
42
|
+
- **Ruby** >= 3.0
|
|
43
|
+
- **Node.js** LTS (v18+ recommended) — required for the bundled Playwright service
|
|
44
|
+
|
|
45
|
+
### Add to Gemfile
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
gem "rubycrawl"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Then install:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
bundle install
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Install Playwright browsers
|
|
58
|
+
|
|
59
|
+
After bundling, install the Playwright browsers:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
bundle exec rake rubycrawl:install
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
This command:
|
|
66
|
+
|
|
67
|
+
- Installs Node.js dependencies in the bundled `node/` directory
|
|
68
|
+
- Downloads Playwright browsers (Chromium, Firefox, WebKit)
|
|
69
|
+
- Creates a Rails initializer (if using Rails)
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
require "rubycrawl"
|
|
75
|
+
|
|
76
|
+
# Simple crawl
|
|
77
|
+
result = RubyCrawl.crawl("https://example.com")
|
|
78
|
+
|
|
79
|
+
# Access extracted content
|
|
80
|
+
puts result.html # Raw HTML content
|
|
81
|
+
puts result.markdown # Converted to Markdown
|
|
82
|
+
puts result.links # Extracted links from the page
|
|
83
|
+
puts result.metadata # Status code, final URL, etc.
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Usage
|
|
87
|
+
|
|
88
|
+
### Basic Crawling
|
|
89
|
+
|
|
90
|
+
The simplest way to crawl a URL:
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
result = RubyCrawl.crawl("https://example.com")
|
|
94
|
+
|
|
95
|
+
# Access the results
|
|
96
|
+
result.html # => "<html>...</html>"
|
|
97
|
+
result.markdown # => "# Example Domain\n\nThis domain is..." (lazy-loaded)
|
|
98
|
+
result.links # => [{ "url" => "https://...", "text" => "More info" }, ...]
|
|
99
|
+
result.metadata # => { "status" => 200, "final_url" => "https://example.com" }
|
|
100
|
+
result.text # => "" (coming soon)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Multi-Page Crawling
|
|
104
|
+
|
|
105
|
+
Crawl an entire site following links with BFS (breadth-first search):
|
|
106
|
+
|
|
107
|
+
```ruby
|
|
108
|
+
# Crawl up to 100 pages, max 3 links deep
|
|
109
|
+
RubyCrawl.crawl_site("https://example.com", max_pages: 100, max_depth: 3) do |page|
|
|
110
|
+
# Each page is yielded as it's crawled (streaming)
|
|
111
|
+
puts "Crawled: #{page.url} (depth: #{page.depth})"
|
|
112
|
+
|
|
113
|
+
# Save to database
|
|
114
|
+
Page.create!(
|
|
115
|
+
url: page.url,
|
|
116
|
+
html: page.html,
|
|
117
|
+
markdown: page.markdown,
|
|
118
|
+
depth: page.depth
|
|
119
|
+
)
|
|
120
|
+
end
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
#### Multi-Page Options
|
|
124
|
+
|
|
125
|
+
| Option | Default | Description |
|
|
126
|
+
|--------|---------|-------------|
|
|
127
|
+
| `max_pages` | 50 | Maximum number of pages to crawl |
|
|
128
|
+
| `max_depth` | 3 | Maximum link depth from start URL |
|
|
129
|
+
| `same_host_only` | true | Only follow links on the same domain |
|
|
130
|
+
| `wait_until` | inherited | Page load strategy |
|
|
131
|
+
| `block_resources` | inherited | Block images/fonts/CSS |
|
|
132
|
+
|
|
133
|
+
#### Page Result Object
|
|
134
|
+
|
|
135
|
+
The block receives a `PageResult` with:
|
|
136
|
+
|
|
137
|
+
```ruby
|
|
138
|
+
page.url # String: Final URL after redirects
|
|
139
|
+
page.html # String: Full HTML content
|
|
140
|
+
page.markdown # String: Lazy-converted Markdown
|
|
141
|
+
page.links # Array: URLs extracted from page
|
|
142
|
+
page.metadata # Hash: HTTP status, final URL, etc.
|
|
143
|
+
page.depth # Integer: Link depth from start URL
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Configuration
|
|
147
|
+
|
|
148
|
+
#### Global Configuration
|
|
149
|
+
|
|
150
|
+
Set default options that apply to all crawls:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
RubyCrawl.configure(
|
|
154
|
+
wait_until: "networkidle", # Wait until network is idle
|
|
155
|
+
block_resources: true # Block images, fonts, CSS for speed
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
# All subsequent crawls use these defaults
|
|
159
|
+
result = RubyCrawl.crawl("https://example.com")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
#### Per-Request Options
|
|
163
|
+
|
|
164
|
+
Override defaults for specific requests:
|
|
165
|
+
|
|
166
|
+
```ruby
|
|
167
|
+
# Use global defaults
|
|
168
|
+
result = RubyCrawl.crawl("https://example.com")
|
|
169
|
+
|
|
170
|
+
# Override for this request only
|
|
171
|
+
result = RubyCrawl.crawl(
|
|
172
|
+
"https://example.com",
|
|
173
|
+
wait_until: "domcontentloaded",
|
|
174
|
+
block_resources: false
|
|
175
|
+
)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
#### Configuration Options
|
|
179
|
+
|
|
180
|
+
| Option | Values | Default | Description |
|
|
181
|
+
| ----------------- | ----------------------------------------------- | -------- | ------------------------------------------------- |
|
|
182
|
+
| `wait_until` | `"load"`, `"domcontentloaded"`, `"networkidle"` | `"load"` | When to consider page loaded |
|
|
183
|
+
| `block_resources` | `true`, `false` | `true` | Block images, fonts, CSS, media for faster crawls |
|
|
184
|
+
|
|
185
|
+
**Wait strategies explained:**
|
|
186
|
+
|
|
187
|
+
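- `load` — Wait for the `load` event, fired once the page and its subresources have finished loading (good default for static sites)
|
|
188
|
+
- `domcontentloaded` — Wait for the `DOMContentLoaded` event, fired once the HTML is parsed and before subresources finish loading (fastest)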
|
|
189
|
+
- `networkidle` — Wait until no network requests for 500ms (slowest, best for SPAs)
|
|
190
|
+
|
|
191
|
+
### Result Object
|
|
192
|
+
|
|
193
|
+
The crawl result is a `RubyCrawl::Result` object with these attributes:
|
|
194
|
+
|
|
195
|
+
```ruby
|
|
196
|
+
result = RubyCrawl.crawl("https://example.com")
|
|
197
|
+
|
|
198
|
+
result.html # String: Raw HTML content from page
|
|
199
|
+
result.markdown # String: Markdown conversion (lazy-loaded on first access)
|
|
200
|
+
result.links # Array: Extracted links with url and text
|
|
201
|
+
result.text # String: Plain text (coming soon)
|
|
202
|
+
result.metadata # Hash: Comprehensive metadata (see below)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
#### Links Format
|
|
206
|
+
|
|
207
|
+
```ruby
|
|
208
|
+
result.links
|
|
209
|
+
# => [
|
|
210
|
+
# { "url" => "https://example.com/about", "text" => "About Us" },
|
|
211
|
+
# { "url" => "https://example.com/contact", "text" => "Contact" },
|
|
212
|
+
# ...
|
|
213
|
+
# ]
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
#### Markdown Conversion
|
|
217
|
+
|
|
218
|
+
Markdown is **lazy-loaded** — conversion only happens when you access `.markdown`:
|
|
219
|
+
|
|
220
|
+
```ruby
|
|
221
|
+
result = RubyCrawl.crawl(url)
|
|
222
|
+
result.html # ✅ No overhead
|
|
223
|
+
result.markdown # ⬅️ Conversion happens here (first call only)
|
|
224
|
+
result.markdown # ✅ Cached, instant
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
Uses [reverse_markdown](https://github.com/xijo/reverse_markdown) with GitHub-flavored output.
|
|
228
|
+
|
|
229
|
+
#### Metadata Fields
|
|
230
|
+
|
|
231
|
+
The `metadata` hash includes HTTP and HTML metadata:
|
|
232
|
+
|
|
233
|
+
```ruby
|
|
234
|
+
result.metadata
|
|
235
|
+
# => {
|
|
236
|
+
# "status" => 200, # HTTP status code
|
|
237
|
+
# "final_url" => "https://...", # Final URL after redirects
|
|
238
|
+
# "title" => "Page Title", # <title> tag
|
|
239
|
+
# "description" => "...", # Meta description
|
|
240
|
+
# "keywords" => "ruby, web", # Meta keywords
|
|
241
|
+
# "author" => "Author Name", # Meta author
|
|
242
|
+
# "og_title" => "...", # Open Graph title
|
|
243
|
+
# "og_description" => "...", # Open Graph description
|
|
244
|
+
# "og_image" => "https://...", # Open Graph image
|
|
245
|
+
# "og_url" => "https://...", # Open Graph URL
|
|
246
|
+
# "og_type" => "website", # Open Graph type
|
|
247
|
+
# "twitter_card" => "summary", # Twitter card type
|
|
248
|
+
# "twitter_title" => "...", # Twitter title
|
|
249
|
+
# "twitter_description" => "...", # Twitter description
|
|
250
|
+
# "twitter_image" => "https://...",# Twitter image
|
|
251
|
+
# "canonical" => "https://...", # Canonical URL
|
|
252
|
+
# "lang" => "en", # Page language
|
|
253
|
+
# "charset" => "UTF-8" # Character encoding
|
|
254
|
+
# }
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Note: Any HTML metadata field may be `nil` if it is not present on the page.
|
|
258
|
+
|
|
259
|
+
### Error Handling
|
|
260
|
+
|
|
261
|
+
RubyCrawl provides specific exception classes for different error scenarios:
|
|
262
|
+
|
|
263
|
+
```ruby
|
|
264
|
+
begin
|
|
265
|
+
result = RubyCrawl.crawl(url)
|
|
266
|
+
rescue RubyCrawl::ConfigurationError => e
|
|
267
|
+
# Invalid URL or configuration
|
|
268
|
+
puts "Configuration error: #{e.message}"
|
|
269
|
+
rescue RubyCrawl::TimeoutError => e
|
|
270
|
+
# Page load timeout or network timeout
|
|
271
|
+
puts "Timeout: #{e.message}"
|
|
272
|
+
rescue RubyCrawl::NavigationError => e
|
|
273
|
+
# Page navigation failed (404, DNS error, SSL error, etc.)
|
|
274
|
+
puts "Navigation failed: #{e.message}"
|
|
275
|
+
rescue RubyCrawl::ServiceError => e
|
|
276
|
+
# Node service unavailable or crashed
|
|
277
|
+
puts "Service error: #{e.message}"
|
|
278
|
+
rescue RubyCrawl::Error => e
|
|
279
|
+
# Catch-all for any RubyCrawl error
|
|
280
|
+
puts "Crawl error: #{e.message}"
|
|
281
|
+
end
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
**Exception Hierarchy:**
|
|
285
|
+
- `RubyCrawl::Error` (base class)
|
|
286
|
+
- `RubyCrawl::ConfigurationError` - Invalid URL or configuration
|
|
287
|
+
- `RubyCrawl::TimeoutError` - Timeout during crawl
|
|
288
|
+
- `RubyCrawl::NavigationError` - Page navigation failed
|
|
289
|
+
- `RubyCrawl::ServiceError` - Node service issues
|
|
290
|
+
|
|
291
|
+
**Automatic Retry:** RubyCrawl automatically retries transient failures (service errors, timeouts) up to 3 times with exponential backoff (2s, 4s, 8s). Configure with:
|
|
292
|
+
|
|
293
|
+
```ruby
|
|
294
|
+
RubyCrawl.configure(max_retries: 5)
|
|
295
|
+
# or per-request
|
|
296
|
+
RubyCrawl.crawl(url, retries: 1) # Disable retry
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## Rails Integration
|
|
300
|
+
|
|
301
|
+
### Installation
|
|
302
|
+
|
|
303
|
+
Run the installer in your Rails app:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
bundle exec rake rubycrawl:install
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
This creates `config/initializers/rubycrawl.rb`:
|
|
310
|
+
|
|
311
|
+
```ruby
|
|
312
|
+
# frozen_string_literal: true
|
|
313
|
+
|
|
314
|
+
# rubycrawl default configuration
|
|
315
|
+
RubyCrawl.configure(
|
|
316
|
+
wait_until: "load",
|
|
317
|
+
block_resources: true
|
|
318
|
+
)
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Usage in Rails
|
|
322
|
+
|
|
323
|
+
```ruby
|
|
324
|
+
# In a controller, service, or background job
|
|
325
|
+
class ContentScraperJob < ApplicationJob
|
|
326
|
+
def perform(url)
|
|
327
|
+
result = RubyCrawl.crawl(url)
|
|
328
|
+
|
|
329
|
+
# Save to database
|
|
330
|
+
ScrapedContent.create!(
|
|
331
|
+
url: url,
|
|
332
|
+
html: result.html,
|
|
333
|
+
status: result.metadata["status"]
|
|
334
|
+
)
|
|
335
|
+
end
|
|
336
|
+
end
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Production Deployment
|
|
340
|
+
|
|
341
|
+
### Pre-deployment Checklist
|
|
342
|
+
|
|
343
|
+
1. **Install Node.js** on your production servers (LTS version recommended)
|
|
344
|
+
2. **Run installer** during deployment:
|
|
345
|
+
```bash
|
|
346
|
+
bundle exec rake rubycrawl:install
|
|
347
|
+
```
|
|
348
|
+
3. **Set environment variables** (optional):
|
|
349
|
+
```bash
|
|
350
|
+
export RUBYCRAWL_NODE_BIN=/usr/bin/node # Custom Node.js path
|
|
351
|
+
export RUBYCRAWL_NODE_LOG=/var/log/rubycrawl.log # Service logs
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Docker Example
|
|
355
|
+
|
|
356
|
+
```dockerfile
|
|
357
|
+
FROM ruby:3.2
|
|
358
|
+
|
|
359
|
+
# Install Node.js LTS
|
|
360
|
+
RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \
|
|
361
|
+
&& apt-get install -y nodejs
|
|
362
|
+
|
|
363
|
+
# Install system dependencies for Playwright
|
|
364
|
+
RUN npx playwright install-deps
|
|
365
|
+
|
|
366
|
+
WORKDIR /app
|
|
367
|
+
COPY Gemfile* ./
|
|
368
|
+
RUN bundle install
|
|
369
|
+
|
|
370
|
+
# Install Playwright browsers
|
|
371
|
+
RUN bundle exec rake rubycrawl:install
|
|
372
|
+
|
|
373
|
+
COPY . .
|
|
374
|
+
CMD ["rails", "server"]
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
### Heroku Deployment
|
|
378
|
+
|
|
379
|
+
Add the Node.js buildpack:
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
heroku buildpacks:add heroku/nodejs
|
|
383
|
+
heroku buildpacks:add heroku/ruby
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
Add to `package.json` in your Rails root:
|
|
387
|
+
|
|
388
|
+
```json
|
|
389
|
+
{
|
|
390
|
+
"engines": {
|
|
391
|
+
"node": "18.x"
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
### Performance Tips
|
|
397
|
+
|
|
398
|
+
- **Reuse instances**: Use the class-level `RubyCrawl.crawl` method (recommended) rather than creating new instances
|
|
399
|
+
- **Resource blocking**: Keep `block_resources: true` for 2-3x faster crawls when you don't need images/CSS
|
|
400
|
+
- **Concurrency**: Use background jobs (Sidekiq, etc.) for parallel crawling
|
|
401
|
+
- **Browser reuse**: The first crawl is slower due to browser launch; subsequent crawls reuse the process
|
|
402
|
+
|
|
403
|
+
## Architecture
|
|
404
|
+
|
|
405
|
+
RubyCrawl uses a **dual-process architecture**:
|
|
406
|
+
|
|
407
|
+
```
|
|
408
|
+
┌─────────────────────────────────────────────┐
|
|
409
|
+
│ Ruby Process (Your Application) │
|
|
410
|
+
│ ┌─────────────────────────────────────┐ │
|
|
411
|
+
│ │ RubyCrawl Gem │ │
|
|
412
|
+
│ │ • Public API │ │
|
|
413
|
+
│ │ • Result normalization │ │
|
|
414
|
+
│ │ • Error handling │ │
|
|
415
|
+
│ └────────────┬────────────────────────┘ │
|
|
416
|
+
└───────────────┼─────────────────────────────┘
|
|
417
|
+
│ HTTP/JSON (localhost:3344)
|
|
418
|
+
┌───────────────┼─────────────────────────────┐
|
|
419
|
+
│ Node.js Process (Auto-started) │
|
|
420
|
+
│ ┌────────────┴────────────────────────┐ │
|
|
421
|
+
│ │ Playwright Service │ │
|
|
422
|
+
│ │ • Browser management │ │
|
|
423
|
+
│ │ • Page navigation │ │
|
|
424
|
+
│ │ • HTML extraction │ │
|
|
425
|
+
│ │ • Resource blocking │ │
|
|
426
|
+
│ └─────────────────────────────────────┘ │
|
|
427
|
+
└─────────────────────────────────────────────┘
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
**Why this architecture?**
|
|
431
|
+
|
|
432
|
+
- **Separation of concerns**: Ruby handles orchestration, Node handles browsers
|
|
433
|
+
- **Stability**: Playwright's official Node.js bindings are the most reliable
|
|
434
|
+
- **Performance**: Long-running browser process, reused across requests
|
|
435
|
+
- **Simplicity**: No C extensions, pure Ruby + bundled Node service
|
|
436
|
+
|
|
437
|
+
See [.github/copilot-instructions.md](.github/copilot-instructions.md) for detailed architecture documentation.
|
|
438
|
+
|
|
439
|
+
## Performance
|
|
440
|
+
|
|
441
|
+
### Benchmarks
|
|
442
|
+
|
|
443
|
+
Typical crawl times (M1 Mac, fast network):
|
|
444
|
+
|
|
445
|
+
| Page Type | First Crawl | Subsequent | Config |
|
|
446
|
+
| ----------- | ----------- | ---------- | --------------------------- |
|
|
447
|
+
| Static HTML | ~2s | ~500ms | `block_resources: true` |
|
|
448
|
+
| SPA (React) | ~3s | ~1.2s | `wait_until: "networkidle"` |
|
|
449
|
+
| Heavy site | ~4s | ~2s | `block_resources: false` |
|
|
450
|
+
|
|
451
|
+
**Note**: First crawl includes browser launch time (~1.5s). Subsequent crawls reuse the browser.
|
|
452
|
+
|
|
453
|
+
### Optimization Tips
|
|
454
|
+
|
|
455
|
+
1. **Enable resource blocking** for content-only extraction:
|
|
456
|
+
|
|
457
|
+
```ruby
|
|
458
|
+
RubyCrawl.configure(block_resources: true)
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
2. **Use appropriate wait strategy**:
|
|
462
|
+
- Static sites: `wait_until: "load"`
|
|
463
|
+
- SPAs: `wait_until: "networkidle"`
|
|
464
|
+
|
|
465
|
+
3. **Batch processing**: Use background jobs for concurrent crawling:
|
|
466
|
+
```ruby
|
|
467
|
+
urls.each { |url| CrawlJob.perform_later(url) }
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
## Development
|
|
471
|
+
|
|
472
|
+
### Setup
|
|
473
|
+
|
|
474
|
+
```bash
|
|
475
|
+
git clone git@github.com:craft-wise/rubycrawl.git
|
|
476
|
+
cd rubycrawl
|
|
477
|
+
bin/setup # Installs dependencies and sets up Node service
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
### Running Tests
|
|
481
|
+
|
|
482
|
+
```bash
|
|
483
|
+
bundle exec rspec
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
### Manual Testing
|
|
487
|
+
|
|
488
|
+
```bash
|
|
489
|
+
# Terminal 1: Start Node service manually (optional)
|
|
490
|
+
cd node
|
|
491
|
+
npm start
|
|
492
|
+
|
|
493
|
+
# Terminal 2: Ruby console
|
|
494
|
+
bin/console
|
|
495
|
+
> result = RubyCrawl.crawl("https://example.com")
|
|
496
|
+
> puts result.html
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
### Project Structure
|
|
500
|
+
|
|
501
|
+
```
|
|
502
|
+
rubycrawl/
|
|
503
|
+
├── lib/
|
|
504
|
+
│ ├── rubycrawl.rb # Main gem entry point
|
|
505
|
+
│ ├── rubycrawl/
|
|
506
|
+
│ │ ├── version.rb # Gem version
|
|
507
|
+
│ │ ├── railtie.rb # Rails integration
|
|
508
|
+
│ │ └── tasks/
|
|
509
|
+
│ │ └── install.rake # Installation task
|
|
510
|
+
├── node/
|
|
511
|
+
│ ├── src/
|
|
512
|
+
│ │ └── index.js # Playwright HTTP service
|
|
513
|
+
│ ├── package.json
|
|
514
|
+
│ └── README.md
|
|
515
|
+
├── spec/ # RSpec tests
|
|
516
|
+
├── .github/
|
|
517
|
+
│ └── copilot-instructions.md # GitHub Copilot guidelines
|
|
518
|
+
├── CLAUDE.md # Claude AI guidelines
|
|
519
|
+
└── README.md
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
## Roadmap
|
|
523
|
+
|
|
524
|
+
### Current (v0.1.0)
|
|
525
|
+
|
|
526
|
+
- [x] HTML extraction
|
|
527
|
+
- [x] Link extraction
|
|
528
|
+
- [x] Markdown conversion (lazy-loaded)
|
|
529
|
+
- [x] Multi-page crawling with BFS
|
|
530
|
+
- [x] URL normalization and deduplication
|
|
531
|
+
- [x] Basic metadata (status, final URL)
|
|
532
|
+
- [x] Resource blocking
|
|
533
|
+
- [x] Rails integration
|
|
534
|
+
|
|
535
|
+
### Coming Soon
|
|
536
|
+
|
|
537
|
+
- [ ] Plain text extraction
|
|
538
|
+
- [ ] Screenshot capture
|
|
539
|
+
- [ ] Custom JavaScript execution
|
|
540
|
+
- [ ] Session/cookie support
|
|
541
|
+
- [ ] Proxy support
|
|
542
|
+
- [ ] Robots.txt support
|
|
543
|
+
|
|
544
|
+
## Contributing
|
|
545
|
+
|
|
546
|
+
Contributions are welcome! Please read our [contribution guidelines](.github/copilot-instructions.md) first.
|
|
547
|
+
|
|
548
|
+
### Development Philosophy
|
|
549
|
+
|
|
550
|
+
- **Simplicity over cleverness**: Prefer clear, explicit code
|
|
551
|
+
- **Stability over speed**: Correctness first, optimization second
|
|
552
|
+
- **Ruby-first**: Hide Node.js/Playwright complexity from users
|
|
553
|
+
- **No vendor lock-in**: Pure open source, no SaaS dependencies
|
|
554
|
+
|
|
555
|
+
## Comparison with crawl4ai
|
|
556
|
+
|
|
557
|
+
| Feature | crawl4ai (Python) | rubycrawl (Ruby) |
|
|
558
|
+
| ------------------- | ----------------- | ---------------- |
|
|
559
|
+
| Browser automation | Playwright | Playwright |
|
|
560
|
+
| Language | Python | Ruby |
|
|
561
|
+
| LLM extraction | ✅ | Planned |
|
|
562
|
+
| Markdown extraction | ✅ | ✅ |
|
|
563
|
+
| Link extraction | ✅ | ✅ |
|
|
564
|
+
| Multi-page crawling | ✅ | ✅ |
|
|
565
|
+
| Rails integration | N/A | ✅ |
|
|
566
|
+
| Resource blocking | ✅ | ✅ |
|
|
567
|
+
| Session management | ✅ | Planned |
|
|
568
|
+
|
|
569
|
+
RubyCrawl aims to bring the same level of accuracy and reliability as crawl4ai to the Ruby ecosystem.
|
|
570
|
+
|
|
571
|
+
## License
|
|
572
|
+
|
|
573
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE).
|
|
574
|
+
|
|
575
|
+
## Credits
|
|
576
|
+
|
|
577
|
+
Inspired by [crawl4ai](https://github.com/unclecode/crawl4ai) by @unclecode.
|
|
578
|
+
|
|
579
|
+
Built with [Playwright](https://playwright.dev/) by Microsoft.
|
|
580
|
+
|
|
581
|
+
## Support
|
|
582
|
+
|
|
583
|
+
- **Issues**: [GitHub Issues](https://github.com/craft-wise/rubycrawl/issues)
|
|
584
|
+
- **Discussions**: [GitHub Discussions](https://github.com/craft-wise/rubycrawl/discussions)
|
|
585
|
+
- **Email**: ganesh.navale@zohomail.in
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/bin/setup
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class RubyCrawl
|
|
4
|
+
# Base error class for all RubyCrawl errors
|
|
5
|
+
class Error < StandardError; end
|
|
6
|
+
|
|
7
|
+
# Raised when the Node.js service fails to start or is unavailable
|
|
8
|
+
class ServiceError < Error; end
|
|
9
|
+
|
|
10
|
+
# Raised when page navigation fails (timeout, DNS, SSL, etc.)
|
|
11
|
+
class NavigationError < Error; end
|
|
12
|
+
|
|
13
|
+
# Raised when a crawl operation times out
|
|
14
|
+
class TimeoutError < Error; end
|
|
15
|
+
|
|
16
|
+
# Raised when invalid configuration is provided
|
|
17
|
+
class ConfigurationError < Error; end
|
|
18
|
+
end
|