rubycrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589):
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +11 -0
  4. data/LICENSE +21 -0
  5. data/README.md +585 -0
  6. data/Rakefile +8 -0
  7. data/bin/console +9 -0
  8. data/bin/setup +4 -0
  9. data/lib/rubycrawl/errors.rb +18 -0
  10. data/lib/rubycrawl/helpers.rb +66 -0
  11. data/lib/rubycrawl/markdown_converter.rb +37 -0
  12. data/lib/rubycrawl/railtie.rb +12 -0
  13. data/lib/rubycrawl/result.rb +40 -0
  14. data/lib/rubycrawl/service_client.rb +86 -0
  15. data/lib/rubycrawl/site_crawler.rb +113 -0
  16. data/lib/rubycrawl/tasks/install.rake +85 -0
  17. data/lib/rubycrawl/url_normalizer.rb +68 -0
  18. data/lib/rubycrawl/version.rb +5 -0
  19. data/lib/rubycrawl.rb +141 -0
  20. data/node/.gitignore +2 -0
  21. data/node/.npmrc +1 -0
  22. data/node/README.md +19 -0
  23. data/node/node_modules/.bin/playwright +1 -0
  24. data/node/node_modules/.bin/playwright-core +1 -0
  25. data/node/node_modules/.package-lock.json +65 -0
  26. data/node/node_modules/dotenv/CHANGELOG.md +520 -0
  27. data/node/node_modules/dotenv/LICENSE +23 -0
  28. data/node/node_modules/dotenv/README-es.md +411 -0
  29. data/node/node_modules/dotenv/README.md +645 -0
  30. data/node/node_modules/dotenv/SECURITY.md +1 -0
  31. data/node/node_modules/dotenv/config.d.ts +1 -0
  32. data/node/node_modules/dotenv/config.js +9 -0
  33. data/node/node_modules/dotenv/lib/cli-options.js +17 -0
  34. data/node/node_modules/dotenv/lib/env-options.js +28 -0
  35. data/node/node_modules/dotenv/lib/main.d.ts +162 -0
  36. data/node/node_modules/dotenv/lib/main.js +386 -0
  37. data/node/node_modules/dotenv/package.json +62 -0
  38. data/node/node_modules/playwright/LICENSE +202 -0
  39. data/node/node_modules/playwright/NOTICE +5 -0
  40. data/node/node_modules/playwright/README.md +168 -0
  41. data/node/node_modules/playwright/ThirdPartyNotices.txt +5042 -0
  42. data/node/node_modules/playwright/cli.js +19 -0
  43. data/node/node_modules/playwright/index.d.ts +17 -0
  44. data/node/node_modules/playwright/index.js +17 -0
  45. data/node/node_modules/playwright/index.mjs +18 -0
  46. data/node/node_modules/playwright/jsx-runtime.js +42 -0
  47. data/node/node_modules/playwright/jsx-runtime.mjs +21 -0
  48. data/node/node_modules/playwright/lib/agents/agentParser.js +89 -0
  49. data/node/node_modules/playwright/lib/agents/copilot-setup-steps.yml +34 -0
  50. data/node/node_modules/playwright/lib/agents/generateAgents.js +348 -0
  51. data/node/node_modules/playwright/lib/agents/playwright-test-coverage.prompt.md +31 -0
  52. data/node/node_modules/playwright/lib/agents/playwright-test-generate.prompt.md +8 -0
  53. data/node/node_modules/playwright/lib/agents/playwright-test-generator.agent.md +88 -0
  54. data/node/node_modules/playwright/lib/agents/playwright-test-heal.prompt.md +6 -0
  55. data/node/node_modules/playwright/lib/agents/playwright-test-healer.agent.md +55 -0
  56. data/node/node_modules/playwright/lib/agents/playwright-test-plan.prompt.md +9 -0
  57. data/node/node_modules/playwright/lib/agents/playwright-test-planner.agent.md +73 -0
  58. data/node/node_modules/playwright/lib/common/config.js +282 -0
  59. data/node/node_modules/playwright/lib/common/configLoader.js +344 -0
  60. data/node/node_modules/playwright/lib/common/esmLoaderHost.js +104 -0
  61. data/node/node_modules/playwright/lib/common/expectBundle.js +28 -0
  62. data/node/node_modules/playwright/lib/common/expectBundleImpl.js +407 -0
  63. data/node/node_modules/playwright/lib/common/fixtures.js +302 -0
  64. data/node/node_modules/playwright/lib/common/globals.js +58 -0
  65. data/node/node_modules/playwright/lib/common/ipc.js +60 -0
  66. data/node/node_modules/playwright/lib/common/poolBuilder.js +85 -0
  67. data/node/node_modules/playwright/lib/common/process.js +132 -0
  68. data/node/node_modules/playwright/lib/common/suiteUtils.js +140 -0
  69. data/node/node_modules/playwright/lib/common/test.js +321 -0
  70. data/node/node_modules/playwright/lib/common/testLoader.js +101 -0
  71. data/node/node_modules/playwright/lib/common/testType.js +298 -0
  72. data/node/node_modules/playwright/lib/common/validators.js +68 -0
  73. data/node/node_modules/playwright/lib/fsWatcher.js +67 -0
  74. data/node/node_modules/playwright/lib/index.js +726 -0
  75. data/node/node_modules/playwright/lib/internalsForTest.js +42 -0
  76. data/node/node_modules/playwright/lib/isomorphic/events.js +77 -0
  77. data/node/node_modules/playwright/lib/isomorphic/folders.js +30 -0
  78. data/node/node_modules/playwright/lib/isomorphic/stringInternPool.js +69 -0
  79. data/node/node_modules/playwright/lib/isomorphic/teleReceiver.js +521 -0
  80. data/node/node_modules/playwright/lib/isomorphic/teleSuiteUpdater.js +157 -0
  81. data/node/node_modules/playwright/lib/isomorphic/testServerConnection.js +225 -0
  82. data/node/node_modules/playwright/lib/isomorphic/testServerInterface.js +16 -0
  83. data/node/node_modules/playwright/lib/isomorphic/testTree.js +329 -0
  84. data/node/node_modules/playwright/lib/isomorphic/types.d.js +16 -0
  85. data/node/node_modules/playwright/lib/loader/loaderMain.js +59 -0
  86. data/node/node_modules/playwright/lib/matchers/expect.js +311 -0
  87. data/node/node_modules/playwright/lib/matchers/matcherHint.js +44 -0
  88. data/node/node_modules/playwright/lib/matchers/matchers.js +383 -0
  89. data/node/node_modules/playwright/lib/matchers/toBeTruthy.js +75 -0
  90. data/node/node_modules/playwright/lib/matchers/toEqual.js +100 -0
  91. data/node/node_modules/playwright/lib/matchers/toHaveURL.js +101 -0
  92. data/node/node_modules/playwright/lib/matchers/toMatchAriaSnapshot.js +159 -0
  93. data/node/node_modules/playwright/lib/matchers/toMatchSnapshot.js +342 -0
  94. data/node/node_modules/playwright/lib/matchers/toMatchText.js +99 -0
  95. data/node/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +329 -0
  96. data/node/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +84 -0
  97. data/node/node_modules/playwright/lib/mcp/browser/config.js +421 -0
  98. data/node/node_modules/playwright/lib/mcp/browser/context.js +244 -0
  99. data/node/node_modules/playwright/lib/mcp/browser/response.js +278 -0
  100. data/node/node_modules/playwright/lib/mcp/browser/sessionLog.js +75 -0
  101. data/node/node_modules/playwright/lib/mcp/browser/tab.js +343 -0
  102. data/node/node_modules/playwright/lib/mcp/browser/tools/common.js +65 -0
  103. data/node/node_modules/playwright/lib/mcp/browser/tools/console.js +46 -0
  104. data/node/node_modules/playwright/lib/mcp/browser/tools/dialogs.js +60 -0
  105. data/node/node_modules/playwright/lib/mcp/browser/tools/evaluate.js +61 -0
  106. data/node/node_modules/playwright/lib/mcp/browser/tools/files.js +58 -0
  107. data/node/node_modules/playwright/lib/mcp/browser/tools/form.js +63 -0
  108. data/node/node_modules/playwright/lib/mcp/browser/tools/install.js +72 -0
  109. data/node/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +107 -0
  110. data/node/node_modules/playwright/lib/mcp/browser/tools/mouse.js +107 -0
  111. data/node/node_modules/playwright/lib/mcp/browser/tools/navigate.js +71 -0
  112. data/node/node_modules/playwright/lib/mcp/browser/tools/network.js +63 -0
  113. data/node/node_modules/playwright/lib/mcp/browser/tools/open.js +57 -0
  114. data/node/node_modules/playwright/lib/mcp/browser/tools/pdf.js +49 -0
  115. data/node/node_modules/playwright/lib/mcp/browser/tools/runCode.js +78 -0
  116. data/node/node_modules/playwright/lib/mcp/browser/tools/screenshot.js +93 -0
  117. data/node/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +173 -0
  118. data/node/node_modules/playwright/lib/mcp/browser/tools/tabs.js +67 -0
  119. data/node/node_modules/playwright/lib/mcp/browser/tools/tool.js +47 -0
  120. data/node/node_modules/playwright/lib/mcp/browser/tools/tracing.js +74 -0
  121. data/node/node_modules/playwright/lib/mcp/browser/tools/utils.js +94 -0
  122. data/node/node_modules/playwright/lib/mcp/browser/tools/verify.js +143 -0
  123. data/node/node_modules/playwright/lib/mcp/browser/tools/wait.js +63 -0
  124. data/node/node_modules/playwright/lib/mcp/browser/tools.js +84 -0
  125. data/node/node_modules/playwright/lib/mcp/browser/watchdog.js +44 -0
  126. data/node/node_modules/playwright/lib/mcp/config.d.js +16 -0
  127. data/node/node_modules/playwright/lib/mcp/extension/cdpRelay.js +351 -0
  128. data/node/node_modules/playwright/lib/mcp/extension/extensionContextFactory.js +76 -0
  129. data/node/node_modules/playwright/lib/mcp/extension/protocol.js +28 -0
  130. data/node/node_modules/playwright/lib/mcp/index.js +61 -0
  131. data/node/node_modules/playwright/lib/mcp/log.js +35 -0
  132. data/node/node_modules/playwright/lib/mcp/program.js +111 -0
  133. data/node/node_modules/playwright/lib/mcp/sdk/exports.js +28 -0
  134. data/node/node_modules/playwright/lib/mcp/sdk/http.js +152 -0
  135. data/node/node_modules/playwright/lib/mcp/sdk/inProcessTransport.js +71 -0
  136. data/node/node_modules/playwright/lib/mcp/sdk/server.js +223 -0
  137. data/node/node_modules/playwright/lib/mcp/sdk/tool.js +47 -0
  138. data/node/node_modules/playwright/lib/mcp/terminal/cli.js +296 -0
  139. data/node/node_modules/playwright/lib/mcp/terminal/command.js +56 -0
  140. data/node/node_modules/playwright/lib/mcp/terminal/commands.js +333 -0
  141. data/node/node_modules/playwright/lib/mcp/terminal/daemon.js +129 -0
  142. data/node/node_modules/playwright/lib/mcp/terminal/help.json +32 -0
  143. data/node/node_modules/playwright/lib/mcp/terminal/helpGenerator.js +88 -0
  144. data/node/node_modules/playwright/lib/mcp/terminal/socketConnection.js +80 -0
  145. data/node/node_modules/playwright/lib/mcp/test/browserBackend.js +98 -0
  146. data/node/node_modules/playwright/lib/mcp/test/generatorTools.js +122 -0
  147. data/node/node_modules/playwright/lib/mcp/test/plannerTools.js +145 -0
  148. data/node/node_modules/playwright/lib/mcp/test/seed.js +82 -0
  149. data/node/node_modules/playwright/lib/mcp/test/streams.js +44 -0
  150. data/node/node_modules/playwright/lib/mcp/test/testBackend.js +99 -0
  151. data/node/node_modules/playwright/lib/mcp/test/testContext.js +285 -0
  152. data/node/node_modules/playwright/lib/mcp/test/testTool.js +30 -0
  153. data/node/node_modules/playwright/lib/mcp/test/testTools.js +108 -0
  154. data/node/node_modules/playwright/lib/plugins/gitCommitInfoPlugin.js +198 -0
  155. data/node/node_modules/playwright/lib/plugins/index.js +28 -0
  156. data/node/node_modules/playwright/lib/plugins/webServerPlugin.js +237 -0
  157. data/node/node_modules/playwright/lib/program.js +417 -0
  158. data/node/node_modules/playwright/lib/reporters/base.js +634 -0
  159. data/node/node_modules/playwright/lib/reporters/blob.js +138 -0
  160. data/node/node_modules/playwright/lib/reporters/dot.js +99 -0
  161. data/node/node_modules/playwright/lib/reporters/empty.js +32 -0
  162. data/node/node_modules/playwright/lib/reporters/github.js +128 -0
  163. data/node/node_modules/playwright/lib/reporters/html.js +633 -0
  164. data/node/node_modules/playwright/lib/reporters/internalReporter.js +138 -0
  165. data/node/node_modules/playwright/lib/reporters/json.js +254 -0
  166. data/node/node_modules/playwright/lib/reporters/junit.js +232 -0
  167. data/node/node_modules/playwright/lib/reporters/line.js +131 -0
  168. data/node/node_modules/playwright/lib/reporters/list.js +253 -0
  169. data/node/node_modules/playwright/lib/reporters/listModeReporter.js +69 -0
  170. data/node/node_modules/playwright/lib/reporters/markdown.js +144 -0
  171. data/node/node_modules/playwright/lib/reporters/merge.js +558 -0
  172. data/node/node_modules/playwright/lib/reporters/multiplexer.js +112 -0
  173. data/node/node_modules/playwright/lib/reporters/reporterV2.js +102 -0
  174. data/node/node_modules/playwright/lib/reporters/teleEmitter.js +317 -0
  175. data/node/node_modules/playwright/lib/reporters/versions/blobV1.js +16 -0
  176. data/node/node_modules/playwright/lib/runner/dispatcher.js +530 -0
  177. data/node/node_modules/playwright/lib/runner/failureTracker.js +72 -0
  178. data/node/node_modules/playwright/lib/runner/lastRun.js +77 -0
  179. data/node/node_modules/playwright/lib/runner/loadUtils.js +334 -0
  180. data/node/node_modules/playwright/lib/runner/loaderHost.js +89 -0
  181. data/node/node_modules/playwright/lib/runner/processHost.js +180 -0
  182. data/node/node_modules/playwright/lib/runner/projectUtils.js +241 -0
  183. data/node/node_modules/playwright/lib/runner/rebase.js +189 -0
  184. data/node/node_modules/playwright/lib/runner/reporters.js +138 -0
  185. data/node/node_modules/playwright/lib/runner/sigIntWatcher.js +96 -0
  186. data/node/node_modules/playwright/lib/runner/storage.js +91 -0
  187. data/node/node_modules/playwright/lib/runner/taskRunner.js +127 -0
  188. data/node/node_modules/playwright/lib/runner/tasks.js +410 -0
  189. data/node/node_modules/playwright/lib/runner/testGroups.js +125 -0
  190. data/node/node_modules/playwright/lib/runner/testRunner.js +398 -0
  191. data/node/node_modules/playwright/lib/runner/testServer.js +269 -0
  192. data/node/node_modules/playwright/lib/runner/uiModeReporter.js +30 -0
  193. data/node/node_modules/playwright/lib/runner/vcs.js +72 -0
  194. data/node/node_modules/playwright/lib/runner/watchMode.js +396 -0
  195. data/node/node_modules/playwright/lib/runner/workerHost.js +104 -0
  196. data/node/node_modules/playwright/lib/third_party/pirates.js +62 -0
  197. data/node/node_modules/playwright/lib/third_party/tsconfig-loader.js +103 -0
  198. data/node/node_modules/playwright/lib/transform/babelBundle.js +46 -0
  199. data/node/node_modules/playwright/lib/transform/babelBundleImpl.js +461 -0
  200. data/node/node_modules/playwright/lib/transform/compilationCache.js +274 -0
  201. data/node/node_modules/playwright/lib/transform/esmLoader.js +103 -0
  202. data/node/node_modules/playwright/lib/transform/md.js +221 -0
  203. data/node/node_modules/playwright/lib/transform/portTransport.js +67 -0
  204. data/node/node_modules/playwright/lib/transform/transform.js +303 -0
  205. data/node/node_modules/playwright/lib/util.js +400 -0
  206. data/node/node_modules/playwright/lib/utilsBundle.js +50 -0
  207. data/node/node_modules/playwright/lib/utilsBundleImpl.js +103 -0
  208. data/node/node_modules/playwright/lib/worker/fixtureRunner.js +262 -0
  209. data/node/node_modules/playwright/lib/worker/testInfo.js +536 -0
  210. data/node/node_modules/playwright/lib/worker/testTracing.js +345 -0
  211. data/node/node_modules/playwright/lib/worker/timeoutManager.js +174 -0
  212. data/node/node_modules/playwright/lib/worker/util.js +31 -0
  213. data/node/node_modules/playwright/lib/worker/workerMain.js +530 -0
  214. data/node/node_modules/playwright/package.json +72 -0
  215. data/node/node_modules/playwright/test.d.ts +18 -0
  216. data/node/node_modules/playwright/test.js +24 -0
  217. data/node/node_modules/playwright/test.mjs +34 -0
  218. data/node/node_modules/playwright/types/test.d.ts +10251 -0
  219. data/node/node_modules/playwright/types/testReporter.d.ts +822 -0
  220. data/node/node_modules/playwright-core/LICENSE +202 -0
  221. data/node/node_modules/playwright-core/NOTICE +5 -0
  222. data/node/node_modules/playwright-core/README.md +3 -0
  223. data/node/node_modules/playwright-core/ThirdPartyNotices.txt +4076 -0
  224. data/node/node_modules/playwright-core/bin/install_media_pack.ps1 +5 -0
  225. data/node/node_modules/playwright-core/bin/install_webkit_wsl.ps1 +33 -0
  226. data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_linux.sh +42 -0
  227. data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_mac.sh +13 -0
  228. data/node/node_modules/playwright-core/bin/reinstall_chrome_beta_win.ps1 +24 -0
  229. data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_linux.sh +42 -0
  230. data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_mac.sh +12 -0
  231. data/node/node_modules/playwright-core/bin/reinstall_chrome_stable_win.ps1 +24 -0
  232. data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_linux.sh +48 -0
  233. data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_mac.sh +11 -0
  234. data/node/node_modules/playwright-core/bin/reinstall_msedge_beta_win.ps1 +23 -0
  235. data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_linux.sh +48 -0
  236. data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_mac.sh +11 -0
  237. data/node/node_modules/playwright-core/bin/reinstall_msedge_dev_win.ps1 +23 -0
  238. data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_linux.sh +48 -0
  239. data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_mac.sh +11 -0
  240. data/node/node_modules/playwright-core/bin/reinstall_msedge_stable_win.ps1 +24 -0
  241. data/node/node_modules/playwright-core/browsers.json +79 -0
  242. data/node/node_modules/playwright-core/cli.js +18 -0
  243. data/node/node_modules/playwright-core/index.d.ts +17 -0
  244. data/node/node_modules/playwright-core/index.js +32 -0
  245. data/node/node_modules/playwright-core/index.mjs +28 -0
  246. data/node/node_modules/playwright-core/lib/androidServerImpl.js +65 -0
  247. data/node/node_modules/playwright-core/lib/browserServerImpl.js +120 -0
  248. data/node/node_modules/playwright-core/lib/cli/driver.js +97 -0
  249. data/node/node_modules/playwright-core/lib/cli/program.js +589 -0
  250. data/node/node_modules/playwright-core/lib/cli/programWithTestStub.js +74 -0
  251. data/node/node_modules/playwright-core/lib/client/android.js +361 -0
  252. data/node/node_modules/playwright-core/lib/client/api.js +137 -0
  253. data/node/node_modules/playwright-core/lib/client/artifact.js +79 -0
  254. data/node/node_modules/playwright-core/lib/client/browser.js +161 -0
  255. data/node/node_modules/playwright-core/lib/client/browserContext.js +582 -0
  256. data/node/node_modules/playwright-core/lib/client/browserType.js +185 -0
  257. data/node/node_modules/playwright-core/lib/client/cdpSession.js +51 -0
  258. data/node/node_modules/playwright-core/lib/client/channelOwner.js +194 -0
  259. data/node/node_modules/playwright-core/lib/client/clientHelper.js +64 -0
  260. data/node/node_modules/playwright-core/lib/client/clientInstrumentation.js +55 -0
  261. data/node/node_modules/playwright-core/lib/client/clientStackTrace.js +69 -0
  262. data/node/node_modules/playwright-core/lib/client/clock.js +68 -0
  263. data/node/node_modules/playwright-core/lib/client/connection.js +318 -0
  264. data/node/node_modules/playwright-core/lib/client/consoleMessage.js +58 -0
  265. data/node/node_modules/playwright-core/lib/client/coverage.js +44 -0
  266. data/node/node_modules/playwright-core/lib/client/dialog.js +56 -0
  267. data/node/node_modules/playwright-core/lib/client/download.js +62 -0
  268. data/node/node_modules/playwright-core/lib/client/electron.js +138 -0
  269. data/node/node_modules/playwright-core/lib/client/elementHandle.js +284 -0
  270. data/node/node_modules/playwright-core/lib/client/errors.js +77 -0
  271. data/node/node_modules/playwright-core/lib/client/eventEmitter.js +314 -0
  272. data/node/node_modules/playwright-core/lib/client/events.js +103 -0
  273. data/node/node_modules/playwright-core/lib/client/fetch.js +368 -0
  274. data/node/node_modules/playwright-core/lib/client/fileChooser.js +46 -0
  275. data/node/node_modules/playwright-core/lib/client/fileUtils.js +34 -0
  276. data/node/node_modules/playwright-core/lib/client/frame.js +409 -0
  277. data/node/node_modules/playwright-core/lib/client/harRouter.js +87 -0
  278. data/node/node_modules/playwright-core/lib/client/input.js +84 -0
  279. data/node/node_modules/playwright-core/lib/client/jsHandle.js +109 -0
  280. data/node/node_modules/playwright-core/lib/client/jsonPipe.js +39 -0
  281. data/node/node_modules/playwright-core/lib/client/localUtils.js +60 -0
  282. data/node/node_modules/playwright-core/lib/client/locator.js +369 -0
  283. data/node/node_modules/playwright-core/lib/client/network.js +747 -0
  284. data/node/node_modules/playwright-core/lib/client/page.js +745 -0
  285. data/node/node_modules/playwright-core/lib/client/pageAgent.js +64 -0
  286. data/node/node_modules/playwright-core/lib/client/platform.js +77 -0
  287. data/node/node_modules/playwright-core/lib/client/playwright.js +71 -0
  288. data/node/node_modules/playwright-core/lib/client/selectors.js +55 -0
  289. data/node/node_modules/playwright-core/lib/client/stream.js +39 -0
  290. data/node/node_modules/playwright-core/lib/client/timeoutSettings.js +79 -0
  291. data/node/node_modules/playwright-core/lib/client/tracing.js +119 -0
  292. data/node/node_modules/playwright-core/lib/client/types.js +28 -0
  293. data/node/node_modules/playwright-core/lib/client/video.js +59 -0
  294. data/node/node_modules/playwright-core/lib/client/waiter.js +142 -0
  295. data/node/node_modules/playwright-core/lib/client/webError.js +39 -0
  296. data/node/node_modules/playwright-core/lib/client/webSocket.js +93 -0
  297. data/node/node_modules/playwright-core/lib/client/worker.js +85 -0
  298. data/node/node_modules/playwright-core/lib/client/writableStream.js +39 -0
  299. data/node/node_modules/playwright-core/lib/generated/bindingsControllerSource.js +28 -0
  300. data/node/node_modules/playwright-core/lib/generated/clockSource.js +28 -0
  301. data/node/node_modules/playwright-core/lib/generated/injectedScriptSource.js +28 -0
  302. data/node/node_modules/playwright-core/lib/generated/pollingRecorderSource.js +28 -0
  303. data/node/node_modules/playwright-core/lib/generated/storageScriptSource.js +28 -0
  304. data/node/node_modules/playwright-core/lib/generated/utilityScriptSource.js +28 -0
  305. data/node/node_modules/playwright-core/lib/generated/webSocketMockSource.js +336 -0
  306. data/node/node_modules/playwright-core/lib/inProcessFactory.js +60 -0
  307. data/node/node_modules/playwright-core/lib/inprocess.js +3 -0
  308. data/node/node_modules/playwright-core/lib/mcpBundle.js +84 -0
  309. data/node/node_modules/playwright-core/lib/mcpBundleImpl/index.js +147 -0
  310. data/node/node_modules/playwright-core/lib/outofprocess.js +76 -0
  311. data/node/node_modules/playwright-core/lib/protocol/serializers.js +197 -0
  312. data/node/node_modules/playwright-core/lib/protocol/validator.js +2969 -0
  313. data/node/node_modules/playwright-core/lib/protocol/validatorPrimitives.js +193 -0
  314. data/node/node_modules/playwright-core/lib/remote/playwrightConnection.js +129 -0
  315. data/node/node_modules/playwright-core/lib/remote/playwrightServer.js +334 -0
  316. data/node/node_modules/playwright-core/lib/server/agent/actionRunner.js +335 -0
  317. data/node/node_modules/playwright-core/lib/server/agent/actions.js +128 -0
  318. data/node/node_modules/playwright-core/lib/server/agent/codegen.js +111 -0
  319. data/node/node_modules/playwright-core/lib/server/agent/context.js +150 -0
  320. data/node/node_modules/playwright-core/lib/server/agent/expectTools.js +156 -0
  321. data/node/node_modules/playwright-core/lib/server/agent/pageAgent.js +204 -0
  322. data/node/node_modules/playwright-core/lib/server/agent/performTools.js +262 -0
  323. data/node/node_modules/playwright-core/lib/server/agent/tool.js +109 -0
  324. data/node/node_modules/playwright-core/lib/server/android/android.js +465 -0
  325. data/node/node_modules/playwright-core/lib/server/android/backendAdb.js +177 -0
  326. data/node/node_modules/playwright-core/lib/server/artifact.js +127 -0
  327. data/node/node_modules/playwright-core/lib/server/bidi/bidiBrowser.js +549 -0
  328. data/node/node_modules/playwright-core/lib/server/bidi/bidiChromium.js +148 -0
  329. data/node/node_modules/playwright-core/lib/server/bidi/bidiConnection.js +213 -0
  330. data/node/node_modules/playwright-core/lib/server/bidi/bidiDeserializer.js +116 -0
  331. data/node/node_modules/playwright-core/lib/server/bidi/bidiExecutionContext.js +267 -0
  332. data/node/node_modules/playwright-core/lib/server/bidi/bidiFirefox.js +128 -0
  333. data/node/node_modules/playwright-core/lib/server/bidi/bidiInput.js +146 -0
  334. data/node/node_modules/playwright-core/lib/server/bidi/bidiNetworkManager.js +383 -0
  335. data/node/node_modules/playwright-core/lib/server/bidi/bidiOverCdp.js +102 -0
  336. data/node/node_modules/playwright-core/lib/server/bidi/bidiPage.js +583 -0
  337. data/node/node_modules/playwright-core/lib/server/bidi/bidiPdf.js +106 -0
  338. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiCommands.d.js +22 -0
  339. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiKeyboard.js +256 -0
  340. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocol.js +24 -0
  341. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolCore.js +180 -0
  342. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiProtocolPermissions.js +42 -0
  343. data/node/node_modules/playwright-core/lib/server/bidi/third_party/bidiSerializer.js +148 -0
  344. data/node/node_modules/playwright-core/lib/server/bidi/third_party/firefoxPrefs.js +259 -0
  345. data/node/node_modules/playwright-core/lib/server/browser.js +149 -0
  346. data/node/node_modules/playwright-core/lib/server/browserContext.js +702 -0
  347. data/node/node_modules/playwright-core/lib/server/browserType.js +336 -0
  348. data/node/node_modules/playwright-core/lib/server/callLog.js +82 -0
  349. data/node/node_modules/playwright-core/lib/server/chromium/appIcon.png +0 -0
  350. data/node/node_modules/playwright-core/lib/server/chromium/chromium.js +395 -0
  351. data/node/node_modules/playwright-core/lib/server/chromium/chromiumSwitches.js +104 -0
  352. data/node/node_modules/playwright-core/lib/server/chromium/crBrowser.js +511 -0
  353. data/node/node_modules/playwright-core/lib/server/chromium/crConnection.js +197 -0
  354. data/node/node_modules/playwright-core/lib/server/chromium/crCoverage.js +235 -0
  355. data/node/node_modules/playwright-core/lib/server/chromium/crDevTools.js +111 -0
  356. data/node/node_modules/playwright-core/lib/server/chromium/crDragDrop.js +131 -0
  357. data/node/node_modules/playwright-core/lib/server/chromium/crExecutionContext.js +146 -0
  358. data/node/node_modules/playwright-core/lib/server/chromium/crInput.js +187 -0
  359. data/node/node_modules/playwright-core/lib/server/chromium/crNetworkManager.js +707 -0
  360. data/node/node_modules/playwright-core/lib/server/chromium/crPage.js +1001 -0
  361. data/node/node_modules/playwright-core/lib/server/chromium/crPdf.js +121 -0
  362. data/node/node_modules/playwright-core/lib/server/chromium/crProtocolHelper.js +145 -0
  363. data/node/node_modules/playwright-core/lib/server/chromium/crServiceWorker.js +136 -0
  364. data/node/node_modules/playwright-core/lib/server/chromium/defaultFontFamilies.js +162 -0
  365. data/node/node_modules/playwright-core/lib/server/chromium/protocol.d.js +16 -0
  366. data/node/node_modules/playwright-core/lib/server/clock.js +149 -0
  367. data/node/node_modules/playwright-core/lib/server/codegen/csharp.js +327 -0
  368. data/node/node_modules/playwright-core/lib/server/codegen/java.js +274 -0
  369. data/node/node_modules/playwright-core/lib/server/codegen/javascript.js +247 -0
  370. data/node/node_modules/playwright-core/lib/server/codegen/jsonl.js +52 -0
  371. data/node/node_modules/playwright-core/lib/server/codegen/language.js +132 -0
  372. data/node/node_modules/playwright-core/lib/server/codegen/languages.js +68 -0
  373. data/node/node_modules/playwright-core/lib/server/codegen/python.js +279 -0
  374. data/node/node_modules/playwright-core/lib/server/codegen/types.js +16 -0
  375. data/node/node_modules/playwright-core/lib/server/console.js +57 -0
  376. data/node/node_modules/playwright-core/lib/server/cookieStore.js +206 -0
  377. data/node/node_modules/playwright-core/lib/server/debugController.js +191 -0
  378. data/node/node_modules/playwright-core/lib/server/debugger.js +119 -0
  379. data/node/node_modules/playwright-core/lib/server/deviceDescriptors.js +39 -0
  380. data/node/node_modules/playwright-core/lib/server/deviceDescriptorsSource.json +1779 -0
  381. data/node/node_modules/playwright-core/lib/server/dialog.js +116 -0
  382. data/node/node_modules/playwright-core/lib/server/dispatchers/androidDispatcher.js +325 -0
  383. data/node/node_modules/playwright-core/lib/server/dispatchers/artifactDispatcher.js +118 -0
  384. data/node/node_modules/playwright-core/lib/server/dispatchers/browserContextDispatcher.js +384 -0
  385. data/node/node_modules/playwright-core/lib/server/dispatchers/browserDispatcher.js +118 -0
  386. data/node/node_modules/playwright-core/lib/server/dispatchers/browserTypeDispatcher.js +64 -0
  387. data/node/node_modules/playwright-core/lib/server/dispatchers/cdpSessionDispatcher.js +44 -0
  388. data/node/node_modules/playwright-core/lib/server/dispatchers/debugControllerDispatcher.js +78 -0
  389. data/node/node_modules/playwright-core/lib/server/dispatchers/dialogDispatcher.js +47 -0
  390. data/node/node_modules/playwright-core/lib/server/dispatchers/dispatcher.js +364 -0
  391. data/node/node_modules/playwright-core/lib/server/dispatchers/electronDispatcher.js +89 -0
  392. data/node/node_modules/playwright-core/lib/server/dispatchers/elementHandlerDispatcher.js +181 -0
  393. data/node/node_modules/playwright-core/lib/server/dispatchers/frameDispatcher.js +227 -0
  394. data/node/node_modules/playwright-core/lib/server/dispatchers/jsHandleDispatcher.js +85 -0
  395. data/node/node_modules/playwright-core/lib/server/dispatchers/jsonPipeDispatcher.js +58 -0
  396. data/node/node_modules/playwright-core/lib/server/dispatchers/localUtilsDispatcher.js +149 -0
  397. data/node/node_modules/playwright-core/lib/server/dispatchers/networkDispatchers.js +213 -0
  398. data/node/node_modules/playwright-core/lib/server/dispatchers/pageAgentDispatcher.js +96 -0
  399. data/node/node_modules/playwright-core/lib/server/dispatchers/pageDispatcher.js +393 -0
  400. data/node/node_modules/playwright-core/lib/server/dispatchers/playwrightDispatcher.js +108 -0
  401. data/node/node_modules/playwright-core/lib/server/dispatchers/streamDispatcher.js +67 -0
  402. data/node/node_modules/playwright-core/lib/server/dispatchers/tracingDispatcher.js +68 -0
  403. data/node/node_modules/playwright-core/lib/server/dispatchers/webSocketRouteDispatcher.js +165 -0
  404. data/node/node_modules/playwright-core/lib/server/dispatchers/writableStreamDispatcher.js +79 -0
  405. data/node/node_modules/playwright-core/lib/server/dom.js +815 -0
  406. data/node/node_modules/playwright-core/lib/server/download.js +70 -0
  407. data/node/node_modules/playwright-core/lib/server/electron/electron.js +273 -0
  408. data/node/node_modules/playwright-core/lib/server/electron/loader.js +29 -0
  409. data/node/node_modules/playwright-core/lib/server/errors.js +69 -0
  410. data/node/node_modules/playwright-core/lib/server/fetch.js +621 -0
  411. data/node/node_modules/playwright-core/lib/server/fileChooser.js +43 -0
  412. data/node/node_modules/playwright-core/lib/server/fileUploadUtils.js +84 -0
  413. data/node/node_modules/playwright-core/lib/server/firefox/ffBrowser.js +418 -0
  414. data/node/node_modules/playwright-core/lib/server/firefox/ffConnection.js +142 -0
  415. data/node/node_modules/playwright-core/lib/server/firefox/ffExecutionContext.js +150 -0
  416. data/node/node_modules/playwright-core/lib/server/firefox/ffInput.js +159 -0
  417. data/node/node_modules/playwright-core/lib/server/firefox/ffNetworkManager.js +256 -0
  418. data/node/node_modules/playwright-core/lib/server/firefox/ffPage.js +497 -0
  419. data/node/node_modules/playwright-core/lib/server/firefox/firefox.js +114 -0
  420. data/node/node_modules/playwright-core/lib/server/firefox/protocol.d.js +16 -0
  421. data/node/node_modules/playwright-core/lib/server/formData.js +147 -0
  422. data/node/node_modules/playwright-core/lib/server/frameSelectors.js +160 -0
  423. data/node/node_modules/playwright-core/lib/server/frames.js +1471 -0
  424. data/node/node_modules/playwright-core/lib/server/har/harRecorder.js +147 -0
  425. data/node/node_modules/playwright-core/lib/server/har/harTracer.js +607 -0
  426. data/node/node_modules/playwright-core/lib/server/harBackend.js +157 -0
  427. data/node/node_modules/playwright-core/lib/server/helper.js +96 -0
  428. data/node/node_modules/playwright-core/lib/server/index.js +58 -0
  429. data/node/node_modules/playwright-core/lib/server/input.js +277 -0
  430. data/node/node_modules/playwright-core/lib/server/instrumentation.js +72 -0
  431. data/node/node_modules/playwright-core/lib/server/javascript.js +291 -0
  432. data/node/node_modules/playwright-core/lib/server/launchApp.js +128 -0
  433. data/node/node_modules/playwright-core/lib/server/localUtils.js +214 -0
  434. data/node/node_modules/playwright-core/lib/server/macEditingCommands.js +143 -0
  435. data/node/node_modules/playwright-core/lib/server/network.js +667 -0
  436. data/node/node_modules/playwright-core/lib/server/page.js +830 -0
  437. data/node/node_modules/playwright-core/lib/server/pipeTransport.js +89 -0
  438. data/node/node_modules/playwright-core/lib/server/playwright.js +69 -0
  439. data/node/node_modules/playwright-core/lib/server/progress.js +132 -0
  440. data/node/node_modules/playwright-core/lib/server/protocolError.js +52 -0
  441. data/node/node_modules/playwright-core/lib/server/recorder/chat.js +161 -0
  442. data/node/node_modules/playwright-core/lib/server/recorder/recorderApp.js +366 -0
  443. data/node/node_modules/playwright-core/lib/server/recorder/recorderRunner.js +138 -0
  444. data/node/node_modules/playwright-core/lib/server/recorder/recorderSignalProcessor.js +83 -0
  445. data/node/node_modules/playwright-core/lib/server/recorder/recorderUtils.js +157 -0
  446. data/node/node_modules/playwright-core/lib/server/recorder/throttledFile.js +57 -0
  447. data/node/node_modules/playwright-core/lib/server/recorder.js +499 -0
  448. data/node/node_modules/playwright-core/lib/server/registry/browserFetcher.js +177 -0
  449. data/node/node_modules/playwright-core/lib/server/registry/dependencies.js +371 -0
  450. data/node/node_modules/playwright-core/lib/server/registry/index.js +1422 -0
  451. data/node/node_modules/playwright-core/lib/server/registry/nativeDeps.js +1280 -0
  452. data/node/node_modules/playwright-core/lib/server/registry/oopDownloadBrowserMain.js +127 -0
  453. data/node/node_modules/playwright-core/lib/server/screencast.js +190 -0
  454. data/node/node_modules/playwright-core/lib/server/screenshotter.js +333 -0
  455. data/node/node_modules/playwright-core/lib/server/selectors.js +112 -0
  456. data/node/node_modules/playwright-core/lib/server/socksClientCertificatesInterceptor.js +383 -0
  457. data/node/node_modules/playwright-core/lib/server/socksInterceptor.js +95 -0
  458. data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotter.js +147 -0
  459. data/node/node_modules/playwright-core/lib/server/trace/recorder/snapshotterInjected.js +561 -0
  460. data/node/node_modules/playwright-core/lib/server/trace/recorder/tracing.js +604 -0
  461. data/node/node_modules/playwright-core/lib/server/trace/viewer/traceParser.js +72 -0
  462. data/node/node_modules/playwright-core/lib/server/trace/viewer/traceViewer.js +245 -0
  463. data/node/node_modules/playwright-core/lib/server/transport.js +181 -0
  464. data/node/node_modules/playwright-core/lib/server/types.js +28 -0
  465. data/node/node_modules/playwright-core/lib/server/usKeyboardLayout.js +145 -0
  466. data/node/node_modules/playwright-core/lib/server/utils/ascii.js +44 -0
  467. data/node/node_modules/playwright-core/lib/server/utils/comparators.js +139 -0
  468. data/node/node_modules/playwright-core/lib/server/utils/crypto.js +216 -0
  469. data/node/node_modules/playwright-core/lib/server/utils/debug.js +42 -0
  470. data/node/node_modules/playwright-core/lib/server/utils/debugLogger.js +122 -0
  471. data/node/node_modules/playwright-core/lib/server/utils/env.js +73 -0
  472. data/node/node_modules/playwright-core/lib/server/utils/eventsHelper.js +39 -0
  473. data/node/node_modules/playwright-core/lib/server/utils/expectUtils.js +123 -0
  474. data/node/node_modules/playwright-core/lib/server/utils/fileUtils.js +191 -0
  475. data/node/node_modules/playwright-core/lib/server/utils/happyEyeballs.js +207 -0
  476. data/node/node_modules/playwright-core/lib/server/utils/hostPlatform.js +123 -0
  477. data/node/node_modules/playwright-core/lib/server/utils/httpServer.js +203 -0
  478. data/node/node_modules/playwright-core/lib/server/utils/imageUtils.js +141 -0
  479. data/node/node_modules/playwright-core/lib/server/utils/image_tools/colorUtils.js +89 -0
  480. data/node/node_modules/playwright-core/lib/server/utils/image_tools/compare.js +109 -0
  481. data/node/node_modules/playwright-core/lib/server/utils/image_tools/imageChannel.js +78 -0
  482. data/node/node_modules/playwright-core/lib/server/utils/image_tools/stats.js +102 -0
  483. data/node/node_modules/playwright-core/lib/server/utils/linuxUtils.js +71 -0
  484. data/node/node_modules/playwright-core/lib/server/utils/network.js +242 -0
  485. data/node/node_modules/playwright-core/lib/server/utils/nodePlatform.js +154 -0
  486. data/node/node_modules/playwright-core/lib/server/utils/pipeTransport.js +84 -0
  487. data/node/node_modules/playwright-core/lib/server/utils/processLauncher.js +241 -0
  488. data/node/node_modules/playwright-core/lib/server/utils/profiler.js +65 -0
  489. data/node/node_modules/playwright-core/lib/server/utils/socksProxy.js +511 -0
  490. data/node/node_modules/playwright-core/lib/server/utils/spawnAsync.js +41 -0
  491. data/node/node_modules/playwright-core/lib/server/utils/task.js +51 -0
  492. data/node/node_modules/playwright-core/lib/server/utils/userAgent.js +98 -0
  493. data/node/node_modules/playwright-core/lib/server/utils/wsServer.js +121 -0
  494. data/node/node_modules/playwright-core/lib/server/utils/zipFile.js +74 -0
  495. data/node/node_modules/playwright-core/lib/server/utils/zones.js +57 -0
  496. data/node/node_modules/playwright-core/lib/server/videoRecorder.js +124 -0
  497. data/node/node_modules/playwright-core/lib/server/webkit/protocol.d.js +16 -0
  498. data/node/node_modules/playwright-core/lib/server/webkit/webkit.js +108 -0
  499. data/node/node_modules/playwright-core/lib/server/webkit/wkBrowser.js +335 -0
  500. data/node/node_modules/playwright-core/lib/server/webkit/wkConnection.js +144 -0
  501. data/node/node_modules/playwright-core/lib/server/webkit/wkExecutionContext.js +154 -0
  502. data/node/node_modules/playwright-core/lib/server/webkit/wkInput.js +181 -0
  503. data/node/node_modules/playwright-core/lib/server/webkit/wkInterceptableRequest.js +197 -0
  504. data/node/node_modules/playwright-core/lib/server/webkit/wkPage.js +1158 -0
  505. data/node/node_modules/playwright-core/lib/server/webkit/wkProvisionalPage.js +83 -0
  506. data/node/node_modules/playwright-core/lib/server/webkit/wkWorkers.js +105 -0
  507. data/node/node_modules/playwright-core/lib/third_party/pixelmatch.js +255 -0
  508. data/node/node_modules/playwright-core/lib/utils/isomorphic/ariaSnapshot.js +455 -0
  509. data/node/node_modules/playwright-core/lib/utils/isomorphic/assert.js +31 -0
  510. data/node/node_modules/playwright-core/lib/utils/isomorphic/colors.js +72 -0
  511. data/node/node_modules/playwright-core/lib/utils/isomorphic/cssParser.js +245 -0
  512. data/node/node_modules/playwright-core/lib/utils/isomorphic/cssTokenizer.js +1051 -0
  513. data/node/node_modules/playwright-core/lib/utils/isomorphic/headers.js +53 -0
  514. data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorGenerators.js +689 -0
  515. data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorParser.js +176 -0
  516. data/node/node_modules/playwright-core/lib/utils/isomorphic/locatorUtils.js +81 -0
  517. data/node/node_modules/playwright-core/lib/utils/isomorphic/lruCache.js +51 -0
  518. data/node/node_modules/playwright-core/lib/utils/isomorphic/manualPromise.js +114 -0
  519. data/node/node_modules/playwright-core/lib/utils/isomorphic/mimeType.js +459 -0
  520. data/node/node_modules/playwright-core/lib/utils/isomorphic/multimap.js +80 -0
  521. data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolFormatter.js +81 -0
  522. data/node/node_modules/playwright-core/lib/utils/isomorphic/protocolMetainfo.js +330 -0
  523. data/node/node_modules/playwright-core/lib/utils/isomorphic/rtti.js +43 -0
  524. data/node/node_modules/playwright-core/lib/utils/isomorphic/selectorParser.js +386 -0
  525. data/node/node_modules/playwright-core/lib/utils/isomorphic/semaphore.js +54 -0
  526. data/node/node_modules/playwright-core/lib/utils/isomorphic/stackTrace.js +158 -0
  527. data/node/node_modules/playwright-core/lib/utils/isomorphic/stringUtils.js +204 -0
  528. data/node/node_modules/playwright-core/lib/utils/isomorphic/time.js +49 -0
  529. data/node/node_modules/playwright-core/lib/utils/isomorphic/timeoutRunner.js +66 -0
  530. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/entries.js +16 -0
  531. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotRenderer.js +499 -0
  532. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotServer.js +120 -0
  533. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/snapshotStorage.js +89 -0
  534. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceLoader.js +131 -0
  535. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModel.js +365 -0
  536. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/traceModernizer.js +400 -0
  537. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV3.js +16 -0
  538. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV4.js +16 -0
  539. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV5.js +16 -0
  540. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV6.js +16 -0
  541. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV7.js +16 -0
  542. data/node/node_modules/playwright-core/lib/utils/isomorphic/trace/versions/traceV8.js +16 -0
  543. data/node/node_modules/playwright-core/lib/utils/isomorphic/traceUtils.js +58 -0
  544. data/node/node_modules/playwright-core/lib/utils/isomorphic/types.js +16 -0
  545. data/node/node_modules/playwright-core/lib/utils/isomorphic/urlMatch.js +190 -0
  546. data/node/node_modules/playwright-core/lib/utils/isomorphic/utilityScriptSerializers.js +251 -0
  547. data/node/node_modules/playwright-core/lib/utils/isomorphic/yaml.js +84 -0
  548. data/node/node_modules/playwright-core/lib/utils.js +111 -0
  549. data/node/node_modules/playwright-core/lib/utilsBundle.js +109 -0
  550. data/node/node_modules/playwright-core/lib/utilsBundleImpl/index.js +218 -0
  551. data/node/node_modules/playwright-core/lib/utilsBundleImpl/xdg-open +1066 -0
  552. data/node/node_modules/playwright-core/lib/vite/htmlReport/index.html +84 -0
  553. data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DYBRYzYX.css +1 -0
  554. data/node/node_modules/playwright-core/lib/vite/recorder/assets/codeMirrorModule-DadYNm1I.js +32 -0
  555. data/node/node_modules/playwright-core/lib/vite/recorder/assets/codicon-DCmgc-ay.ttf +0 -0
  556. data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BSjZa4pk.css +1 -0
  557. data/node/node_modules/playwright-core/lib/vite/recorder/assets/index-BhTWtUlo.js +193 -0
  558. data/node/node_modules/playwright-core/lib/vite/recorder/index.html +29 -0
  559. data/node/node_modules/playwright-core/lib/vite/recorder/playwright-logo.svg +9 -0
  560. data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/codeMirrorModule-a5XoALAZ.js +32 -0
  561. data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/defaultSettingsView-CJSZINFr.js +266 -0
  562. data/node/node_modules/playwright-core/lib/vite/traceViewer/assets/xtermModule-CsJ4vdCR.js +9 -0
  563. data/node/node_modules/playwright-core/lib/vite/traceViewer/codeMirrorModule.DYBRYzYX.css +1 -0
  564. data/node/node_modules/playwright-core/lib/vite/traceViewer/codicon.DCmgc-ay.ttf +0 -0
  565. data/node/node_modules/playwright-core/lib/vite/traceViewer/defaultSettingsView.7ch9cixO.css +1 -0
  566. data/node/node_modules/playwright-core/lib/vite/traceViewer/index.BVu7tZDe.css +1 -0
  567. data/node/node_modules/playwright-core/lib/vite/traceViewer/index.Bk2uYQRV.js +2 -0
  568. data/node/node_modules/playwright-core/lib/vite/traceViewer/index.html +43 -0
  569. data/node/node_modules/playwright-core/lib/vite/traceViewer/manifest.webmanifest +16 -0
  570. data/node/node_modules/playwright-core/lib/vite/traceViewer/playwright-logo.svg +9 -0
  571. data/node/node_modules/playwright-core/lib/vite/traceViewer/snapshot.html +21 -0
  572. data/node/node_modules/playwright-core/lib/vite/traceViewer/sw.bundle.js +5 -0
  573. data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.Btcz36p_.css +1 -0
  574. data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.CQJ9SCIQ.js +5 -0
  575. data/node/node_modules/playwright-core/lib/vite/traceViewer/uiMode.html +17 -0
  576. data/node/node_modules/playwright-core/lib/vite/traceViewer/xtermModule.DYP7pi_n.css +32 -0
  577. data/node/node_modules/playwright-core/lib/zipBundle.js +34 -0
  578. data/node/node_modules/playwright-core/lib/zipBundleImpl.js +5 -0
  579. data/node/node_modules/playwright-core/package.json +43 -0
  580. data/node/node_modules/playwright-core/types/protocol.d.ts +23824 -0
  581. data/node/node_modules/playwright-core/types/structs.d.ts +45 -0
  582. data/node/node_modules/playwright-core/types/types.d.ts +22843 -0
  583. data/node/package-lock.json +72 -0
  584. data/node/package.json +14 -0
  585. data/node/src/index.js +215 -0
  586. data/rubycrawl.gemspec +29 -0
  587. data/spec/rubycrawl_spec.rb +51 -0
  588. data/spec/spec_helper.rb +11 -0
  589. metadata +645 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f463d9ba6ffa83c283954dd411a08dc0184ed065128f57da625b9c349447b77a
4
+ data.tar.gz: cc8adb28596fe65e54f18ec97d83152c9a8df6d38c3cf584c359f2a2230e6048
5
+ SHA512:
6
+ metadata.gz: 98c20cc8a1ff17df7a830e93f6aa49e5c630c7d43533e516648b4c4fdc301c7e733ab9aba6502d6de7bb5b5f1afe40f037d4fad59e77051322780dba5c575fa2
7
+ data.tar.gz: a789dea3bfbd3c63dc8d364da49b38904675d1311dc143dacbb0cf58631a0e8d59d8b3484148dde85b6efb03f8ec3caf6026e209239486219c7f55a7c955ff5c
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ group :development do
8
+ gem 'rake', '>= 13.0'
9
+ gem 'rspec', '>= 3.12'
10
+ gem 'rubocop', '>= 1.50'
11
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RubyCrawl
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,585 @@
1
+ # rubycrawl
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rubycrawl.svg)](https://badge.fury.io/rb/rubycrawl)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ **Playwright-based web crawler for Ruby** — Inspired by [crawl4ai](https://github.com/unclecode/crawl4ai) (Python), designed idiomatically for Ruby with production-ready features.
7
+
8
+ RubyCrawl provides accurate, JavaScript-enabled web scraping using Playwright's battle-tested browser automation, wrapped in a clean Ruby API. Perfect for extracting content from modern SPAs and dynamic websites.
9
+
10
+ ## Features
11
+
12
+ - **Playwright-powered**: Real browser automation for JavaScript-heavy sites
13
+ - **Production-ready**: Designed for Rails apps and production environments
14
+ - **Simple API**: Clean, minimal Ruby interface — zero Playwright knowledge required
15
+ - **Resource optimization**: Built-in resource blocking for faster crawls
16
+ - **Auto-managed browsers**: Browser process reuse and automatic lifecycle management
17
+ - **Content extraction**: HTML, links, and Markdown conversion
18
+ - **Multi-page crawling**: BFS crawler with depth limits and deduplication
19
+ - **Rails integration**: First-class Rails support with generators and initializers
20
+
21
+ ## Table of Contents
22
+
23
+ - [Installation](#installation)
24
+ - [Quick Start](#quick-start)
25
+ - [Usage](#usage)
26
+ - [Basic Crawling](#basic-crawling)
27
+ - [Multi-Page Crawling](#multi-page-crawling)
28
+ - [Configuration](#configuration)
29
+ - [Result Object](#result-object)
30
+ - [Rails Integration](#rails-integration)
31
+ - [Production Deployment](#production-deployment)
32
+ - [Architecture](#architecture)
33
+ - [Performance](#performance)
34
+ - [Development](#development)
35
+ - [Contributing](#contributing)
36
+ - [License](#license)
37
+
38
+ ## Installation
39
+
40
+ ### Requirements
41
+
42
+ - **Ruby** >= 3.0
43
+ - **Node.js** LTS (v18+ recommended) — required for the bundled Playwright service
44
+
45
+ ### Add to Gemfile
46
+
47
+ ```ruby
48
+ gem "rubycrawl"
49
+ ```
50
+
51
+ Then install:
52
+
53
+ ```bash
54
+ bundle install
55
+ ```
56
+
57
+ ### Install Playwright browsers
58
+
59
+ After bundling, install the Playwright browsers:
60
+
61
+ ```bash
62
+ bundle exec rake rubycrawl:install
63
+ ```
64
+
65
+ This command:
66
+
67
+ - Installs Node.js dependencies in the bundled `node/` directory
68
+ - Downloads Playwright browsers (Chromium, Firefox, WebKit)
69
+ - Creates a Rails initializer (if using Rails)
70
+
71
+ ## Quick Start
72
+
73
+ ```ruby
74
+ require "rubycrawl"
75
+
76
+ # Simple crawl
77
+ result = RubyCrawl.crawl("https://example.com")
78
+
79
+ # Access extracted content
80
+ puts result.html # Raw HTML content
81
+ puts result.markdown # Converted to Markdown
82
+ puts result.links # Extracted links from the page
83
+ puts result.metadata # Status code, final URL, etc.
84
+ ```
85
+
86
+ ## Usage
87
+
88
+ ### Basic Crawling
89
+
90
+ The simplest way to crawl a URL:
91
+
92
+ ```ruby
93
+ result = RubyCrawl.crawl("https://example.com")
94
+
95
+ # Access the results
96
+ result.html # => "<html>...</html>"
97
+ result.markdown # => "# Example Domain\n\nThis domain is..." (lazy-loaded)
98
+ result.links # => [{ "url" => "https://...", "text" => "More info" }, ...]
99
+ result.metadata # => { "status" => 200, "final_url" => "https://example.com" }
100
+ result.text # => "" (coming soon)
101
+ ```
102
+
103
+ ### Multi-Page Crawling
104
+
105
+ Crawl an entire site following links with BFS (breadth-first search):
106
+
107
+ ```ruby
108
+ # Crawl up to 100 pages, max 3 links deep
109
+ RubyCrawl.crawl_site("https://example.com", max_pages: 100, max_depth: 3) do |page|
110
+ # Each page is yielded as it's crawled (streaming)
111
+ puts "Crawled: #{page.url} (depth: #{page.depth})"
112
+
113
+ # Save to database
114
+ Page.create!(
115
+ url: page.url,
116
+ html: page.html,
117
+ markdown: page.markdown,
118
+ depth: page.depth
119
+ )
120
+ end
121
+ ```
122
+
123
+ #### Multi-Page Options
124
+
125
+ | Option | Default | Description |
126
+ |--------|---------|-------------|
127
+ | `max_pages` | 50 | Maximum number of pages to crawl |
128
+ | `max_depth` | 3 | Maximum link depth from start URL |
129
+ | `same_host_only` | true | Only follow links on the same domain |
130
+ | `wait_until` | inherited | Page load strategy |
131
+ | `block_resources` | inherited | Block images/fonts/CSS |
132
+
133
+ #### Page Result Object
134
+
135
+ The block receives a `PageResult` with:
136
+
137
+ ```ruby
138
+ page.url # String: Final URL after redirects
139
+ page.html # String: Full HTML content
140
+ page.markdown # String: Lazy-converted Markdown
141
+ page.links # Array: URLs extracted from page
142
+ page.metadata # Hash: HTTP status, final URL, etc.
143
+ page.depth # Integer: Link depth from start URL
144
+ ```
145
+
146
+ ### Configuration
147
+
148
+ #### Global Configuration
149
+
150
+ Set default options that apply to all crawls:
151
+
152
+ ```ruby
153
+ RubyCrawl.configure(
154
+ wait_until: "networkidle", # Wait until network is idle
155
+ block_resources: true # Block images, fonts, CSS for speed
156
+ )
157
+
158
+ # All subsequent crawls use these defaults
159
+ result = RubyCrawl.crawl("https://example.com")
160
+ ```
161
+
162
+ #### Per-Request Options
163
+
164
+ Override defaults for specific requests:
165
+
166
+ ```ruby
167
+ # Use global defaults
168
+ result = RubyCrawl.crawl("https://example.com")
169
+
170
+ # Override for this request only
171
+ result = RubyCrawl.crawl(
172
+ "https://example.com",
173
+ wait_until: "domcontentloaded",
174
+ block_resources: false
175
+ )
176
+ ```
177
+
178
+ #### Configuration Options
179
+
180
+ | Option | Values | Default | Description |
181
+ | ----------------- | ----------------------------------------------- | -------- | ------------------------------------------------- |
182
+ | `wait_until` | `"load"`, `"domcontentloaded"`, `"networkidle"` | `"load"` | When to consider page loaded |
183
+ | `block_resources` | `true`, `false` | `true` | Block images, fonts, CSS, media for faster crawls |
184
+
185
+ **Wait strategies explained:**
186
+
187
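+ - `load` — Wait for the load event, i.e. the page and its subresources have finished loading (medium speed, good for static sites)
188
+ - `domcontentloaded` — Wait only until the DOM has been parsed (fastest; fires before `load`)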
189
+ - `networkidle` — Wait until no network requests for 500ms (slowest, best for SPAs)
190
+
191
+ ### Result Object
192
+
193
+ The crawl result is a `RubyCrawl::Result` object with these attributes:
194
+
195
+ ```ruby
196
+ result = RubyCrawl.crawl("https://example.com")
197
+
198
+ result.html # String: Raw HTML content from page
199
+ result.markdown # String: Markdown conversion (lazy-loaded on first access)
200
+ result.links # Array: Extracted links with url and text
201
+ result.text # String: Plain text (coming soon)
202
+ result.metadata # Hash: Comprehensive metadata (see below)
203
+ ```
204
+
205
+ #### Links Format
206
+
207
+ ```ruby
208
+ result.links
209
+ # => [
210
+ # { "url" => "https://example.com/about", "text" => "About Us" },
211
+ # { "url" => "https://example.com/contact", "text" => "Contact" },
212
+ # ...
213
+ # ]
214
+ ```
215
+
216
+ #### Markdown Conversion
217
+
218
+ Markdown is **lazy-loaded** — conversion only happens when you access `.markdown`:
219
+
220
+ ```ruby
221
+ result = RubyCrawl.crawl(url)
222
+ result.html # ✅ No overhead
223
+ result.markdown # ⬅️ Conversion happens here (first call only)
224
+ result.markdown # ✅ Cached, instant
225
+ ```
226
+
227
+ Uses [reverse_markdown](https://github.com/xijo/reverse_markdown) with GitHub-flavored output.
228
+
229
+ #### Metadata Fields
230
+
231
+ The `metadata` hash includes HTTP and HTML metadata:
232
+
233
+ ```ruby
234
+ result.metadata
235
+ # => {
236
+ # "status" => 200, # HTTP status code
237
+ # "final_url" => "https://...", # Final URL after redirects
238
+ # "title" => "Page Title", # <title> tag
239
+ # "description" => "...", # Meta description
240
+ # "keywords" => "ruby, web", # Meta keywords
241
+ # "author" => "Author Name", # Meta author
242
+ # "og_title" => "...", # Open Graph title
243
+ # "og_description" => "...", # Open Graph description
244
+ # "og_image" => "https://...", # Open Graph image
245
+ # "og_url" => "https://...", # Open Graph URL
246
+ # "og_type" => "website", # Open Graph type
247
+ # "twitter_card" => "summary", # Twitter card type
248
+ # "twitter_title" => "...", # Twitter title
249
+ # "twitter_description" => "...", # Twitter description
250
+ # "twitter_image" => "https://...",# Twitter image
251
+ # "canonical" => "https://...", # Canonical URL
252
+ # "lang" => "en", # Page language
253
+ # "charset" => "UTF-8" # Character encoding
254
+ # }
255
+ ```
256
+
257
+ Note: Any HTML metadata field may be `nil` if it is not present on the page.
258
+
259
+ ### Error Handling
260
+
261
+ RubyCrawl provides specific exception classes for different error scenarios:
262
+
263
+ ```ruby
264
+ begin
265
+ result = RubyCrawl.crawl(url)
266
+ rescue RubyCrawl::ConfigurationError => e
267
+ # Invalid URL or configuration
268
+ puts "Configuration error: #{e.message}"
269
+ rescue RubyCrawl::TimeoutError => e
270
+ # Page load timeout or network timeout
271
+ puts "Timeout: #{e.message}"
272
+ rescue RubyCrawl::NavigationError => e
273
+ # Page navigation failed (404, DNS error, SSL error, etc.)
274
+ puts "Navigation failed: #{e.message}"
275
+ rescue RubyCrawl::ServiceError => e
276
+ # Node service unavailable or crashed
277
+ puts "Service error: #{e.message}"
278
+ rescue RubyCrawl::Error => e
279
+ # Catch-all for any RubyCrawl error
280
+ puts "Crawl error: #{e.message}"
281
+ end
282
+ ```
283
+
284
+ **Exception Hierarchy:**
285
+ - `RubyCrawl::Error` (base class)
286
+ - `RubyCrawl::ConfigurationError` - Invalid URL or configuration
287
+ - `RubyCrawl::TimeoutError` - Timeout during crawl
288
+ - `RubyCrawl::NavigationError` - Page navigation failed
289
+ - `RubyCrawl::ServiceError` - Node service issues
290
+
291
+ **Automatic Retry:** RubyCrawl automatically retries transient failures (service errors, timeouts) up to 3 times with exponential backoff (2s, 4s, 8s). Configure with:
292
+
293
+ ```ruby
294
+ RubyCrawl.configure(max_retries: 5)
295
+ # or per-request
296
+ RubyCrawl.crawl(url, retries: 1) # Disable retry
297
+ ```
298
+
299
+ ## Rails Integration
300
+
301
+ ### Installation
302
+
303
+ Run the installer in your Rails app:
304
+
305
+ ```bash
306
+ bundle exec rake rubycrawl:install
307
+ ```
308
+
309
+ This creates `config/initializers/rubycrawl.rb`:
310
+
311
+ ```ruby
312
+ # frozen_string_literal: true
313
+
314
+ # rubycrawl default configuration
315
+ RubyCrawl.configure(
316
+ wait_until: "load",
317
+ block_resources: true
318
+ )
319
+ ```
320
+
321
+ ### Usage in Rails
322
+
323
+ ```ruby
324
+ # In a controller, service, or background job
325
+ class ContentScraperJob < ApplicationJob
326
+ def perform(url)
327
+ result = RubyCrawl.crawl(url)
328
+
329
+ # Save to database
330
+ ScrapedContent.create!(
331
+ url: url,
332
+ html: result.html,
333
+ status: result.metadata["status"]
334
+ )
335
+ end
336
+ end
337
+ ```
338
+
339
+ ## Production Deployment
340
+
341
+ ### Pre-deployment Checklist
342
+
343
+ 1. **Install Node.js** on your production servers (LTS version recommended)
344
+ 2. **Run installer** during deployment:
345
+ ```bash
346
+ bundle exec rake rubycrawl:install
347
+ ```
348
+ 3. **Set environment variables** (optional):
349
+ ```bash
350
+ export RUBYCRAWL_NODE_BIN=/usr/bin/node # Custom Node.js path
351
+ export RUBYCRAWL_NODE_LOG=/var/log/rubycrawl.log # Service logs
352
+ ```
353
+
354
+ ### Docker Example
355
+
356
+ ```dockerfile
357
+ FROM ruby:3.2
358
+
359
+ # Install Node.js LTS
360
+ RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \
361
+ && apt-get install -y nodejs
362
+
363
+ # Install system dependencies for Playwright
364
+ RUN npx playwright install-deps
365
+
366
+ WORKDIR /app
367
+ COPY Gemfile* ./
368
+ RUN bundle install
369
+
370
+ # Install Playwright browsers
371
+ RUN bundle exec rake rubycrawl:install
372
+
373
+ COPY . .
374
+ CMD ["rails", "server"]
375
+ ```
376
+
377
+ ### Heroku Deployment
378
+
379
+ Add the Node.js buildpack:
380
+
381
+ ```bash
382
+ heroku buildpacks:add heroku/nodejs
383
+ heroku buildpacks:add heroku/ruby
384
+ ```
385
+
386
+ Add to `package.json` in your Rails root:
387
+
388
+ ```json
389
+ {
390
+ "engines": {
391
+ "node": "18.x"
392
+ }
393
+ }
394
+ ```
395
+
396
+ ### Performance Tips
397
+
398
+ - **Reuse instances**: Use the class-level `RubyCrawl.crawl` method (recommended) rather than creating new instances
399
+ - **Resource blocking**: Keep `block_resources: true` for 2-3x faster crawls when you don't need images/CSS
400
+ - **Concurrency**: Use background jobs (Sidekiq, etc.) for parallel crawling
401
+ - **Browser reuse**: The first crawl is slower due to browser launch; subsequent crawls reuse the process
402
+
403
+ ## Architecture
404
+
405
+ RubyCrawl uses a **dual-process architecture**:
406
+
407
+ ```
408
+ ┌─────────────────────────────────────────────┐
409
+ │ Ruby Process (Your Application) │
410
+ │ ┌─────────────────────────────────────┐ │
411
+ │ │ RubyCrawl Gem │ │
412
+ │ │ • Public API │ │
413
+ │ │ • Result normalization │ │
414
+ │ │ • Error handling │ │
415
+ │ └────────────┬────────────────────────┘ │
416
+ └───────────────┼─────────────────────────────┘
417
+ │ HTTP/JSON (localhost:3344)
418
+ ┌───────────────┼─────────────────────────────┐
419
+ │ Node.js Process (Auto-started) │
420
+ │ ┌────────────┴────────────────────────┐ │
421
+ │ │ Playwright Service │ │
422
+ │ │ • Browser management │ │
423
+ │ │ • Page navigation │ │
424
+ │ │ • HTML extraction │ │
425
+ │ │ • Resource blocking │ │
426
+ │ └─────────────────────────────────────┘ │
427
+ └─────────────────────────────────────────────┘
428
+ ```
429
+
430
+ **Why this architecture?**
431
+
432
+ - **Separation of concerns**: Ruby handles orchestration, Node handles browsers
433
+ - **Stability**: Playwright's official Node.js bindings are most reliable
434
+ - **Performance**: Long-running browser process, reused across requests
435
+ - **Simplicity**: No C extensions, pure Ruby + bundled Node service
436
+
437
+ See [.github/copilot-instructions.md](.github/copilot-instructions.md) for detailed architecture documentation.
438
+
439
+ ## Performance
440
+
441
+ ### Benchmarks
442
+
443
+ Typical crawl times (M1 Mac, fast network):
444
+
445
+ | Page Type | First Crawl | Subsequent | Config |
446
+ | ----------- | ----------- | ---------- | --------------------------- |
447
+ | Static HTML | ~2s | ~500ms | `block_resources: true` |
448
+ | SPA (React) | ~3s | ~1.2s | `wait_until: "networkidle"` |
449
+ | Heavy site | ~4s | ~2s | `block_resources: false` |
450
+
451
+ **Note**: First crawl includes browser launch time (~1.5s). Subsequent crawls reuse the browser.
452
+
453
+ ### Optimization Tips
454
+
455
+ 1. **Enable resource blocking** for content-only extraction:
456
+
457
+ ```ruby
458
+ RubyCrawl.configure(block_resources: true)
459
+ ```
460
+
461
+ 2. **Use an appropriate wait strategy**:
462
+ - Static sites: `wait_until: "load"`
463
+ - SPAs: `wait_until: "networkidle"`
464
+
465
+ 3. **Batch processing**: Use background jobs for concurrent crawling:
466
+ ```ruby
467
+ urls.each { |url| CrawlJob.perform_later(url) }
468
+ ```
469
+
470
+ ## Development
471
+
472
+ ### Setup
473
+
474
+ ```bash
475
+ git clone git@github.com:craft-wise/rubycrawl.git
476
+ cd rubycrawl
477
+ bin/setup # Installs Ruby dependencies (runs `bundle install`)
478
+ ```
479
+
480
+ ### Running Tests
481
+
482
+ ```bash
483
+ bundle exec rspec
484
+ ```
485
+
486
+ ### Manual Testing
487
+
488
+ ```bash
489
+ # Terminal 1: Start Node service manually (optional)
490
+ cd node
491
+ npm start
492
+
493
+ # Terminal 2: Ruby console
494
+ bin/console
495
+ > result = RubyCrawl.crawl("https://example.com")
496
+ > puts result.html
497
+ ```
498
+
499
+ ### Project Structure
500
+
501
+ ```
502
+ rubycrawl/
503
+ ├── lib/
504
+ │ ├── rubycrawl.rb # Main gem entry point
505
+ │ ├── rubycrawl/
506
+ │ │ ├── version.rb # Gem version
507
+ │ │ ├── railtie.rb # Rails integration
508
+ │ │ └── tasks/
509
+ │ │ └── install.rake # Installation task
510
+ ├── node/
511
+ │ ├── src/
512
+ │ │ └── index.js # Playwright HTTP service
513
+ │ ├── package.json
514
+ │ └── README.md
515
+ ├── spec/ # RSpec tests
516
+ ├── .github/
517
+ │ └── copilot-instructions.md # GitHub Copilot guidelines
518
+ ├── CLAUDE.md # Claude AI guidelines
519
+ └── README.md
520
+ ```
521
+
522
+ ## Roadmap
523
+
524
+ ### Current (v0.1.0)
525
+
526
+ - [x] HTML extraction
527
+ - [x] Link extraction
528
+ - [x] Markdown conversion (lazy-loaded)
529
+ - [x] Multi-page crawling with BFS
530
+ - [x] URL normalization and deduplication
531
+ - [x] Basic metadata (status, final URL)
532
+ - [x] Resource blocking
533
+ - [x] Rails integration
534
+
535
+ ### Coming Soon
536
+
537
+ - [ ] Plain text extraction
538
+ - [ ] Screenshot capture
539
+ - [ ] Custom JavaScript execution
540
+ - [ ] Session/cookie support
541
+ - [ ] Proxy support
542
+ - [ ] Robots.txt support
543
+
544
+ ## Contributing
545
+
546
+ Contributions are welcome! Please read our [contribution guidelines](.github/copilot-instructions.md) first.
547
+
548
+ ### Development Philosophy
549
+
550
+ - **Simplicity over cleverness**: Prefer clear, explicit code
551
+ - **Stability over speed**: Correctness first, optimization second
552
+ - **Ruby-first**: Hide Node.js/Playwright complexity from users
553
+ - **No vendor lock-in**: Pure open source, no SaaS dependencies
554
+
555
+ ## Comparison with crawl4ai
556
+
557
+ | Feature | crawl4ai (Python) | rubycrawl (Ruby) |
558
+ | ------------------- | ----------------- | ---------------- |
559
+ | Browser automation | Playwright | Playwright |
560
+ | Language | Python | Ruby |
561
+ | LLM extraction | ✅ | Planned |
562
+ | Markdown extraction | ✅ | ✅ |
563
+ | Link extraction | ✅ | ✅ |
564
+ | Multi-page crawling | ✅ | ✅ |
565
+ | Rails integration | N/A | ✅ |
566
+ | Resource blocking | ✅ | ✅ |
567
+ | Session management | ✅ | Planned |
568
+
569
+ RubyCrawl aims to bring the same level of accuracy and reliability to the Ruby ecosystem.
570
+
571
+ ## License
572
+
573
+ The gem is available as open source under the terms of the [MIT License](LICENSE).
574
+
575
+ ## Credits
576
+
577
+ Inspired by [crawl4ai](https://github.com/unclecode/crawl4ai) by @unclecode.
578
+
579
+ Built with [Playwright](https://playwright.dev/) by Microsoft.
580
+
581
+ ## Support
582
+
583
+ - **Issues**: [GitHub Issues](https://github.com/craft-wise/rubycrawl/issues)
584
+ - **Discussions**: [GitHub Discussions](https://github.com/craft-wise/rubycrawl/discussions)
585
+ - **Email**: ganesh.navale@zohomail.in
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
data/bin/console ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'rubycrawl'
6
+
7
+ require 'irb'
8
+ ARGV.clear
9
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ bundle install
data/lib/rubycrawl/errors.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ class RubyCrawl
4
+ # Base error class for all RubyCrawl errors; every error class below inherits from it
5
+ class Error < StandardError; end
6
+
7
+ # Raised when the Node.js service fails to start or is unavailable
8
+ class ServiceError < Error; end
9
+
10
+ # Raised when page navigation fails (timeout, DNS, SSL, etc.)
11
+ class NavigationError < Error; end
12
+
13
+ # Raised when a crawl operation times out
14
+ # NOTE(review): NavigationError's description also lists "timeout" - confirm which class callers should rescue
15
+ class TimeoutError < Error; end
16
+
17
+ # Raised when invalid configuration is provided
18
+ class ConfigurationError < Error; end
19
+ end