ghostcrawl 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (371) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +46 -0
  3. data/README.md +127 -0
  4. data/_generated/ghostcrawl.rb +0 -0
  5. data/_generated/ghostcrawl_client.rb +27 -0
  6. data/_generated/models/audit_event_out.rb +802 -0
  7. data/_generated/models/audit_event_out_actor_ip_member1.rb +62 -0
  8. data/_generated/models/audit_event_out_actor_token_id_member1.rb +62 -0
  9. data/_generated/models/audit_event_out_actor_user_agent_member1.rb +62 -0
  10. data/_generated/models/audit_event_out_actor_user_id_member1.rb +62 -0
  11. data/_generated/models/audit_event_out_metadata.rb +62 -0
  12. data/_generated/models/audit_event_out_org_id_member1.rb +62 -0
  13. data/_generated/models/audit_event_out_target_id_member1.rb +62 -0
  14. data/_generated/models/audit_event_out_target_kind_member1.rb +62 -0
  15. data/_generated/models/audit_events_response.rb +173 -0
  16. data/_generated/models/audit_events_response_next_cursor_member1.rb +62 -0
  17. data/_generated/models/batch_scrape_request.rb +623 -0
  18. data/_generated/models/batch_scrape_request_behavior_actions_member1.rb +62 -0
  19. data/_generated/models/batch_scrape_request_behavior_actions_member2.rb +62 -0
  20. data/_generated/models/batch_scrape_request_extraction_strategy_member1.rb +62 -0
  21. data/_generated/models/batch_scrape_request_identity_country_member1.rb +62 -0
  22. data/_generated/models/batch_scrape_request_language_member1.rb +62 -0
  23. data/_generated/models/batch_scrape_request_output_format.rb +9 -0
  24. data/_generated/models/batch_scrape_request_profile_member1.rb +62 -0
  25. data/_generated/models/cdp_frame_request.rb +103 -0
  26. data/_generated/models/cdp_url_request.rb +82 -0
  27. data/_generated/models/checkout_request.rb +124 -0
  28. data/_generated/models/contact_request.rb +143 -0
  29. data/_generated/models/cookie_dict.rb +739 -0
  30. data/_generated/models/cookie_dict_domain_member1.rb +62 -0
  31. data/_generated/models/cookie_dict_expires_member1.rb +62 -0
  32. data/_generated/models/cookie_dict_http_only_member1.rb +62 -0
  33. data/_generated/models/cookie_dict_path_member1.rb +62 -0
  34. data/_generated/models/cookie_dict_same_site_member1.rb +62 -0
  35. data/_generated/models/cookie_dict_secure_member1.rb +62 -0
  36. data/_generated/models/cookie_dict_url_member1.rb +62 -0
  37. data/_generated/models/cookies_delete_body.rb +355 -0
  38. data/_generated/models/cookies_delete_body_domain_member1.rb +62 -0
  39. data/_generated/models/cookies_delete_body_name_member1.rb +62 -0
  40. data/_generated/models/cookies_delete_body_path_member1.rb +62 -0
  41. data/_generated/models/cookies_set_body.rb +102 -0
  42. data/_generated/models/deep_crawl_body.rb +339 -0
  43. data/_generated/models/deep_crawl_body_webhook_url_member1.rb +62 -0
  44. data/_generated/models/dom_snapshot_body.rb +82 -0
  45. data/_generated/models/download_body.rb +102 -0
  46. data/_generated/models/error_code.rb +31 -0
  47. data/_generated/models/eval_body.rb +102 -0
  48. data/_generated/models/extend_body.rb +155 -0
  49. data/_generated/models/extend_body_ttl_seconds_member1.rb +62 -0
  50. data/_generated/models/extract_request.rb +631 -0
  51. data/_generated/models/extract_request_behavior_actions_member1.rb +62 -0
  52. data/_generated/models/extract_request_behavior_actions_member2.rb +62 -0
  53. data/_generated/models/extract_request_engine.rb +12 -0
  54. data/_generated/models/extract_request_identity_country_member1.rb +62 -0
  55. data/_generated/models/extract_request_language_member1.rb +62 -0
  56. data/_generated/models/extract_request_prompt_member1.rb +62 -0
  57. data/_generated/models/extract_request_url_member1.rb +62 -0
  58. data/_generated/models/extract_request_urls_member1.rb +62 -0
  59. data/_generated/models/filter_spec.rb +62 -0
  60. data/_generated/models/h_t_t_p_validation_error.rb +84 -0
  61. data/_generated/models/har_body.rb +82 -0
  62. data/_generated/models/identity_payload.rb +144 -0
  63. data/_generated/models/identity_request.rb +833 -0
  64. data/_generated/models/identity_request_claim_browser_member1.rb +62 -0
  65. data/_generated/models/identity_request_claim_os_member1.rb +62 -0
  66. data/_generated/models/identity_request_device_model_member1.rb +62 -0
  67. data/_generated/models/identity_request_locale_member1.rb +62 -0
  68. data/_generated/models/identity_request_proxy_member1.rb +62 -0
  69. data/_generated/models/identity_request_source_member1.rb +62 -0
  70. data/_generated/models/identity_request_timezone_member1.rb +62 -0
  71. data/_generated/models/identity_request_viewport_member1.rb +62 -0
  72. data/_generated/models/identity_response.rb +357 -0
  73. data/_generated/models/identity_response_expires_at_member1.rb +62 -0
  74. data/_generated/models/identity_response_identity_id_member1.rb +62 -0
  75. data/_generated/models/identity_response_workspace_id_member1.rb +62 -0
  76. data/_generated/models/map_body.rb +238 -0
  77. data/_generated/models/map_body_search_member1.rb +62 -0
  78. data/_generated/models/map_response.rb +195 -0
  79. data/_generated/models/map_response_truncated_to_server_max_member1.rb +62 -0
  80. data/_generated/models/me_response.rb +233 -0
  81. data/_generated/models/me_response_created_at_member1.rb +62 -0
  82. data/_generated/models/models.rb +145 -0
  83. data/_generated/models/policy_upsert_request.rb +417 -0
  84. data/_generated/models/policy_upsert_request_max_crawl_pages_member1.rb +62 -0
  85. data/_generated/models/policy_upsert_request_max_llm_calls_member1.rb +62 -0
  86. data/_generated/models/policy_upsert_request_max_llm_tokens_member1.rb +62 -0
  87. data/_generated/models/problem_details.rb +226 -0
  88. data/_generated/models/profile_create_request.rb +175 -0
  89. data/_generated/models/profile_create_request_storage_state_id_member1.rb +62 -0
  90. data/_generated/models/profile_update_request.rb +246 -0
  91. data/_generated/models/profile_update_request_name_member1.rb +62 -0
  92. data/_generated/models/profile_update_request_storage_state_id_member1.rb +62 -0
  93. data/_generated/models/registry_token_request.rb +82 -0
  94. data/_generated/models/retry_delivery_response.rb +145 -0
  95. data/_generated/models/rotate_secret_response.rb +145 -0
  96. data/_generated/models/schedule_create_request.rb +256 -0
  97. data/_generated/models/schedule_create_request_job_params.rb +64 -0
  98. data/_generated/models/schedule_create_request_notify_webhook_member1.rb +62 -0
  99. data/_generated/models/scorer_spec.rb +102 -0
  100. data/_generated/models/scrape_request.rb +1142 -0
  101. data/_generated/models/scrape_request_batch_identity_mode.rb +10 -0
  102. data/_generated/models/scrape_request_behavior_actions_member1.rb +62 -0
  103. data/_generated/models/scrape_request_behavior_actions_member2.rb +62 -0
  104. data/_generated/models/scrape_request_engine.rb +12 -0
  105. data/_generated/models/scrape_request_format.rb +11 -0
  106. data/_generated/models/scrape_request_identity_country_member1.rb +62 -0
  107. data/_generated/models/scrape_request_identity_member1.rb +62 -0
  108. data/_generated/models/scrape_request_language_member1.rb +62 -0
  109. data/_generated/models/scrape_request_profile_member1.rb +62 -0
  110. data/_generated/models/scrape_request_proxy_member1.rb +62 -0
  111. data/_generated/models/scrape_request_screenshot_selector_member1.rb +62 -0
  112. data/_generated/models/scrape_request_session_member1.rb +62 -0
  113. data/_generated/models/scrape_request_url_member1.rb +62 -0
  114. data/_generated/models/scrape_request_urls_member1.rb +62 -0
  115. data/_generated/models/scroll_body.rb +284 -0
  116. data/_generated/models/scroll_body_direction.rb +11 -0
  117. data/_generated/models/scroll_body_distance_px_member1.rb +62 -0
  118. data/_generated/models/scroll_body_selector_member1.rb +62 -0
  119. data/_generated/models/search_request.rb +469 -0
  120. data/_generated/models/search_request_country_member1.rb +62 -0
  121. data/_generated/models/search_request_engine_member1.rb +62 -0
  122. data/_generated/models/search_request_freshness_member1.rb +62 -0
  123. data/_generated/models/search_request_vertical_member1.rb +62 -0
  124. data/_generated/models/session_create_request.rb +174 -0
  125. data/_generated/models/session_create_request_engine.rb +10 -0
  126. data/_generated/models/session_create_request_profile_member1.rb +62 -0
  127. data/_generated/models/storage_state_attach_request.rb +58 -0
  128. data/_generated/models/storage_state_detach_request.rb +58 -0
  129. data/_generated/models/takeover_action_body.rb +84 -0
  130. data/_generated/models/team_response.rb +142 -0
  131. data/_generated/models/tenant_context.rb +183 -0
  132. data/_generated/models/upload_body.rb +162 -0
  133. data/_generated/models/validation_error.rb +142 -0
  134. data/_generated/models/validation_error_ctx.rb +62 -0
  135. data/_generated/models/viewport_exact.rb +104 -0
  136. data/_generated/models/wait_body.rb +194 -0
  137. data/_generated/models/wait_body_selector_member1.rb +62 -0
  138. data/_generated/models/webhook_create_request.rb +125 -0
  139. data/_generated/models/webhook_create_response.rb +104 -0
  140. data/_generated/models/webhook_delivery_list_response.rb +195 -0
  141. data/_generated/models/webhook_delivery_list_response_next_cursor_member1.rb +62 -0
  142. data/_generated/models/webhook_delivery_public.rb +680 -0
  143. data/_generated/models/webhook_delivery_public_delivered_at_member1.rb +62 -0
  144. data/_generated/models/webhook_delivery_public_error_class_member1.rb +62 -0
  145. data/_generated/models/webhook_delivery_public_replay_of_member1.rb +62 -0
  146. data/_generated/models/webhook_delivery_public_response_body_preview_member1.rb +62 -0
  147. data/_generated/models/webhook_delivery_public_response_status_member1.rb +62 -0
  148. data/_generated/models/webhook_list_response.rb +195 -0
  149. data/_generated/models/webhook_list_response_next_cursor_member1.rb +62 -0
  150. data/_generated/models/webhook_public.rb +316 -0
  151. data/_generated/models/webhook_public_secret_rotated_at_member1.rb +62 -0
  152. data/_generated/v1/audit/audit.rb +0 -0
  153. data/_generated/v1/audit/audit_request_builder.rb +31 -0
  154. data/_generated/v1/audit/events/events.rb +0 -0
  155. data/_generated/v1/audit/events/events_request_builder.rb +93 -0
  156. data/_generated/v1/billing/billing.rb +0 -0
  157. data/_generated/v1/billing/billing_request_builder.rb +49 -0
  158. data/_generated/v1/billing/checkout/checkout.rb +0 -0
  159. data/_generated/v1/billing/checkout/checkout_request_builder.rb +74 -0
  160. data/_generated/v1/billing/portal/portal.rb +0 -0
  161. data/_generated/v1/billing/portal/portal_request_builder.rb +68 -0
  162. data/_generated/v1/billing/subscription/subscription.rb +0 -0
  163. data/_generated/v1/billing/subscription/subscription_request_builder.rb +68 -0
  164. data/_generated/v1/billing/usage/usage.rb +0 -0
  165. data/_generated/v1/billing/usage/usage_request_builder.rb +90 -0
  166. data/_generated/v1/binary/binary.rb +0 -0
  167. data/_generated/v1/binary/binary_request_builder.rb +31 -0
  168. data/_generated/v1/binary/download/download.rb +0 -0
  169. data/_generated/v1/binary/download/download_request_builder.rb +77 -0
  170. data/_generated/v1/budgets/budgets.rb +0 -0
  171. data/_generated/v1/budgets/budgets_request_builder.rb +37 -0
  172. data/_generated/v1/budgets/policy/item/item/item.rb +0 -0
  173. data/_generated/v1/budgets/policy/item/item/with_scope_item_request_builder.rb +74 -0
  174. data/_generated/v1/budgets/policy/item/item.rb +0 -0
  175. data/_generated/v1/budgets/policy/item/with_scope_type_item_request_builder.rb +43 -0
  176. data/_generated/v1/budgets/policy/policy.rb +0 -0
  177. data/_generated/v1/budgets/policy/policy_request_builder.rb +125 -0
  178. data/_generated/v1/budgets/usage/usage.rb +0 -0
  179. data/_generated/v1/budgets/usage/usage_request_builder.rb +77 -0
  180. data/_generated/v1/cdp/cdp.rb +0 -0
  181. data/_generated/v1/cdp/cdp_request_builder.rb +37 -0
  182. data/_generated/v1/cdp/frame/frame.rb +0 -0
  183. data/_generated/v1/cdp/frame/frame_request_builder.rb +74 -0
  184. data/_generated/v1/cdp/url/url.rb +0 -0
  185. data/_generated/v1/cdp/url/url_request_builder.rb +74 -0
  186. data/_generated/v1/contact/contact.rb +1 -0
  187. data/_generated/v1/contact/contact_post_response.rb +65 -0
  188. data/_generated/v1/contact/contact_request_builder.rb +71 -0
  189. data/_generated/v1/crawl/crawl.rb +0 -0
  190. data/_generated/v1/crawl/crawl_request_builder.rb +31 -0
  191. data/_generated/v1/crawl/deep/deep.rb +0 -0
  192. data/_generated/v1/crawl/deep/deep_request_builder.rb +86 -0
  193. data/_generated/v1/crawl/deep/item/item.rb +0 -0
  194. data/_generated/v1/crawl/deep/item/with_run_item_request_builder.rb +71 -0
  195. data/_generated/v1/crawl_runs/crawl_runs.rb +1 -0
  196. data/_generated/v1/crawl_runs/crawl_runs_post_request_body.rb +65 -0
  197. data/_generated/v1/crawl_runs/crawl_runs_request_builder.rb +133 -0
  198. data/_generated/v1/crawl_runs/item/cancel/cancel.rb +0 -0
  199. data/_generated/v1/crawl_runs/item/cancel/cancel_request_builder.rb +71 -0
  200. data/_generated/v1/crawl_runs/item/item.rb +0 -0
  201. data/_generated/v1/crawl_runs/item/resume/resume.rb +0 -0
  202. data/_generated/v1/crawl_runs/item/resume/resume_request_builder.rb +71 -0
  203. data/_generated/v1/crawl_runs/item/with_run_item_request_builder.rb +88 -0
  204. data/_generated/v1/datasets/datasets.rb +1 -0
  205. data/_generated/v1/datasets/datasets_post_request_body.rb +65 -0
  206. data/_generated/v1/datasets/datasets_request_builder.rb +125 -0
  207. data/_generated/v1/datasets/item/item.rb +0 -0
  208. data/_generated/v1/datasets/item/rows/append/append.rb +1 -0
  209. data/_generated/v1/datasets/item/rows/append/append_post_request_body.rb +74 -0
  210. data/_generated/v1/datasets/item/rows/append/append_request_builder.rb +79 -0
  211. data/_generated/v1/datasets/item/rows/rows.rb +0 -0
  212. data/_generated/v1/datasets/item/rows/rows_request_builder.rb +86 -0
  213. data/_generated/v1/datasets/item/with_name_item_request_builder.rb +104 -0
  214. data/_generated/v1/discovery/discovery.rb +0 -0
  215. data/_generated/v1/discovery/discovery_request_builder.rb +65 -0
  216. data/_generated/v1/engines/engines.rb +0 -0
  217. data/_generated/v1/engines/engines_request_builder.rb +31 -0
  218. data/_generated/v1/engines/manifest/manifest.rb +0 -0
  219. data/_generated/v1/engines/manifest/manifest_request_builder.rb +84 -0
  220. data/_generated/v1/extract/extract.rb +1 -0
  221. data/_generated/v1/extract/extract_post_response.rb +65 -0
  222. data/_generated/v1/extract/extract_request_builder.rb +79 -0
  223. data/_generated/v1/identity/identity.rb +0 -0
  224. data/_generated/v1/identity/identity_request_builder.rb +72 -0
  225. data/_generated/v1/kv/item/item.rb +1 -0
  226. data/_generated/v1/kv/item/with_key_item_request_builder.rb +133 -0
  227. data/_generated/v1/kv/item/with_key_put_request_body.rb +68 -0
  228. data/_generated/v1/kv/kv.rb +0 -0
  229. data/_generated/v1/kv/kv_request_builder.rb +37 -0
  230. data/_generated/v1/map/map.rb +0 -0
  231. data/_generated/v1/map/map_request_builder.rb +72 -0
  232. data/_generated/v1/me/export/export.rb +1 -0
  233. data/_generated/v1/me/export/export_get_response.rb +68 -0
  234. data/_generated/v1/me/export/export_request_builder.rb +68 -0
  235. data/_generated/v1/me/me.rb +0 -0
  236. data/_generated/v1/me/me_request_builder.rb +78 -0
  237. data/_generated/v1/me/usage/get_period_query_parameter_type.rb +13 -0
  238. data/_generated/v1/me/usage/usage.rb +2 -0
  239. data/_generated/v1/me/usage/usage_get_response.rb +68 -0
  240. data/_generated/v1/me/usage/usage_request_builder.rb +76 -0
  241. data/_generated/v1/page/cookies/cookies.rb +0 -0
  242. data/_generated/v1/page/cookies/cookies_request_builder.rb +150 -0
  243. data/_generated/v1/page/dom_snapshot/dom_snapshot.rb +0 -0
  244. data/_generated/v1/page/dom_snapshot/dom_snapshot_request_builder.rb +74 -0
  245. data/_generated/v1/page/download/download.rb +0 -0
  246. data/_generated/v1/page/download/download_request_builder.rb +74 -0
  247. data/_generated/v1/page/eval/eval.rb +0 -0
  248. data/_generated/v1/page/eval/eval_request_builder.rb +74 -0
  249. data/_generated/v1/page/har/har.rb +0 -0
  250. data/_generated/v1/page/har/har_request_builder.rb +74 -0
  251. data/_generated/v1/page/page.rb +0 -0
  252. data/_generated/v1/page/page_request_builder.rb +73 -0
  253. data/_generated/v1/page/scroll/scroll.rb +0 -0
  254. data/_generated/v1/page/scroll/scroll_request_builder.rb +74 -0
  255. data/_generated/v1/page/upload/upload.rb +0 -0
  256. data/_generated/v1/page/upload/upload_request_builder.rb +74 -0
  257. data/_generated/v1/page/wait/wait.rb +0 -0
  258. data/_generated/v1/page/wait/wait_request_builder.rb +74 -0
  259. data/_generated/v1/pricing/pricing.rb +0 -0
  260. data/_generated/v1/pricing/pricing_request_builder.rb +31 -0
  261. data/_generated/v1/pricing/public/public.rb +0 -0
  262. data/_generated/v1/pricing/public/public_request_builder.rb +65 -0
  263. data/_generated/v1/profiles/item/item.rb +0 -0
  264. data/_generated/v1/profiles/item/with_name_item_request_builder.rb +134 -0
  265. data/_generated/v1/profiles/profiles.rb +0 -0
  266. data/_generated/v1/profiles/profiles_request_builder.rb +126 -0
  267. data/_generated/v1/proxy_providers/proxy_providers.rb +0 -0
  268. data/_generated/v1/proxy_providers/proxy_providers_request_builder.rb +65 -0
  269. data/_generated/v1/queues/item/ack/ack.rb +1 -0
  270. data/_generated/v1/queues/item/ack/ack_post_request_body.rb +71 -0
  271. data/_generated/v1/queues/item/ack/ack_request_builder.rb +76 -0
  272. data/_generated/v1/queues/item/item.rb +0 -0
  273. data/_generated/v1/queues/item/pop/pop.rb +0 -0
  274. data/_generated/v1/queues/item/pop/pop_request_builder.rb +79 -0
  275. data/_generated/v1/queues/item/push/push.rb +1 -0
  276. data/_generated/v1/queues/item/push/push_post_request_body.rb +71 -0
  277. data/_generated/v1/queues/item/push/push_request_builder.rb +76 -0
  278. data/_generated/v1/queues/item/stats/stats.rb +0 -0
  279. data/_generated/v1/queues/item/stats/stats_request_builder.rb +71 -0
  280. data/_generated/v1/queues/item/with_name_item_request_builder.rb +52 -0
  281. data/_generated/v1/queues/queues.rb +0 -0
  282. data/_generated/v1/queues/queues_request_builder.rb +37 -0
  283. data/_generated/v1/recordings/item/item.rb +0 -0
  284. data/_generated/v1/recordings/item/recording_item_request_builder.rb +104 -0
  285. data/_generated/v1/recordings/item/visual/frames/frames.rb +0 -0
  286. data/_generated/v1/recordings/item/visual/frames/frames_request_builder.rb +93 -0
  287. data/_generated/v1/recordings/item/visual/start/start.rb +0 -0
  288. data/_generated/v1/recordings/item/visual/start/start_request_builder.rb +80 -0
  289. data/_generated/v1/recordings/item/visual/stop/stop.rb +0 -0
  290. data/_generated/v1/recordings/item/visual/stop/stop_request_builder.rb +80 -0
  291. data/_generated/v1/recordings/item/visual/visual.rb +0 -0
  292. data/_generated/v1/recordings/item/visual/visual_request_builder.rb +95 -0
  293. data/_generated/v1/recordings/recordings.rb +0 -0
  294. data/_generated/v1/recordings/recordings_request_builder.rb +90 -0
  295. data/_generated/v1/registry/registry.rb +0 -0
  296. data/_generated/v1/registry/registry_request_builder.rb +31 -0
  297. data/_generated/v1/registry/token/token.rb +0 -0
  298. data/_generated/v1/registry/token/token_request_builder.rb +74 -0
  299. data/_generated/v1/schedules/item/item.rb +0 -0
  300. data/_generated/v1/schedules/item/runs/runs.rb +0 -0
  301. data/_generated/v1/schedules/item/runs/runs_request_builder.rb +71 -0
  302. data/_generated/v1/schedules/item/with_schedule_item_request_builder.rb +104 -0
  303. data/_generated/v1/schedules/schedules.rb +0 -0
  304. data/_generated/v1/schedules/schedules_request_builder.rb +113 -0
  305. data/_generated/v1/scrape/batch/batch.rb +0 -0
  306. data/_generated/v1/scrape/batch/batch_request_builder.rb +82 -0
  307. data/_generated/v1/scrape/scrape.rb +0 -0
  308. data/_generated/v1/scrape/scrape_request_builder.rb +86 -0
  309. data/_generated/v1/screenshot_blobs/item/item.rb +0 -0
  310. data/_generated/v1/screenshot_blobs/item/with_ref_item_request_builder.rb +68 -0
  311. data/_generated/v1/screenshot_blobs/screenshot_blobs.rb +0 -0
  312. data/_generated/v1/screenshot_blobs/screenshot_blobs_request_builder.rb +37 -0
  313. data/_generated/v1/search/search.rb +0 -0
  314. data/_generated/v1/search/search_request_builder.rb +79 -0
  315. data/_generated/v1/sessions/create/create.rb +0 -0
  316. data/_generated/v1/sessions/create/create_request_builder.rb +74 -0
  317. data/_generated/v1/sessions/item/budget_stream/budget_stream.rb +0 -0
  318. data/_generated/v1/sessions/item/budget_stream/budget_stream_request_builder.rb +71 -0
  319. data/_generated/v1/sessions/item/extend/extend.rb +0 -0
  320. data/_generated/v1/sessions/item/extend/extend_request_builder.rb +77 -0
  321. data/_generated/v1/sessions/item/item.rb +0 -0
  322. data/_generated/v1/sessions/item/pin/pin.rb +0 -0
  323. data/_generated/v1/sessions/item/pin/pin_request_builder.rb +71 -0
  324. data/_generated/v1/sessions/item/profile_item_request_builder.rb +76 -0
  325. data/_generated/v1/sessions/item/recording/recording.rb +0 -0
  326. data/_generated/v1/sessions/item/recording/recording_request_builder.rb +43 -0
  327. data/_generated/v1/sessions/item/recording/start/start.rb +0 -0
  328. data/_generated/v1/sessions/item/recording/start/start_request_builder.rb +74 -0
  329. data/_generated/v1/sessions/item/recording/stop/stop.rb +0 -0
  330. data/_generated/v1/sessions/item/recording/stop/stop_request_builder.rb +74 -0
  331. data/_generated/v1/sessions/item/release/release.rb +0 -0
  332. data/_generated/v1/sessions/item/release/release_request_builder.rb +71 -0
  333. data/_generated/v1/sessions/item/takeover/takeover.rb +0 -0
  334. data/_generated/v1/sessions/item/takeover/takeover_request_builder.rb +77 -0
  335. data/_generated/v1/sessions/item/takeover_release/takeover_release.rb +0 -0
  336. data/_generated/v1/sessions/item/takeover_release/takeover_release_request_builder.rb +71 -0
  337. data/_generated/v1/sessions/item/takeover_token/takeover_token.rb +0 -0
  338. data/_generated/v1/sessions/item/takeover_token/takeover_token_request_builder.rb +71 -0
  339. data/_generated/v1/sessions/sessions.rb +0 -0
  340. data/_generated/v1/sessions/sessions_request_builder.rb +96 -0
  341. data/_generated/v1/storage_states/detach/detach.rb +0 -0
  342. data/_generated/v1/storage_states/detach/detach_request_builder.rb +74 -0
  343. data/_generated/v1/storage_states/item/attach/attach.rb +0 -0
  344. data/_generated/v1/storage_states/item/attach/attach_request_builder.rb +77 -0
  345. data/_generated/v1/storage_states/item/item.rb +0 -0
  346. data/_generated/v1/storage_states/item/with_id_or_name_item_request_builder.rb +104 -0
  347. data/_generated/v1/storage_states/storage_states.rb +1 -0
  348. data/_generated/v1/storage_states/storage_states_post_request_body.rb +65 -0
  349. data/_generated/v1/storage_states/storage_states_request_builder.rb +127 -0
  350. data/_generated/v1/updates/updates.rb +0 -0
  351. data/_generated/v1/updates/updates_request_builder.rb +75 -0
  352. data/_generated/v1/v1.rb +0 -0
  353. data/_generated/v1/v1_request_builder.rb +208 -0
  354. data/_generated/v1/webhooks/item/deliveries/deliveries.rb +0 -0
  355. data/_generated/v1/webhooks/item/deliveries/deliveries_request_builder.rb +97 -0
  356. data/_generated/v1/webhooks/item/deliveries/item/item.rb +0 -0
  357. data/_generated/v1/webhooks/item/deliveries/item/retry_escaped/retry_escaped.rb +0 -0
  358. data/_generated/v1/webhooks/item/deliveries/item/retry_escaped/retry_request_builder.rb +78 -0
  359. data/_generated/v1/webhooks/item/deliveries/item/with_delivery_item_request_builder.rb +40 -0
  360. data/_generated/v1/webhooks/item/item.rb +0 -0
  361. data/_generated/v1/webhooks/item/rotate_secret/rotate_secret.rb +0 -0
  362. data/_generated/v1/webhooks/item/rotate_secret/rotate_secret_request_builder.rb +72 -0
  363. data/_generated/v1/webhooks/item/with_webhook_item_request_builder.rb +111 -0
  364. data/_generated/v1/webhooks/webhooks.rb +0 -0
  365. data/_generated/v1/webhooks/webhooks_request_builder.rb +128 -0
  366. data/lib/ghostcrawl/client.rb +761 -0
  367. data/lib/ghostcrawl/error_codes.rb +104 -0
  368. data/lib/ghostcrawl/errors.rb +137 -0
  369. data/lib/ghostcrawl/version.rb +5 -0
  370. data/lib/ghostcrawl.rb +17 -0
  371. metadata +460 -0
@@ -0,0 +1,761 @@
1
+ # frozen_string_literal: true
2
+
3
+ # GhostCrawl Ruby SDK — idiomatic facade over the Kiota-generated core.
4
+ #
5
+ # Architecture:
6
+ #
7
+ # _generated/ Kiota core — spec-faithful 98-op request-builder (models, transport, auth)
8
+ # This FACADE — thin idiomatic layer delegating to the generated builders
9
+ #
10
+ # All HTTP transport, URL routing, serialization, and auth come from the generated core.
11
+ # The facade maps idiomatic calls (client.scrape) to generated builders via
12
+ # BaseBearerTokenAuthenticationProvider + FaradayRequestAdapter.
13
+ #
14
+ # Usage:
15
+ # require "ghostcrawl"
16
+ # client = Ghostcrawl::Client.new(token: "gck_live_YOUR_KEY")
17
+ # result = client.scrape(url: "https://example.com")
18
+
19
+ require "microsoft_kiota_abstractions"
20
+ require "microsoft_kiota_faraday"
21
+ require "microsoft_kiota_serialization_json"
22
+ require_relative "../../_generated/ghostcrawl_client"
23
+ require_relative "../../_generated/models/scrape_request"
24
+ require_relative "../../_generated/models/search_request"
25
+ require_relative "../../_generated/models/extract_request"
26
+ require_relative "../../_generated/models/deep_crawl_body"
27
+ require_relative "../../_generated/models/map_body"
28
+ require_relative "../../_generated/models/session_create_request"
29
+ require_relative "../../_generated/models/extend_body"
30
+ require_relative "../../_generated/models/profile_create_request"
31
+ require_relative "../../_generated/models/profile_update_request"
32
+ require_relative "../../_generated/models/webhook_create_request"
33
+ require_relative "../../_generated/models/schedule_create_request"
34
+ require_relative "errors"
35
+
36
+ module Ghostcrawl
37
+ # ---------------------------------------------------------------------------
38
+ # Fix: microsoft_kiota_serialization_json 0.9.2 has a bug where
39
+ # write_object_value(nil, body) creates a temp writer, serializes body into
40
+ # temp, then RETURNS temp without merging temp's content into self.@writer.
41
+ # set_content_from_parsable calls write_object_value(nil, body) and discards
42
+ # the return value, so @content = {}.to_json = "{}".
43
+ #
44
+ # Fix: monkey-patch write_object_value so that when key is nil, it serializes
45
+ # body into self directly (not a temp writer) — same as what the non-nil branch
46
+ # does, but without a key.
47
+ # ---------------------------------------------------------------------------
48
+
49
# @api private
#
# Override for the JSON serialization writer's +write_object_value+.
# A nil key means "root-level object": the value writes its fields straight
# into the receiving writer. Any other key is passed on to the original
# implementation untouched.
#
# NOTE(review): for a nil key this returns the result of
# +value.serialize(self)+, not a writer object — confirm no upstream caller
# relies on the nil-key return value.
module KiotaWriterFix
  # @param key [String, nil] property name, or nil for a root-level object
  # @param value [#serialize, nil] object to serialize; falsy values are skipped
  # @return [Object, nil] nil for a falsy value, the result of
  #   +value.serialize(self)+ for a nil key, otherwise whatever +super+ returns
  def write_object_value(key, value)
    return unless value
    return super unless key.nil?

    # Root-level object: serialize into this writer rather than a temporary one.
    value.serialize(self)
  end
end
61
+
62
# Install the writer patch. The ancestry check means loading this file again
# does not attempt a second prepend.
unless MicrosoftKiotaSerializationJson::JsonSerializationWriter.ancestors.include?(KiotaWriterFix)
  MicrosoftKiotaSerializationJson::JsonSerializationWriter.prepend(KiotaWriterFix)
end
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Fix 2: Some generated request builders pass a Module (e.g. `Binary`) as
68
+ # the factory to `send_async`. The parse node's `get_object_value(factory)`
69
+ # calls `factory.call(self)`, which fails for a Module.
70
+ #
71
+ # Fix: when factory doesn't respond to `call`, fall back to parsing the
72
+ # response body as JSON and returning the raw hash. This gives us plain
73
+ # Ruby hashes (which ResponseHelper.to_hash handles correctly) without
74
+ # requiring generated response models.
75
+ # ---------------------------------------------------------------------------
76
+
77
# @api private
#
# Override for the JSON parse node's +get_object_value+ that tolerates
# factories which are not callable (for example a bare Module passed by a
# generated request builder).
module KiotaParseNodeFix
  # @param factory [#call, Object] model factory; anything that does not
  #   respond to +call+ triggers the raw-node fallback
  # @return [Object] the result of the original implementation for a callable
  #   factory, otherwise the node held in +@current_node+
  # @raise [StandardError] whatever the original implementation raises; the
  #   error propagates from the first (and only) attempt
  def get_object_value(factory)
    # Deliberately no rescue-and-retry: the fallback below cannot raise, so a
    # rescue here would only ever re-run a +super+ call that already failed.
    return super if factory.respond_to?(:call)

    # Non-callable factory: hand back the node as-is. Per the original note
    # this is expected to be already-parsed JSON (Hash/Array/primitive).
    @current_node
  end
end
88
+
89
# Install the parse-node patch. The ancestry check means loading this file
# again does not attempt a second prepend.
unless MicrosoftKiotaSerializationJson::JsonParseNode.ancestors.include?(KiotaParseNodeFix)
  MicrosoftKiotaSerializationJson::JsonParseNode.prepend(KiotaParseNodeFix)
end
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # AdditionalDataBody — a minimal Parsable that wraps an arbitrary Hash.
94
+ #
95
+ # Used for all POST bodies. The generated models set typed defaults in their
96
+ # constructors that would be serialized as null/empty-enum values and cause
97
+ # 422 validation errors. AdditionalDataBody only serializes the fields we
98
+ # explicitly pass, producing clean JSON.
99
+ #
100
+ # Depends on KiotaWriterFix above to correctly handle the nil-key
101
+ # write_object_value(nil, body) call from set_content_from_parsable.
102
+ # ---------------------------------------------------------------------------
103
+
104
# @api private
#
# Minimal Parsable that carries an arbitrary Hash and serializes exactly the
# entries it was given, with every top-level key converted to a String.
class AdditionalDataBody
  include MicrosoftKiotaAbstractions::Parsable

  class << self
    # Factory hook for deserialization; the parse node is ignored.
    #
    # @param _parse_node [Object] unused
    # @return [AdditionalDataBody] a new, empty body
    def create_from_discriminator_value(_parse_node)
      AdditionalDataBody.new
    end
  end

  # @param data [Hash] payload; top-level keys are stringified into a new Hash
  def initialize(data = {})
    self.additional_data = data
  end

  # @return [Hash{String => Object}] the stored payload
  def additional_data
    @data
  end

  # Replaces the payload, stringifying its top-level keys.
  #
  # @param hash [Hash] new payload
  def additional_data=(hash)
    @data = hash.transform_keys { |key| key.to_s }
  end

  # There are no typed fields, so nothing is deserialized by name.
  #
  # @return [Hash] always empty
  def get_field_deserializers
    {}
  end

  # Writes the stored payload through the writer's additional-data channel.
  #
  # @param writer [#write_additional_data] serialization writer
  def serialize(writer)
    writer.write_additional_data(additional_data)
  end
end
132
+ end
133
+
134
+ module Ghostcrawl
135
+ DEFAULT_BASE_URL = "https://api.ghostcrawl.io"
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Static bearer token provider — implements AccessTokenProvider
139
+ # ---------------------------------------------------------------------------
140
+
141
# @api private
#
# Access-token provider that always hands out the single token it was
# constructed with.
class StaticTokenProvider
  include MicrosoftKiotaAbstractions::AccessTokenProvider

  # @param token [String] bearer token to hand out
  def initialize(token)
    @token = token
  end

  # Builds a validator from an empty host list.
  #
  # @return [MicrosoftKiotaAbstractions::AllowedHostsValidator]
  def get_allowed_hosts_validator
    MicrosoftKiotaAbstractions::AllowedHostsValidator.new([])
  end

  # Returns the token wrapped in a Fiber, because the Kiota Faraday adapter
  # calls +resume+ on whatever this method returns.
  #
  # @param _uri [Object] unused
  # @param _additional_authentication_context [Object, nil] unused
  # @return [Fiber] a Fiber whose +resume+ yields the token
  def get_authorization_token(_uri, _additional_authentication_context = nil)
    bearer = @token
    Fiber.new do
      bearer
    end
  end
end
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Response helpers — convert Kiota Parsable/Hash responses to plain Hash
163
+ # ---------------------------------------------------------------------------
164
+
165
# @api private
# Helpers that turn Kiota response values into plain Ruby data and surface
# result-channel (HTTP-200) failures as exceptions.
module ResponseHelper
  # Converts any Kiota response value to a plain Hash or Array.
  #
  # A Fiber is resumed first (the adapter returns a Fiber per request and
  # resuming it is what performs the HTTP call). A response that resolves to
  # +nil+ at the TOP level becomes +{}+ so callers always get a Hash-like
  # value. Nested +nil+ values are preserved as +nil+: mapping them to +{}+
  # would turn a JSON +null+ field into a truthy empty Hash.
  #
  # @param value [Object] Fiber, Hash, Array, Parsable-like object, or scalar
  # @return [Hash, Array, Object] plain data; +{}+ for an empty response
  def self.to_hash(value)
    plain = plain_value(value)
    plain.nil? ? {} : plain
  end

  # Recursive worker behind {to_hash}; unlike it, keeps +nil+ as +nil+.
  def self.plain_value(value)
    value = resolve_fiber(value)

    case value
    when Hash
      value.transform_values { |v| plain_value(v) }
    when Array
      value.map { |v| plain_value(v) }
    when nil
      # Explicit branch: nil responds to #to_h and would otherwise become {}.
      nil
    else
      # Typed object: prefer its additional_data Hash, else its #to_h form;
      # anything exposing neither (String, Integer, true/false) passes through.
      if value.respond_to?(:additional_data) && value.additional_data
        value.additional_data.transform_values { |v| plain_value(v) }
      elsif value.respond_to?(:to_h)
        value.to_h.transform_values { |v| plain_value(v) }
      else
        value
      end
    end
  end

  # Resumes +value+ when it is a Fiber. A Ghostcrawl error propagates as-is;
  # any other StandardError is handed to +Ghostcrawl.raise_translated+
  # (expected to raise a typed Ghostcrawl error).
  def self.resolve_fiber(value)
    return value unless value.is_a?(Fiber)

    begin
      value.resume
    rescue Ghostcrawl::GhostcrawlError
      raise
    rescue StandardError => e
      Ghostcrawl.raise_translated(e)
    end
  end

  private_class_method :plain_value, :resolve_fiber

  # Inspects a decoded HTTP-200 response hash for a RESULT-channel failure (the
  # target page could not be scraped) and raises {Ghostcrawl::ScrapeError} when
  # one is present.
  #
  # A failure is signalled by any of:
  # * a +result_error+ Hash carrying a +code+
  # * +ok+ explicitly +false+ (always a failure, even with no code)
  # * a top-level +code+ that is a known RESULT-channel code
  # * +status+ equal to "failed"
  #
  # When the hash has a +results+ Array, each element is checked instead and
  # the envelope's own top-level fields are not inspected.
  #
  # @param hash [Hash] the decoded response (non-Hash values are returned as-is)
  # @return [Hash] the same hash, when it is not a failure
  # @raise [Ghostcrawl::ScrapeError] when a failure signal is present
  # @api private
  def self.raise_on_result_error!(hash)
    return hash unless hash.is_a?(Hash)

    # Descend into a `results` envelope (scrape/extract wrap per-URL results) —
    # the target failure lives on the INNER result, not the envelope top level.
    inner = hash["results"]
    if inner.is_a?(Array)
      inner.each { |item| raise_on_result_error!(item) }
      return hash
    end

    result_error = hash["result_error"]
    result_error = nil unless result_error.is_a?(Hash)
    top_code = hash["code"]
    ok_false = hash["ok"] == false

    # Pull the code: result_error wins, then a top-level RESULT-channel code.
    code = nil
    code = result_error["code"] if result_error
    code ||= top_code if Ghostcrawl::ErrorCodes.result_channel?(top_code)

    # The flat markdown-build envelope reports a target failure ONLY via
    # status="failed" (no ok/result_error) — don't count it as a success.
    status_failed = hash["status"] == "failed"
    code ||= top_code if status_failed && top_code.is_a?(String)

    # Only raise when there is a concrete result-channel failure signal.
    return hash unless code || ok_false || status_failed

    # A failure signal with no usable code -> treat as empty/unusable content.
    code ||= Ghostcrawl::ErrorCodes::EMPTY_CONTENT

    # An explicit per-result retryable flag wins over the per-code table.
    retryable =
      if result_error && result_error.key?("retryable")
        result_error["retryable"]
      else
        Ghostcrawl::ErrorCodes::RETRYABLE.fetch(code, false)
      end

    target_status = nil
    target_status = result_error["target_status"] if result_error
    target_status ||= hash["target_status"] # flat markdown-envelope path
    reason = result_error && result_error["reason"]

    msg = "scrape failed (#{code})"
    msg += ": #{reason}" if reason && !reason.to_s.empty?
    msg += " (target HTTP #{target_status})" if target_status

    raise Ghostcrawl::ScrapeError.new(
      msg,
      status_code: 200,
      body: nil,
      code: code,
      retryable: retryable,
      request_id: hash["request_id"],
      target_status: target_status
    )
  end
end
279
+
280
+ # ---------------------------------------------------------------------------
281
+ # Sub-clients — each delegates to the generated v1 request builders
282
+ # ---------------------------------------------------------------------------
283
+
284
# Manage crawl runs — /v1/crawl-runs.
class CrawlRunsClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # Start a new crawl run from a seed URL (POST /v1/crawl-runs).
  # The request body carries +action: "start"+ and wraps +url+ in a
  # +seed_urls+ array; extra keyword options are merged in with string keys.
  # @param url [String] seed URL
  # @param max_depth [Integer] maximum crawl depth (default 2)
  # @param max_pages [Integer] maximum pages (default 100)
  # @param raise_on_result_error [Boolean] when true (default), the decoded
  #   response is passed through ResponseHelper.raise_on_result_error!
  # @return [Hash] decoded response
  def start(url:, max_depth: 2, max_pages: 100, raise_on_result_error: true, **opts)
    payload = {
      "action" => "start",
      "seed_urls" => [url],
      "max_depth" => max_depth,
      "max_pages" => max_pages
    }
    payload.merge!(opts.transform_keys(&:to_s))
    body = AdditionalDataBody.new(payload)

    response = ResponseHelper.to_hash(@v1.crawl_runs.post(body))
    return response unless raise_on_result_error

    ResponseHelper.raise_on_result_error!(response)
  end

  # List crawl runs (GET /v1/crawl-runs).
  def list
    pending = @v1.crawl_runs.get
    ResponseHelper.to_hash(pending)
  end

  # Get a single crawl run by ID (GET /v1/crawl-runs/{run_id}).
  def get(run_id)
    pending = @v1.crawl_runs.by_run_id(run_id).get
    ResponseHelper.to_hash(pending)
  end

  # Cancel a crawl run (POST /v1/crawl-runs/{run_id}/cancel).
  def cancel(run_id)
    pending = @v1.crawl_runs.by_run_id(run_id).cancel.post
    ResponseHelper.to_hash(pending)
  end
end
326
+
327
# Manage browser sessions — /v1/sessions.
class SessionsClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List sessions (GET /v1/sessions).
  def list
    pending = @v1.sessions.get
    ResponseHelper.to_hash(pending)
  end

  # Create a new browser session (POST /v1/sessions/create).
  # +profile_name+ is sent under the "profile" key; extra keyword options are
  # merged in with string keys.
  # @param profile_name [String] identity profile to use
  def create(profile_name:, **opts)
    payload = { "profile" => profile_name }
    payload.merge!(opts.transform_keys(&:to_s))
    pending = @v1.sessions.create.post(AdditionalDataBody.new(payload))
    ResponseHelper.to_hash(pending)
  end

  # Extend a session's TTL (POST /v1/sessions/{id}/extend).
  # +duration_seconds+ is sent as "ttl_seconds".
  # Note: on instances of this class this method replaces Ruby's built-in
  # Object#extend.
  # @param session_id [String] session identifier
  # @param duration_seconds [Integer] value sent as ttl_seconds (default 300)
  def extend(session_id, duration_seconds: 300)
    ttl_body = AdditionalDataBody.new({ "ttl_seconds" => duration_seconds })
    pending = @v1.sessions.by_profile__id(session_id).extend.post(ttl_body)
    ResponseHelper.to_hash(pending)
  end

  # Release a session (POST /v1/sessions/{id}/release).
  def release(session_id)
    pending = @v1.sessions.by_profile__id(session_id).release.post
    ResponseHelper.to_hash(pending)
  end
end
361
+
362
# Manage identity profiles — /v1/profiles.
class ProfilesClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List profiles (GET /v1/profiles).
  def list
    pending = @v1.profiles.get
    ResponseHelper.to_hash(pending)
  end

  # Get a profile by name (GET /v1/profiles/{name}).
  def get(name)
    pending = @v1.profiles.by_name(name).get
    ResponseHelper.to_hash(pending)
  end

  # Create a new profile (POST /v1/profiles).
  # Extra keyword options are merged into the body with string keys.
  # @param name [String] profile name
  def create(name:, **config)
    payload = { "name" => name }
    payload.merge!(config.transform_keys(&:to_s))
    body = AdditionalDataBody.new(payload)
    ResponseHelper.to_hash(@v1.profiles.post(body))
  end

  # Update a profile (PUT /v1/profiles/{name}).
  # The body holds only the given keyword options, with string keys.
  def update(name, **config)
    body = AdditionalDataBody.new(config.transform_keys(&:to_s))
    pending = @v1.profiles.by_name(name).put(body)
    ResponseHelper.to_hash(pending)
  end

  # Delete a profile (DELETE /v1/profiles/{name}).
  def delete(name)
    pending = @v1.profiles.by_name(name).delete
    ResponseHelper.to_hash(pending)
  end
end
399
+
400
# Manage webhooks — /v1/webhooks.
class WebhooksClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List all webhooks.
  # Delegates to GET /v1/webhooks via the generated WebhooksRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.webhooks.get)
  end

  # Get a webhook by ID.
  # Delegates to GET /v1/webhooks/{id} via the generated builder.
  def get(webhook_id)
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).get)
  end

  # Register a new webhook endpoint.
  # Delegates to POST /v1/webhooks via the generated WebhooksRequestBuilder.
  # @param url [String] endpoint URL
  # @param event_types [Array, nil] sent as "event_types" when given
  # @param events [Array, nil] back-compat alias, used only when
  #   +event_types+ is nil
  def create(url:, event_types: nil, events: nil, **opts)
    data = { "url" => url }.merge(opts.transform_keys(&:to_s))
    # API field is "event_types"; "events" kept as a back-compat alias.
    et = event_types.nil? ? events : event_types
    data["event_types"] = et unless et.nil?
    ResponseHelper.to_hash(@v1.webhooks.post(AdditionalDataBody.new(data)))
  end

  # Delete a webhook.
  # Delegates to DELETE /v1/webhooks/{id} via the generated builder.
  #
  # The builder only returns a pending request; it must be resolved through
  # ResponseHelper.to_hash (which resumes it and translates transport errors),
  # otherwise the DELETE is never sent. The decoded response is discarded.
  # @return [Hash] always +{}+
  def delete(webhook_id)
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).delete)
    {}
  end

  # Rotate the signing secret for a webhook.
  # Delegates to POST /v1/webhooks/{id}/rotate-secret via the generated builder.
  def rotate_secret(webhook_id)
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).rotate_secret.post)
  end
end
441
+
442
# Manage schedules — /v1/schedules.
class SchedulesClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List all schedules.
  # Delegates to GET /v1/schedules via the generated SchedulesRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.schedules.get)
  end

  # Get a schedule by ID.
  # Delegates to GET /v1/schedules/{id} via the generated builder.
  def get(schedule_id)
    ResponseHelper.to_hash(@v1.schedules.by_schedule_id(schedule_id).get)
  end

  # Create a new schedule.
  # Delegates to POST /v1/schedules via the generated SchedulesRequestBuilder.
  # +cron+ is sent under the "cron_expr" key; extra keyword options are merged
  # in with string keys.
  # @param cron [String] cron expression
  # @param task [Object] task definition, sent under "task"
  def create(cron:, task:, **opts)
    data = { "cron_expr" => cron, "task" => task }.merge(opts.transform_keys(&:to_s))
    ResponseHelper.to_hash(@v1.schedules.post(AdditionalDataBody.new(data)))
  end

  # Delete a schedule.
  # Delegates to DELETE /v1/schedules/{id} via the generated builder.
  #
  # The builder only returns a pending request; it must be resolved through
  # ResponseHelper.to_hash (which resumes it and translates transport errors),
  # otherwise the DELETE is never sent. The decoded response is discarded.
  # @return [Hash] always +{}+
  def delete(schedule_id)
    ResponseHelper.to_hash(@v1.schedules.by_schedule_id(schedule_id).delete)
    {}
  end
end
474
+
475
# Manage datasets — /v1/datasets.
class DatasetsClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List datasets (GET /v1/datasets).
  def list
    pending = @v1.datasets.get
    ResponseHelper.to_hash(pending)
  end

  # Get a dataset by name (GET /v1/datasets/{name}).
  def get(name)
    pending = @v1.datasets.by_name(name).get
    ResponseHelper.to_hash(pending)
  end

  # Create a new dataset (POST /v1/datasets).
  # Extra keyword options are merged into the body with string keys.
  # @param name [String] dataset name
  def create(name:, **opts)
    payload = { "name" => name }
    payload.merge!(opts.transform_keys(&:to_s))
    body = AdditionalDataBody.new(payload)
    ResponseHelper.to_hash(@v1.datasets.post(body))
  end

  # Delete a dataset (DELETE /v1/datasets/{name}).
  def delete(name)
    pending = @v1.datasets.by_name(name).delete
    ResponseHelper.to_hash(pending)
  end

  # Get rows from a dataset (GET /v1/datasets/{name}/rows).
  def rows(name)
    pending = @v1.datasets.by_name(name).rows.get
    ResponseHelper.to_hash(pending)
  end

  # Append rows to a dataset (POST /v1/datasets/{name}/rows/append).
  # @param name [String] dataset name
  # @param rows [Object] value sent under the "rows" key
  def append(name, rows)
    body = AdditionalDataBody.new({ "rows" => rows })
    pending = @v1.datasets.by_name(name).rows.append.post(body)
    ResponseHelper.to_hash(pending)
  end
end
519
+
520
# Manage session recordings — /v1/recordings.
class RecordingsClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # List all recordings.
  # Delegates to GET /v1/recordings via the generated RecordingsRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.recordings.get)
  end

  # Get a recording by ID.
  # Delegates to GET /v1/recordings/{id} via the generated builder.
  def get(recording_id)
    ResponseHelper.to_hash(@v1.recordings.by_recording__id(recording_id).get)
  end

  # Delete a recording.
  # Delegates to DELETE /v1/recordings/{id} via the generated builder.
  #
  # The builder only returns a pending request; it must be resolved through
  # ResponseHelper.to_hash (which resumes it and translates transport errors),
  # otherwise the DELETE is never sent. The decoded response is discarded.
  # @return [Hash] always +{}+
  def delete(recording_id)
    ResponseHelper.to_hash(@v1.recordings.by_recording__id(recording_id).delete)
    {}
  end
end
545
+
546
# Key-value store — /v1/kv.
class KVClient
  # @param v1 [Object] generated v1 request-builder root
  def initialize(v1)
    @v1 = v1
  end

  # Get a value by key (GET /v1/kv/{key}).
  def get(key)
    pending = @v1.kv.by_key(key).get
    ResponseHelper.to_hash(pending)
  end

  # Set a key-value pair (PUT /v1/kv/{key}).
  # @param key [String] key to write
  # @param value [Object] sent under the "value" key of the body
  def set(key, value)
    body = AdditionalDataBody.new({ "value" => value })
    pending = @v1.kv.by_key(key).put(body)
    ResponseHelper.to_hash(pending)
  end

  # Delete a key (DELETE /v1/kv/{key}).
  def delete(key)
    pending = @v1.kv.by_key(key).delete
    ResponseHelper.to_hash(pending)
  end
end
571
+
572
+ # ---------------------------------------------------------------------------
573
+ # Main facade — Client
574
+ # ---------------------------------------------------------------------------
575
+
576
# GhostCrawl idiomatic API client.
#
# Delegates all HTTP transport, URL routing, serialization, and auth to the
# Kiota-generated canonical core (_generated/). This facade is the shipped API.
#
# @example
#   require "ghostcrawl"
#   client = Ghostcrawl::Client.new(token: "gck_live_YOUR_KEY")
#   result = client.scrape(url: "https://example.com")
class Client
  # Default read timeout (seconds). Browser-rendered scrapes/crawls are slow,
  # and the underlying net/http default (60s) is too short for them.
  DEFAULT_TIMEOUT = 300

  # @return [CrawlRunsClient]
  attr_reader :crawl_runs
  # @return [SessionsClient]
  attr_reader :sessions
  # @return [ProfilesClient]
  attr_reader :profiles
  # @return [WebhooksClient]
  attr_reader :webhooks
  # @return [SchedulesClient]
  attr_reader :schedules
  # @return [DatasetsClient]
  attr_reader :datasets
  # @return [RecordingsClient]
  attr_reader :recordings
  # @return [KVClient]
  attr_reader :kv

  # Blank values (nil, empty, or whitespace-only) for any argument or
  # environment variable are treated as "not provided" and fall through to
  # the next source.
  #
  # @param token [String, nil] API key. Falls back to +GHOSTCRAWL_API_KEY+ env var.
  # @param base_url [String, nil] Override API base URL. Falls back to
  #   +GHOSTCRAWL_BASE_URL+ env var, then the module's default base URL.
  # @param timeout [Integer, nil] Per-request read timeout in seconds. Falls back
  #   to +GHOSTCRAWL_TIMEOUT+ env var, then {DEFAULT_TIMEOUT}.
  # @raise [ArgumentError] when no non-blank token is available
  def initialize(token: nil, base_url: nil, timeout: nil)
    resolved_token = first_present(token, ENV.fetch("GHOSTCRAWL_API_KEY", nil))
    if resolved_token.nil?
      raise ArgumentError,
            "token is required — pass token: or set GHOSTCRAWL_API_KEY. " \
            "Get your key at https://ghostcrawl.io"
    end

    resolved_base = resolve_base_url(base_url)
    resolved_timeout = resolve_timeout(timeout)

    # Build the Kiota core via BaseBearerTokenAuthenticationProvider + FaradayRequestAdapter.
    # All HTTP, auth, serialization, and URL routing delegate to the generated core.
    auth_provider = MicrosoftKiotaAbstractions::BaseBearerTokenAuthenticationProvider.new(
      StaticTokenProvider.new(resolved_token)
    )
    adapter = MicrosoftKiotaFaraday::FaradayRequestAdapter.new(auth_provider)
    adapter.set_base_url(resolved_base)

    # The default Faraday connection sets no timeout, so it inherits net/http's
    # 60s read timeout — too short for browser-rendered work. Raise it.
    # A non-positive timeout leaves the connection's settings untouched.
    if adapter.client.respond_to?(:options) && resolved_timeout.positive?
      adapter.client.options.timeout = resolved_timeout
      adapter.client.options.open_timeout = 30
    end

    @core = Ghostcrawl::GhostcrawlClient.new(adapter)
    @v1 = @core.v1
    # Kept so #me can re-issue the GET /v1/me request with the raw-JSON
    # (Binary) response factory instead of the typed MeResponse model.
    @adapter = adapter

    @crawl_runs = CrawlRunsClient.new(@v1)
    @sessions = SessionsClient.new(@v1)
    @profiles = ProfilesClient.new(@v1)
    @webhooks = WebhooksClient.new(@v1)
    @schedules = SchedulesClient.new(@v1)
    @datasets = DatasetsClient.new(@v1)
    @recordings = RecordingsClient.new(@v1)
    @kv = KVClient.new(@v1)
  end

  # ---------------------------------------------------------------------------
  # Top-level facade methods — delegate to generated builders
  # ---------------------------------------------------------------------------

  # Scrape a single URL and return the rendered content.
  # Delegates to POST /v1/scrape via the generated ScrapeRequestBuilder.
  # @param url [String] target URL
  # @param format [String] output format: "markdown" (default), "html", "text"
  # @param engine [String] browser engine: "auto" (default), "chrome", "firefox", "webkit"
  # @param javascript [Boolean] enable JavaScript rendering (default true);
  #   sent as "javascript_enabled"
  # @param extract_schema [Hash, nil] JSON Schema for structured extraction;
  #   omitted from the body when nil
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] response with +markdown+, +status+, and other fields
  def scrape(url:, format: "markdown", engine: "auto", javascript: true, extract_schema: nil,
             raise_on_result_error: true, **opts)
    # Use AdditionalDataBody to send only the fields we specify — the generated
    # ScrapeRequest model would serialize typed defaults (nulls + empty enums) that
    # cause 422 validation errors on the server.
    data = { "url" => url, "format" => format, "engine" => engine,
             "javascript_enabled" => javascript }.merge(opts.transform_keys(&:to_s))
    data["extract_schema"] = extract_schema unless extract_schema.nil?
    hash = ResponseHelper.to_hash(@v1.scrape.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Search the web and return results.
  # Delegates to POST /v1/search via the generated SearchRequestBuilder.
  #
  # /v1/search requires your own search-backend API key (BYO; GhostCrawl
  # charges no markup). Pass it as +provider_key+ — it is sent as the
  # +X-Provider-Authorization: Bearer <provider_key>+ header the backend
  # requires. Without it the API replies 401 search_backend_key_missing.
  # @param query [String] search query
  # @param engine [String] search engine: "google" (default), "bing", "duckduckgo"
  # @param limit [Integer] maximum results (default 10)
  # @param provider_key [String, nil] BYO search-backend key (sent as X-Provider-Authorization)
  # @return [Hash] response with +results+ list
  def search(query:, engine: "google", limit: 10, provider_key: nil, **opts)
    data = { "query" => query, "engine" => engine,
             "limit" => limit }.merge(opts.transform_keys(&:to_s))
    config = nil
    unless provider_key.nil?
      config = MicrosoftKiotaAbstractions::RequestConfiguration.new
      headers = MicrosoftKiotaAbstractions::RequestHeaders.new
      headers.add("X-Provider-Authorization", "Bearer #{provider_key}")
      config.headers = headers
    end
    ResponseHelper.to_hash(@v1.search.post(AdditionalDataBody.new(data), config))
  end

  # Extract structured data from a URL using a JSON Schema.
  # Delegates to POST /v1/extract via the generated ExtractRequestBuilder.
  # @param url [String] target URL
  # @param schema [Hash] JSON Schema describing the shape to extract
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] extracted data
  def extract(url:, schema:, raise_on_result_error: true, **opts)
    data = { "url" => url, "schema" => schema }.merge(opts.transform_keys(&:to_s))
    hash = ResponseHelper.to_hash(@v1.extract.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Start a deep crawl from a seed URL.
  # Delegates to POST /v1/crawl/deep via the generated CrawlDeepRequestBuilder.
  # +url+ is wrapped in a +seed_urls+ array and +max_pages+ is sent as "max_urls".
  # @param url [String] seed URL
  # @param max_depth [Integer] maximum crawl depth (default 2)
  # @param max_pages [Integer] maximum pages (default 100)
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] crawl run record
  def crawl(url:, max_depth: 2, max_pages: 100, raise_on_result_error: true, **opts)
    data = { "seed_urls" => [url], "max_depth" => max_depth,
             "max_urls" => max_pages }.merge(opts.transform_keys(&:to_s))
    hash = ResponseHelper.to_hash(@v1.crawl.deep.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Map all URLs reachable from a seed URL.
  # Delegates to POST /v1/map via the generated MapRequestBuilder.
  # @param url [String] seed URL
  # @return [Hash] response with +urls+ list
  def map(url:, **opts)
    data = { "url" => url }.merge(opts.transform_keys(&:to_s))
    ResponseHelper.to_hash(@v1.map.post(AdditionalDataBody.new(data)))
  end

  # Get the current account's profile.
  # Delegates to GET /v1/me via the generated MeRequestBuilder.
  #
  # The generated +@v1.me.get+ deserializes into the typed +MeResponse+ model,
  # whose +created_at+ composed-type member fails under the pinned Kiota JSON
  # parser ("Error during deserialization"). To avoid that, we re-issue the
  # SAME request (reusing the builder's URL/auth/header wiring via
  # +to_get_request_information+) with the +Binary+ response factory — the
  # exact raw-JSON path the +scrape+ builder already uses (see KiotaParseNodeFix).
  # @return [Hash] account info with +user_id+, +email+, +created_at+, +primary_team+
  def me
    request_info = @v1.me.to_get_request_information(nil)
    ResponseHelper.to_hash(@adapter.send_async(request_info, Ghostcrawl::V1::Binary, {}))
  end

  private

  # Returns the first candidate that is neither nil nor blank (empty or
  # whitespace-only once converted with +to_s+), or nil when there is none.
  def first_present(*candidates)
    candidates.find { |candidate| !candidate.nil? && !candidate.to_s.strip.empty? }
  end

  # Picks the base URL (argument, then GHOSTCRAWL_BASE_URL, then the default)
  # and strips trailing slashes. +\z+ anchors at the true end of the string,
  # unlike +$+, which also matches before any embedded newline.
  def resolve_base_url(base_url)
    chosen = first_present(base_url, ENV.fetch("GHOSTCRAWL_BASE_URL", nil)) || DEFAULT_BASE_URL
    chosen.to_s.sub(%r{/+\z}, "")
  end

  # Picks the timeout (argument, then GHOSTCRAWL_TIMEOUT, then
  # DEFAULT_TIMEOUT) and coerces it with +to_i+.
  def resolve_timeout(timeout)
    (first_present(timeout, ENV.fetch("GHOSTCRAWL_TIMEOUT", nil)) || DEFAULT_TIMEOUT).to_i
  end
end
761
+ end