ghostcrawl 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +46 -0
- data/README.md +127 -0
- data/_generated/ghostcrawl.rb +0 -0
- data/_generated/ghostcrawl_client.rb +27 -0
- data/_generated/models/audit_event_out.rb +802 -0
- data/_generated/models/audit_event_out_actor_ip_member1.rb +62 -0
- data/_generated/models/audit_event_out_actor_token_id_member1.rb +62 -0
- data/_generated/models/audit_event_out_actor_user_agent_member1.rb +62 -0
- data/_generated/models/audit_event_out_actor_user_id_member1.rb +62 -0
- data/_generated/models/audit_event_out_metadata.rb +62 -0
- data/_generated/models/audit_event_out_org_id_member1.rb +62 -0
- data/_generated/models/audit_event_out_target_id_member1.rb +62 -0
- data/_generated/models/audit_event_out_target_kind_member1.rb +62 -0
- data/_generated/models/audit_events_response.rb +173 -0
- data/_generated/models/audit_events_response_next_cursor_member1.rb +62 -0
- data/_generated/models/batch_scrape_request.rb +623 -0
- data/_generated/models/batch_scrape_request_behavior_actions_member1.rb +62 -0
- data/_generated/models/batch_scrape_request_behavior_actions_member2.rb +62 -0
- data/_generated/models/batch_scrape_request_extraction_strategy_member1.rb +62 -0
- data/_generated/models/batch_scrape_request_identity_country_member1.rb +62 -0
- data/_generated/models/batch_scrape_request_language_member1.rb +62 -0
- data/_generated/models/batch_scrape_request_output_format.rb +9 -0
- data/_generated/models/batch_scrape_request_profile_member1.rb +62 -0
- data/_generated/models/cdp_frame_request.rb +103 -0
- data/_generated/models/cdp_url_request.rb +82 -0
- data/_generated/models/checkout_request.rb +124 -0
- data/_generated/models/contact_request.rb +143 -0
- data/_generated/models/cookie_dict.rb +739 -0
- data/_generated/models/cookie_dict_domain_member1.rb +62 -0
- data/_generated/models/cookie_dict_expires_member1.rb +62 -0
- data/_generated/models/cookie_dict_http_only_member1.rb +62 -0
- data/_generated/models/cookie_dict_path_member1.rb +62 -0
- data/_generated/models/cookie_dict_same_site_member1.rb +62 -0
- data/_generated/models/cookie_dict_secure_member1.rb +62 -0
- data/_generated/models/cookie_dict_url_member1.rb +62 -0
- data/_generated/models/cookies_delete_body.rb +355 -0
- data/_generated/models/cookies_delete_body_domain_member1.rb +62 -0
- data/_generated/models/cookies_delete_body_name_member1.rb +62 -0
- data/_generated/models/cookies_delete_body_path_member1.rb +62 -0
- data/_generated/models/cookies_set_body.rb +102 -0
- data/_generated/models/deep_crawl_body.rb +339 -0
- data/_generated/models/deep_crawl_body_webhook_url_member1.rb +62 -0
- data/_generated/models/dom_snapshot_body.rb +82 -0
- data/_generated/models/download_body.rb +102 -0
- data/_generated/models/error_code.rb +31 -0
- data/_generated/models/eval_body.rb +102 -0
- data/_generated/models/extend_body.rb +155 -0
- data/_generated/models/extend_body_ttl_seconds_member1.rb +62 -0
- data/_generated/models/extract_request.rb +631 -0
- data/_generated/models/extract_request_behavior_actions_member1.rb +62 -0
- data/_generated/models/extract_request_behavior_actions_member2.rb +62 -0
- data/_generated/models/extract_request_engine.rb +12 -0
- data/_generated/models/extract_request_identity_country_member1.rb +62 -0
- data/_generated/models/extract_request_language_member1.rb +62 -0
- data/_generated/models/extract_request_prompt_member1.rb +62 -0
- data/_generated/models/extract_request_url_member1.rb +62 -0
- data/_generated/models/extract_request_urls_member1.rb +62 -0
- data/_generated/models/filter_spec.rb +62 -0
- data/_generated/models/h_t_t_p_validation_error.rb +84 -0
- data/_generated/models/har_body.rb +82 -0
- data/_generated/models/identity_payload.rb +144 -0
- data/_generated/models/identity_request.rb +833 -0
- data/_generated/models/identity_request_claim_browser_member1.rb +62 -0
- data/_generated/models/identity_request_claim_os_member1.rb +62 -0
- data/_generated/models/identity_request_device_model_member1.rb +62 -0
- data/_generated/models/identity_request_locale_member1.rb +62 -0
- data/_generated/models/identity_request_proxy_member1.rb +62 -0
- data/_generated/models/identity_request_source_member1.rb +62 -0
- data/_generated/models/identity_request_timezone_member1.rb +62 -0
- data/_generated/models/identity_request_viewport_member1.rb +62 -0
- data/_generated/models/identity_response.rb +357 -0
- data/_generated/models/identity_response_expires_at_member1.rb +62 -0
- data/_generated/models/identity_response_identity_id_member1.rb +62 -0
- data/_generated/models/identity_response_workspace_id_member1.rb +62 -0
- data/_generated/models/map_body.rb +238 -0
- data/_generated/models/map_body_search_member1.rb +62 -0
- data/_generated/models/map_response.rb +195 -0
- data/_generated/models/map_response_truncated_to_server_max_member1.rb +62 -0
- data/_generated/models/me_response.rb +233 -0
- data/_generated/models/me_response_created_at_member1.rb +62 -0
- data/_generated/models/models.rb +145 -0
- data/_generated/models/policy_upsert_request.rb +417 -0
- data/_generated/models/policy_upsert_request_max_crawl_pages_member1.rb +62 -0
- data/_generated/models/policy_upsert_request_max_llm_calls_member1.rb +62 -0
- data/_generated/models/policy_upsert_request_max_llm_tokens_member1.rb +62 -0
- data/_generated/models/problem_details.rb +226 -0
- data/_generated/models/profile_create_request.rb +175 -0
- data/_generated/models/profile_create_request_storage_state_id_member1.rb +62 -0
- data/_generated/models/profile_update_request.rb +246 -0
- data/_generated/models/profile_update_request_name_member1.rb +62 -0
- data/_generated/models/profile_update_request_storage_state_id_member1.rb +62 -0
- data/_generated/models/registry_token_request.rb +82 -0
- data/_generated/models/retry_delivery_response.rb +145 -0
- data/_generated/models/rotate_secret_response.rb +145 -0
- data/_generated/models/schedule_create_request.rb +256 -0
- data/_generated/models/schedule_create_request_job_params.rb +64 -0
- data/_generated/models/schedule_create_request_notify_webhook_member1.rb +62 -0
- data/_generated/models/scorer_spec.rb +102 -0
- data/_generated/models/scrape_request.rb +1142 -0
- data/_generated/models/scrape_request_batch_identity_mode.rb +10 -0
- data/_generated/models/scrape_request_behavior_actions_member1.rb +62 -0
- data/_generated/models/scrape_request_behavior_actions_member2.rb +62 -0
- data/_generated/models/scrape_request_engine.rb +12 -0
- data/_generated/models/scrape_request_format.rb +11 -0
- data/_generated/models/scrape_request_identity_country_member1.rb +62 -0
- data/_generated/models/scrape_request_identity_member1.rb +62 -0
- data/_generated/models/scrape_request_language_member1.rb +62 -0
- data/_generated/models/scrape_request_profile_member1.rb +62 -0
- data/_generated/models/scrape_request_proxy_member1.rb +62 -0
- data/_generated/models/scrape_request_screenshot_selector_member1.rb +62 -0
- data/_generated/models/scrape_request_session_member1.rb +62 -0
- data/_generated/models/scrape_request_url_member1.rb +62 -0
- data/_generated/models/scrape_request_urls_member1.rb +62 -0
- data/_generated/models/scroll_body.rb +284 -0
- data/_generated/models/scroll_body_direction.rb +11 -0
- data/_generated/models/scroll_body_distance_px_member1.rb +62 -0
- data/_generated/models/scroll_body_selector_member1.rb +62 -0
- data/_generated/models/search_request.rb +469 -0
- data/_generated/models/search_request_country_member1.rb +62 -0
- data/_generated/models/search_request_engine_member1.rb +62 -0
- data/_generated/models/search_request_freshness_member1.rb +62 -0
- data/_generated/models/search_request_vertical_member1.rb +62 -0
- data/_generated/models/session_create_request.rb +174 -0
- data/_generated/models/session_create_request_engine.rb +10 -0
- data/_generated/models/session_create_request_profile_member1.rb +62 -0
- data/_generated/models/storage_state_attach_request.rb +58 -0
- data/_generated/models/storage_state_detach_request.rb +58 -0
- data/_generated/models/takeover_action_body.rb +84 -0
- data/_generated/models/team_response.rb +142 -0
- data/_generated/models/tenant_context.rb +183 -0
- data/_generated/models/upload_body.rb +162 -0
- data/_generated/models/validation_error.rb +142 -0
- data/_generated/models/validation_error_ctx.rb +62 -0
- data/_generated/models/viewport_exact.rb +104 -0
- data/_generated/models/wait_body.rb +194 -0
- data/_generated/models/wait_body_selector_member1.rb +62 -0
- data/_generated/models/webhook_create_request.rb +125 -0
- data/_generated/models/webhook_create_response.rb +104 -0
- data/_generated/models/webhook_delivery_list_response.rb +195 -0
- data/_generated/models/webhook_delivery_list_response_next_cursor_member1.rb +62 -0
- data/_generated/models/webhook_delivery_public.rb +680 -0
- data/_generated/models/webhook_delivery_public_delivered_at_member1.rb +62 -0
- data/_generated/models/webhook_delivery_public_error_class_member1.rb +62 -0
- data/_generated/models/webhook_delivery_public_replay_of_member1.rb +62 -0
- data/_generated/models/webhook_delivery_public_response_body_preview_member1.rb +62 -0
- data/_generated/models/webhook_delivery_public_response_status_member1.rb +62 -0
- data/_generated/models/webhook_list_response.rb +195 -0
- data/_generated/models/webhook_list_response_next_cursor_member1.rb +62 -0
- data/_generated/models/webhook_public.rb +316 -0
- data/_generated/models/webhook_public_secret_rotated_at_member1.rb +62 -0
- data/_generated/v1/audit/audit.rb +0 -0
- data/_generated/v1/audit/audit_request_builder.rb +31 -0
- data/_generated/v1/audit/events/events.rb +0 -0
- data/_generated/v1/audit/events/events_request_builder.rb +93 -0
- data/_generated/v1/billing/billing.rb +0 -0
- data/_generated/v1/billing/billing_request_builder.rb +49 -0
- data/_generated/v1/billing/checkout/checkout.rb +0 -0
- data/_generated/v1/billing/checkout/checkout_request_builder.rb +74 -0
- data/_generated/v1/billing/portal/portal.rb +0 -0
- data/_generated/v1/billing/portal/portal_request_builder.rb +68 -0
- data/_generated/v1/billing/subscription/subscription.rb +0 -0
- data/_generated/v1/billing/subscription/subscription_request_builder.rb +68 -0
- data/_generated/v1/billing/usage/usage.rb +0 -0
- data/_generated/v1/billing/usage/usage_request_builder.rb +90 -0
- data/_generated/v1/binary/binary.rb +0 -0
- data/_generated/v1/binary/binary_request_builder.rb +31 -0
- data/_generated/v1/binary/download/download.rb +0 -0
- data/_generated/v1/binary/download/download_request_builder.rb +77 -0
- data/_generated/v1/budgets/budgets.rb +0 -0
- data/_generated/v1/budgets/budgets_request_builder.rb +37 -0
- data/_generated/v1/budgets/policy/item/item/item.rb +0 -0
- data/_generated/v1/budgets/policy/item/item/with_scope_item_request_builder.rb +74 -0
- data/_generated/v1/budgets/policy/item/item.rb +0 -0
- data/_generated/v1/budgets/policy/item/with_scope_type_item_request_builder.rb +43 -0
- data/_generated/v1/budgets/policy/policy.rb +0 -0
- data/_generated/v1/budgets/policy/policy_request_builder.rb +125 -0
- data/_generated/v1/budgets/usage/usage.rb +0 -0
- data/_generated/v1/budgets/usage/usage_request_builder.rb +77 -0
- data/_generated/v1/cdp/cdp.rb +0 -0
- data/_generated/v1/cdp/cdp_request_builder.rb +37 -0
- data/_generated/v1/cdp/frame/frame.rb +0 -0
- data/_generated/v1/cdp/frame/frame_request_builder.rb +74 -0
- data/_generated/v1/cdp/url/url.rb +0 -0
- data/_generated/v1/cdp/url/url_request_builder.rb +74 -0
- data/_generated/v1/contact/contact.rb +1 -0
- data/_generated/v1/contact/contact_post_response.rb +65 -0
- data/_generated/v1/contact/contact_request_builder.rb +71 -0
- data/_generated/v1/crawl/crawl.rb +0 -0
- data/_generated/v1/crawl/crawl_request_builder.rb +31 -0
- data/_generated/v1/crawl/deep/deep.rb +0 -0
- data/_generated/v1/crawl/deep/deep_request_builder.rb +86 -0
- data/_generated/v1/crawl/deep/item/item.rb +0 -0
- data/_generated/v1/crawl/deep/item/with_run_item_request_builder.rb +71 -0
- data/_generated/v1/crawl_runs/crawl_runs.rb +1 -0
- data/_generated/v1/crawl_runs/crawl_runs_post_request_body.rb +65 -0
- data/_generated/v1/crawl_runs/crawl_runs_request_builder.rb +133 -0
- data/_generated/v1/crawl_runs/item/cancel/cancel.rb +0 -0
- data/_generated/v1/crawl_runs/item/cancel/cancel_request_builder.rb +71 -0
- data/_generated/v1/crawl_runs/item/item.rb +0 -0
- data/_generated/v1/crawl_runs/item/resume/resume.rb +0 -0
- data/_generated/v1/crawl_runs/item/resume/resume_request_builder.rb +71 -0
- data/_generated/v1/crawl_runs/item/with_run_item_request_builder.rb +88 -0
- data/_generated/v1/datasets/datasets.rb +1 -0
- data/_generated/v1/datasets/datasets_post_request_body.rb +65 -0
- data/_generated/v1/datasets/datasets_request_builder.rb +125 -0
- data/_generated/v1/datasets/item/item.rb +0 -0
- data/_generated/v1/datasets/item/rows/append/append.rb +1 -0
- data/_generated/v1/datasets/item/rows/append/append_post_request_body.rb +74 -0
- data/_generated/v1/datasets/item/rows/append/append_request_builder.rb +79 -0
- data/_generated/v1/datasets/item/rows/rows.rb +0 -0
- data/_generated/v1/datasets/item/rows/rows_request_builder.rb +86 -0
- data/_generated/v1/datasets/item/with_name_item_request_builder.rb +104 -0
- data/_generated/v1/discovery/discovery.rb +0 -0
- data/_generated/v1/discovery/discovery_request_builder.rb +65 -0
- data/_generated/v1/engines/engines.rb +0 -0
- data/_generated/v1/engines/engines_request_builder.rb +31 -0
- data/_generated/v1/engines/manifest/manifest.rb +0 -0
- data/_generated/v1/engines/manifest/manifest_request_builder.rb +84 -0
- data/_generated/v1/extract/extract.rb +1 -0
- data/_generated/v1/extract/extract_post_response.rb +65 -0
- data/_generated/v1/extract/extract_request_builder.rb +79 -0
- data/_generated/v1/identity/identity.rb +0 -0
- data/_generated/v1/identity/identity_request_builder.rb +72 -0
- data/_generated/v1/kv/item/item.rb +1 -0
- data/_generated/v1/kv/item/with_key_item_request_builder.rb +133 -0
- data/_generated/v1/kv/item/with_key_put_request_body.rb +68 -0
- data/_generated/v1/kv/kv.rb +0 -0
- data/_generated/v1/kv/kv_request_builder.rb +37 -0
- data/_generated/v1/map/map.rb +0 -0
- data/_generated/v1/map/map_request_builder.rb +72 -0
- data/_generated/v1/me/export/export.rb +1 -0
- data/_generated/v1/me/export/export_get_response.rb +68 -0
- data/_generated/v1/me/export/export_request_builder.rb +68 -0
- data/_generated/v1/me/me.rb +0 -0
- data/_generated/v1/me/me_request_builder.rb +78 -0
- data/_generated/v1/me/usage/get_period_query_parameter_type.rb +13 -0
- data/_generated/v1/me/usage/usage.rb +2 -0
- data/_generated/v1/me/usage/usage_get_response.rb +68 -0
- data/_generated/v1/me/usage/usage_request_builder.rb +76 -0
- data/_generated/v1/page/cookies/cookies.rb +0 -0
- data/_generated/v1/page/cookies/cookies_request_builder.rb +150 -0
- data/_generated/v1/page/dom_snapshot/dom_snapshot.rb +0 -0
- data/_generated/v1/page/dom_snapshot/dom_snapshot_request_builder.rb +74 -0
- data/_generated/v1/page/download/download.rb +0 -0
- data/_generated/v1/page/download/download_request_builder.rb +74 -0
- data/_generated/v1/page/eval/eval.rb +0 -0
- data/_generated/v1/page/eval/eval_request_builder.rb +74 -0
- data/_generated/v1/page/har/har.rb +0 -0
- data/_generated/v1/page/har/har_request_builder.rb +74 -0
- data/_generated/v1/page/page.rb +0 -0
- data/_generated/v1/page/page_request_builder.rb +73 -0
- data/_generated/v1/page/scroll/scroll.rb +0 -0
- data/_generated/v1/page/scroll/scroll_request_builder.rb +74 -0
- data/_generated/v1/page/upload/upload.rb +0 -0
- data/_generated/v1/page/upload/upload_request_builder.rb +74 -0
- data/_generated/v1/page/wait/wait.rb +0 -0
- data/_generated/v1/page/wait/wait_request_builder.rb +74 -0
- data/_generated/v1/pricing/pricing.rb +0 -0
- data/_generated/v1/pricing/pricing_request_builder.rb +31 -0
- data/_generated/v1/pricing/public/public.rb +0 -0
- data/_generated/v1/pricing/public/public_request_builder.rb +65 -0
- data/_generated/v1/profiles/item/item.rb +0 -0
- data/_generated/v1/profiles/item/with_name_item_request_builder.rb +134 -0
- data/_generated/v1/profiles/profiles.rb +0 -0
- data/_generated/v1/profiles/profiles_request_builder.rb +126 -0
- data/_generated/v1/proxy_providers/proxy_providers.rb +0 -0
- data/_generated/v1/proxy_providers/proxy_providers_request_builder.rb +65 -0
- data/_generated/v1/queues/item/ack/ack.rb +1 -0
- data/_generated/v1/queues/item/ack/ack_post_request_body.rb +71 -0
- data/_generated/v1/queues/item/ack/ack_request_builder.rb +76 -0
- data/_generated/v1/queues/item/item.rb +0 -0
- data/_generated/v1/queues/item/pop/pop.rb +0 -0
- data/_generated/v1/queues/item/pop/pop_request_builder.rb +79 -0
- data/_generated/v1/queues/item/push/push.rb +1 -0
- data/_generated/v1/queues/item/push/push_post_request_body.rb +71 -0
- data/_generated/v1/queues/item/push/push_request_builder.rb +76 -0
- data/_generated/v1/queues/item/stats/stats.rb +0 -0
- data/_generated/v1/queues/item/stats/stats_request_builder.rb +71 -0
- data/_generated/v1/queues/item/with_name_item_request_builder.rb +52 -0
- data/_generated/v1/queues/queues.rb +0 -0
- data/_generated/v1/queues/queues_request_builder.rb +37 -0
- data/_generated/v1/recordings/item/item.rb +0 -0
- data/_generated/v1/recordings/item/recording_item_request_builder.rb +104 -0
- data/_generated/v1/recordings/item/visual/frames/frames.rb +0 -0
- data/_generated/v1/recordings/item/visual/frames/frames_request_builder.rb +93 -0
- data/_generated/v1/recordings/item/visual/start/start.rb +0 -0
- data/_generated/v1/recordings/item/visual/start/start_request_builder.rb +80 -0
- data/_generated/v1/recordings/item/visual/stop/stop.rb +0 -0
- data/_generated/v1/recordings/item/visual/stop/stop_request_builder.rb +80 -0
- data/_generated/v1/recordings/item/visual/visual.rb +0 -0
- data/_generated/v1/recordings/item/visual/visual_request_builder.rb +95 -0
- data/_generated/v1/recordings/recordings.rb +0 -0
- data/_generated/v1/recordings/recordings_request_builder.rb +90 -0
- data/_generated/v1/registry/registry.rb +0 -0
- data/_generated/v1/registry/registry_request_builder.rb +31 -0
- data/_generated/v1/registry/token/token.rb +0 -0
- data/_generated/v1/registry/token/token_request_builder.rb +74 -0
- data/_generated/v1/schedules/item/item.rb +0 -0
- data/_generated/v1/schedules/item/runs/runs.rb +0 -0
- data/_generated/v1/schedules/item/runs/runs_request_builder.rb +71 -0
- data/_generated/v1/schedules/item/with_schedule_item_request_builder.rb +104 -0
- data/_generated/v1/schedules/schedules.rb +0 -0
- data/_generated/v1/schedules/schedules_request_builder.rb +113 -0
- data/_generated/v1/scrape/batch/batch.rb +0 -0
- data/_generated/v1/scrape/batch/batch_request_builder.rb +82 -0
- data/_generated/v1/scrape/scrape.rb +0 -0
- data/_generated/v1/scrape/scrape_request_builder.rb +86 -0
- data/_generated/v1/screenshot_blobs/item/item.rb +0 -0
- data/_generated/v1/screenshot_blobs/item/with_ref_item_request_builder.rb +68 -0
- data/_generated/v1/screenshot_blobs/screenshot_blobs.rb +0 -0
- data/_generated/v1/screenshot_blobs/screenshot_blobs_request_builder.rb +37 -0
- data/_generated/v1/search/search.rb +0 -0
- data/_generated/v1/search/search_request_builder.rb +79 -0
- data/_generated/v1/sessions/create/create.rb +0 -0
- data/_generated/v1/sessions/create/create_request_builder.rb +74 -0
- data/_generated/v1/sessions/item/budget_stream/budget_stream.rb +0 -0
- data/_generated/v1/sessions/item/budget_stream/budget_stream_request_builder.rb +71 -0
- data/_generated/v1/sessions/item/extend/extend.rb +0 -0
- data/_generated/v1/sessions/item/extend/extend_request_builder.rb +77 -0
- data/_generated/v1/sessions/item/item.rb +0 -0
- data/_generated/v1/sessions/item/pin/pin.rb +0 -0
- data/_generated/v1/sessions/item/pin/pin_request_builder.rb +71 -0
- data/_generated/v1/sessions/item/profile_item_request_builder.rb +76 -0
- data/_generated/v1/sessions/item/recording/recording.rb +0 -0
- data/_generated/v1/sessions/item/recording/recording_request_builder.rb +43 -0
- data/_generated/v1/sessions/item/recording/start/start.rb +0 -0
- data/_generated/v1/sessions/item/recording/start/start_request_builder.rb +74 -0
- data/_generated/v1/sessions/item/recording/stop/stop.rb +0 -0
- data/_generated/v1/sessions/item/recording/stop/stop_request_builder.rb +74 -0
- data/_generated/v1/sessions/item/release/release.rb +0 -0
- data/_generated/v1/sessions/item/release/release_request_builder.rb +71 -0
- data/_generated/v1/sessions/item/takeover/takeover.rb +0 -0
- data/_generated/v1/sessions/item/takeover/takeover_request_builder.rb +77 -0
- data/_generated/v1/sessions/item/takeover_release/takeover_release.rb +0 -0
- data/_generated/v1/sessions/item/takeover_release/takeover_release_request_builder.rb +71 -0
- data/_generated/v1/sessions/item/takeover_token/takeover_token.rb +0 -0
- data/_generated/v1/sessions/item/takeover_token/takeover_token_request_builder.rb +71 -0
- data/_generated/v1/sessions/sessions.rb +0 -0
- data/_generated/v1/sessions/sessions_request_builder.rb +96 -0
- data/_generated/v1/storage_states/detach/detach.rb +0 -0
- data/_generated/v1/storage_states/detach/detach_request_builder.rb +74 -0
- data/_generated/v1/storage_states/item/attach/attach.rb +0 -0
- data/_generated/v1/storage_states/item/attach/attach_request_builder.rb +77 -0
- data/_generated/v1/storage_states/item/item.rb +0 -0
- data/_generated/v1/storage_states/item/with_id_or_name_item_request_builder.rb +104 -0
- data/_generated/v1/storage_states/storage_states.rb +1 -0
- data/_generated/v1/storage_states/storage_states_post_request_body.rb +65 -0
- data/_generated/v1/storage_states/storage_states_request_builder.rb +127 -0
- data/_generated/v1/updates/updates.rb +0 -0
- data/_generated/v1/updates/updates_request_builder.rb +75 -0
- data/_generated/v1/v1.rb +0 -0
- data/_generated/v1/v1_request_builder.rb +208 -0
- data/_generated/v1/webhooks/item/deliveries/deliveries.rb +0 -0
- data/_generated/v1/webhooks/item/deliveries/deliveries_request_builder.rb +97 -0
- data/_generated/v1/webhooks/item/deliveries/item/item.rb +0 -0
- data/_generated/v1/webhooks/item/deliveries/item/retry_escaped/retry_escaped.rb +0 -0
- data/_generated/v1/webhooks/item/deliveries/item/retry_escaped/retry_request_builder.rb +78 -0
- data/_generated/v1/webhooks/item/deliveries/item/with_delivery_item_request_builder.rb +40 -0
- data/_generated/v1/webhooks/item/item.rb +0 -0
- data/_generated/v1/webhooks/item/rotate_secret/rotate_secret.rb +0 -0
- data/_generated/v1/webhooks/item/rotate_secret/rotate_secret_request_builder.rb +72 -0
- data/_generated/v1/webhooks/item/with_webhook_item_request_builder.rb +111 -0
- data/_generated/v1/webhooks/webhooks.rb +0 -0
- data/_generated/v1/webhooks/webhooks_request_builder.rb +128 -0
- data/lib/ghostcrawl/client.rb +761 -0
- data/lib/ghostcrawl/error_codes.rb +104 -0
- data/lib/ghostcrawl/errors.rb +137 -0
- data/lib/ghostcrawl/version.rb +5 -0
- data/lib/ghostcrawl.rb +17 -0
- metadata +460 -0
|
@@ -0,0 +1,761 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# GhostCrawl Ruby SDK — idiomatic facade over the Kiota-generated core.
|
|
4
|
+
#
|
|
5
|
+
# Architecture:
|
|
6
|
+
#
|
|
7
|
+
# _generated/ Kiota core — spec-faithful 98-op request-builder (models, transport, auth)
|
|
8
|
+
# This FACADE — thin idiomatic layer delegating to the generated builders
|
|
9
|
+
#
|
|
10
|
+
# All HTTP transport, URL routing, serialization, and auth come from the generated core.
|
|
11
|
+
# The facade maps idiomatic calls (client.scrape) to generated builders via
|
|
12
|
+
# BaseBearerTokenAuthenticationProvider + FaradayRequestAdapter.
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# require "ghostcrawl"
|
|
16
|
+
# client = Ghostcrawl::Client.new(token: "gck_live_YOUR_KEY")
|
|
17
|
+
# result = client.scrape(url: "https://example.com")
|
|
18
|
+
|
|
19
|
+
require "microsoft_kiota_abstractions"
|
|
20
|
+
require "microsoft_kiota_faraday"
|
|
21
|
+
require "microsoft_kiota_serialization_json"
|
|
22
|
+
require_relative "../../_generated/ghostcrawl_client"
|
|
23
|
+
require_relative "../../_generated/models/scrape_request"
|
|
24
|
+
require_relative "../../_generated/models/search_request"
|
|
25
|
+
require_relative "../../_generated/models/extract_request"
|
|
26
|
+
require_relative "../../_generated/models/deep_crawl_body"
|
|
27
|
+
require_relative "../../_generated/models/map_body"
|
|
28
|
+
require_relative "../../_generated/models/session_create_request"
|
|
29
|
+
require_relative "../../_generated/models/extend_body"
|
|
30
|
+
require_relative "../../_generated/models/profile_create_request"
|
|
31
|
+
require_relative "../../_generated/models/profile_update_request"
|
|
32
|
+
require_relative "../../_generated/models/webhook_create_request"
|
|
33
|
+
require_relative "../../_generated/models/schedule_create_request"
|
|
34
|
+
require_relative "errors"
|
|
35
|
+
|
|
36
|
+
module Ghostcrawl
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Fix: microsoft_kiota_serialization_json 0.9.2 has a bug where
|
|
39
|
+
# write_object_value(nil, body) creates a temp writer, serializes body into
|
|
40
|
+
# temp, then RETURNS temp without merging temp's content into self.@writer.
|
|
41
|
+
# set_content_from_parsable calls write_object_value(nil, body) and discards
|
|
42
|
+
# the return value, so @content = {}.to_json = "{}".
|
|
43
|
+
#
|
|
44
|
+
# Fix: monkey-patch write_object_value so that when key is nil, it serializes
|
|
45
|
+
# body into self directly (not a temp writer) — same as what the non-nil branch
|
|
46
|
+
# does, but without a key.
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
# @api private
|
|
50
|
+
module KiotaWriterFix
  # Replacement for the JSON writer's +write_object_value+ that handles the
  # key-less (root object) case by serializing straight into this writer.
  #
  # @param key [String, nil] property name; +nil+ means "write the root object"
  # @param value [#serialize, nil] object to serialize; falsy values are skipped
  # @return [Object, nil] +nil+ when +value+ is falsy, otherwise whatever
  #   +value.serialize+ (nil key) or the original implementation (keyed) returns
  def write_object_value(key, value)
    # Nothing to write for nil/false.
    return unless value

    # Keyed writes are untouched: defer to the original writer implementation.
    return super unless key.nil?

    # Root write: emit the object's fields directly into this writer rather
    # than into a throw-away temporary one.
    value.serialize(self)
  end
end
|
|
61
|
+
|
|
62
|
+
# Patch the writer class once, idempotently
|
|
63
|
+
# Install KiotaWriterFix in front of the JSON writer, skipping the call when
# the module is already present in the writer's ancestor chain.
unless MicrosoftKiotaSerializationJson::JsonSerializationWriter.ancestors.include?(KiotaWriterFix)
  MicrosoftKiotaSerializationJson::JsonSerializationWriter.prepend(KiotaWriterFix)
end
|
|
65
|
+
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
# Fix 2: Some generated request builders pass a Module (e.g. `Binary`) as
|
|
68
|
+
# the factory to `send_async`. The parse node's `get_object_value(factory)`
|
|
69
|
+
# calls `factory.call(self)`, which fails for a Module.
|
|
70
|
+
#
|
|
71
|
+
# Fix: when factory doesn't respond to `call`, fall back to parsing the
|
|
72
|
+
# response body as JSON and returning the raw hash. This gives us plain
|
|
73
|
+
# Ruby hashes (which ResponseHelper.to_hash handles correctly) without
|
|
74
|
+
# requiring generated response models.
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
# @api private
|
|
78
|
+
module KiotaParseNodeFix
  # Replacement for the JSON parse node's +get_object_value+ that tolerates
  # factories which are not callable (e.g. a Module passed by a generated
  # request builder).
  #
  # A callable factory is handed to the original implementation exactly once;
  # any error it raises propagates unchanged. Previously a method-level
  # +rescue StandardError+ called +super+ a second time after a failure, which
  # re-ran the factory for no benefit before raising anyway.
  #
  # @param factory [#call, Object] object factory supplied by the request builder
  # @return [Object] the original implementation's result for callable
  #   factories, otherwise the node's already-parsed value (+@current_node+)
  def get_object_value(factory)
    # Non-callable factory: hand back the raw parsed node instead of failing.
    return @current_node unless factory.respond_to?(:call)

    super
  end
end
|
|
88
|
+
|
|
89
|
+
# Install KiotaParseNodeFix in front of the JSON parse node, skipping the call
# when the module is already present in the node class's ancestor chain.
unless MicrosoftKiotaSerializationJson::JsonParseNode.ancestors.include?(KiotaParseNodeFix)
  MicrosoftKiotaSerializationJson::JsonParseNode.prepend(KiotaParseNodeFix)
end
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# AdditionalDataBody — a minimal Parsable that wraps an arbitrary Hash.
|
|
94
|
+
#
|
|
95
|
+
# Used for all POST bodies. The generated models set typed defaults in their
|
|
96
|
+
# constructors that would be serialized as null/empty-enum values and cause
|
|
97
|
+
# 422 validation errors. AdditionalDataBody only serializes the fields we
|
|
98
|
+
# explicitly pass, producing clean JSON.
|
|
99
|
+
#
|
|
100
|
+
# Depends on KiotaWriterFix above to correctly handle the nil-key
|
|
101
|
+
# write_object_value(nil, body) call from set_content_from_parsable.
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
# @api private
|
|
105
|
+
class AdditionalDataBody
  include MicrosoftKiotaAbstractions::Parsable

  # Factory hook with the signature Kiota uses for discriminated creation.
  # The parse node is ignored; an empty body is always returned.
  #
  # @param _parse_node [Object] unused
  # @return [AdditionalDataBody]
  def self.create_from_discriminator_value(_parse_node)
    AdditionalDataBody.new
  end

  # @param data [Hash] fields to send; keys are stringified via +to_s+
  def initialize(data = {})
    self.additional_data = data
  end

  # @return [Hash{String => Object}] the wrapped fields
  def additional_data
    @data
  end

  # Replaces the wrapped fields, stringifying every key.
  #
  # @param hash [Hash]
  def additional_data=(hash)
    @data = hash.transform_keys { |key| key.to_s }
  end

  # This body has no typed fields, so there is nothing to deserialize into.
  #
  # @return [Hash] always empty
  def get_field_deserializers
    {}
  end

  # Writes only the wrapped fields to the given writer.
  #
  # @param writer [#write_additional_data]
  def serialize(writer)
    writer.write_additional_data(@data)
  end
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
module Ghostcrawl
|
|
135
|
+
# Default origin of the GhostCrawl HTTP API.
# NOTE(review): presumably the fallback when no base URL is given to the
# client — the code that reads this constant is outside this view; confirm.
DEFAULT_BASE_URL = "https://api.ghostcrawl.io"
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Static bearer token provider — implements AccessTokenProvider
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
# @api private
|
|
142
|
+
class StaticTokenProvider
  include MicrosoftKiotaAbstractions::AccessTokenProvider

  # @param token [String] bearer token returned for every request
  def initialize(token)
    @token = token
  end

  # Builds a fresh host validator with an empty host list on every call.
  # NOTE(review): confirm how Kiota interprets an empty list (allow-all vs.
  # deny-all) — that behavior is not visible here.
  #
  # @return [MicrosoftKiotaAbstractions::AllowedHostsValidator]
  def get_allowed_hosts_validator
    MicrosoftKiotaAbstractions::AllowedHostsValidator.new([])
  end

  # Returns the stored token wrapped in a Fiber, because the Kiota Faraday
  # adapter calls +.resume+ on whatever this method returns.
  #
  # @param _uri [Object] unused
  # @param _additional_authentication_context [Object, nil] unused
  # @return [Fiber] a fiber whose +resume+ yields the token
  def get_authorization_token(_uri, _additional_authentication_context = nil)
    # Copy to a local so the fiber closes over the value read at call time.
    bearer = @token
    Fiber.new do
      bearer
    end
  end
end
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Response helpers — convert Kiota Parsable/Hash responses to plain Hash
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
# @api private
|
|
166
|
+
module ResponseHelper
|
|
167
|
+
# Converts any Kiota response value to plain Ruby data (Hash, Array, scalars).
#
# The Kiota Faraday adapter returns a Fiber for every request; resuming it is
# what actually executes the HTTP call, so Fibers are resolved here first.
#
# An empty response (top-level +nil+) is normalised to +{}+. Nested +nil+
# values are preserved as +nil+; previously they were also turned into +{}+,
# so every JSON +null+ field surfaced as an empty Hash.
#
# @param value [Fiber, Hash, Array, Object, nil] raw adapter result
# @return [Hash, Array, Object] plain data; +{}+ when the response is empty
# @raise [Ghostcrawl::GhostcrawlError] re-raised untouched when the fiber
#   raises one; other StandardErrors are passed to Ghostcrawl.raise_translated
def self.to_hash(value)
  plain = deep_plain(value)
  # Only the top-level "no body" case collapses to an empty Hash.
  plain.nil? ? {} : plain
end

# Recursive worker for {to_hash}: resolves Fibers and unwraps containers and
# Parsable-like objects, leaving +nil+ and other scalars as they are.
#
# @api private
# @param value [Object]
# @return [Object]
def self.deep_plain(value)
  if value.is_a?(Fiber)
    begin
      value = value.resume
    rescue Ghostcrawl::GhostcrawlError
      # Already a typed SDK error: let it through unchanged.
      raise
    rescue StandardError => e
      # Hand any other transport failure to the SDK's error translation.
      Ghostcrawl.raise_translated(e)
    end
  end

  case value
  when Hash
    value.transform_values { |v| deep_plain(v) }
  when Array
    value.map { |v| deep_plain(v) }
  when nil
    # Explicit branch: nil responds to #to_h and would otherwise become {}.
    nil
  else
    # Parsable-like object: expose its additional_data when present,
    # otherwise fall back to #to_h; anything else is returned as-is.
    if value.respond_to?(:additional_data) && value.additional_data
      value.additional_data.transform_values { |v| deep_plain(v) }
    elsif value.respond_to?(:to_h)
      value.to_h.transform_values { |v| deep_plain(v) }
    else
      value
    end
  end
end
|
|
203
|
+
|
|
204
|
+
# Inspects a decoded HTTP-200 response hash for a RESULT-channel failure (the
|
|
205
|
+
# target page could not be scraped) and raises {Ghostcrawl::ScrapeError} when
|
|
206
|
+
# one is present. This is the reliable, highest-value error path: the body is
|
|
207
|
+
# always available here (unlike the dropped problem+json body on non-2xx).
|
|
208
|
+
#
|
|
209
|
+
# A failure is signalled by any of:
|
|
210
|
+
# * a +result_error+ Hash carrying a +code+
|
|
211
|
+
# * +ok+ explicitly +false+ (always a failure, even with no code)
|
|
212
|
+
# * a top-level +code+ that is a known RESULT-channel code
|
|
213
|
+
#
|
|
214
|
+
# A genuinely OK hash (ok: true, no +ok+ key, or no error code) is returned
|
|
215
|
+
# untouched and never raises.
|
|
216
|
+
#
|
|
217
|
+
# @param hash [Hash] the decoded response
|
|
218
|
+
# @return [Hash] the same hash, when it is not a failure
|
|
219
|
+
# @api private
|
|
220
|
+
def self.raise_on_result_error!(hash)
|
|
221
|
+
return hash unless hash.is_a?(Hash)
|
|
222
|
+
|
|
223
|
+
# Descend into a `results` envelope (scrape/extract wrap per-URL results) —
|
|
224
|
+
# the target failure lives on the INNER result, not the envelope top level.
|
|
225
|
+
inner = hash["results"]
|
|
226
|
+
if inner.is_a?(Array)
|
|
227
|
+
inner.each { |item| raise_on_result_error!(item) }
|
|
228
|
+
return hash
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
result_error = hash["result_error"]
|
|
232
|
+
result_error = nil unless result_error.is_a?(Hash)
|
|
233
|
+
top_code = hash["code"]
|
|
234
|
+
ok_false = hash["ok"] == false
|
|
235
|
+
|
|
236
|
+
# Pull the code: result_error wins, then a top-level RESULT-channel code.
|
|
237
|
+
code = nil
|
|
238
|
+
code = result_error["code"] if result_error
|
|
239
|
+
code ||= top_code if Ghostcrawl::ErrorCodes.result_channel?(top_code)
|
|
240
|
+
|
|
241
|
+
# The flat markdown-build envelope reports a target failure ONLY via
|
|
242
|
+
# status="failed" (no ok/result_error) — don't count it as a success.
|
|
243
|
+
status_failed = hash["status"] == "failed"
|
|
244
|
+
code ||= top_code if status_failed && top_code.is_a?(String)
|
|
245
|
+
|
|
246
|
+
# Only raise when there is a concrete result-channel failure signal.
|
|
247
|
+
return hash unless code || ok_false || status_failed
|
|
248
|
+
|
|
249
|
+
# ok: false with no usable code -> treat as empty/unusable content.
|
|
250
|
+
code ||= Ghostcrawl::ErrorCodes::EMPTY_CONTENT
|
|
251
|
+
|
|
252
|
+
retryable =
|
|
253
|
+
if result_error && result_error.key?("retryable")
|
|
254
|
+
result_error["retryable"]
|
|
255
|
+
else
|
|
256
|
+
Ghostcrawl::ErrorCodes::RETRYABLE.fetch(code, false)
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
target_status = nil
|
|
260
|
+
target_status = result_error["target_status"] if result_error
|
|
261
|
+
target_status ||= hash["target_status"] # flat markdown-envelope path
|
|
262
|
+
reason = result_error && result_error["reason"]
|
|
263
|
+
|
|
264
|
+
msg = "scrape failed (#{code})"
|
|
265
|
+
msg += ": #{reason}" if reason && !reason.to_s.empty?
|
|
266
|
+
msg += " (target HTTP #{target_status})" if target_status
|
|
267
|
+
|
|
268
|
+
raise Ghostcrawl::ScrapeError.new(
|
|
269
|
+
msg,
|
|
270
|
+
status_code: 200,
|
|
271
|
+
body: nil,
|
|
272
|
+
code: code,
|
|
273
|
+
retryable: retryable,
|
|
274
|
+
request_id: hash["request_id"],
|
|
275
|
+
target_status: target_status
|
|
276
|
+
)
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
# Sub-clients — each delegates to the generated v1 request builders
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
# Manage crawl runs — /v1/crawl-runs.
|
|
285
|
+
class CrawlRunsClient
|
|
286
|
+
def initialize(v1)
|
|
287
|
+
@v1 = v1
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
# Start a new crawl run from a seed URL.
|
|
291
|
+
# Delegates to POST /v1/crawl-runs via the generated CrawlRunsRequestBuilder.
|
|
292
|
+
# The endpoint is a tagged union: a start request requires +action: "start"+
|
|
293
|
+
# and a +seed_urls+ array (not a bare +url+).
|
|
294
|
+
# @param url [String] seed URL
|
|
295
|
+
# @param max_depth [Integer] maximum crawl depth (default 2)
|
|
296
|
+
# @param max_pages [Integer] maximum pages (default 100)
|
|
297
|
+
# @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
|
|
298
|
+
# target-side (HTTP-200) failure instead of returning the raw hash (default true)
|
|
299
|
+
# @return [Hash] crawl run record with +run_id+ and +status+
|
|
300
|
+
def start(url:, max_depth: 2, max_pages: 100, raise_on_result_error: true, **opts)
|
|
301
|
+
body = AdditionalDataBody.new({ "action" => "start", "seed_urls" => [url],
|
|
302
|
+
"max_depth" => max_depth, "max_pages" => max_pages }
|
|
303
|
+
.merge(opts.transform_keys(&:to_s)))
|
|
304
|
+
hash = ResponseHelper.to_hash(@v1.crawl_runs.post(body))
|
|
305
|
+
raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
# List crawl runs.
|
|
309
|
+
# Delegates to GET /v1/crawl-runs via the generated CrawlRunsRequestBuilder.
|
|
310
|
+
def list
|
|
311
|
+
ResponseHelper.to_hash(@v1.crawl_runs.get)
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
# Get a single crawl run by ID.
|
|
315
|
+
# Delegates to GET /v1/crawl-runs/{run_id} via the generated builder.
|
|
316
|
+
def get(run_id)
|
|
317
|
+
ResponseHelper.to_hash(@v1.crawl_runs.by_run_id(run_id).get)
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
# Cancel a running crawl run.
|
|
321
|
+
# Delegates to POST /v1/crawl-runs/{run_id}/cancel via the generated builder.
|
|
322
|
+
def cancel(run_id)
|
|
323
|
+
ResponseHelper.to_hash(@v1.crawl_runs.by_run_id(run_id).cancel.post)
|
|
324
|
+
end
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
# Sub-client for browser sessions — /v1/sessions.
class SessionsClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all active sessions.
  # Delegates to GET /v1/sessions via the generated SessionsRequestBuilder.
  def list
    pending = @v1.sessions.get
    ResponseHelper.to_hash(pending)
  end

  # Create a new browser session.
  # Delegates to POST /v1/sessions/create via the generated builder.
  # The profile name is sent under the "profile" key; extra keyword options
  # are stringified and merged after it.
  # @param profile_name [String] identity profile to use
  def create(profile_name:, **opts)
    fields = { "profile" => profile_name }
    fields = fields.merge(opts.transform_keys(&:to_s))
    endpoint = @v1.sessions.create
    ResponseHelper.to_hash(endpoint.post(AdditionalDataBody.new(fields)))
  end

  # Extend a session's TTL.
  # Delegates to POST /v1/sessions/{id}/extend via the generated builder.
  # NOTE: this method takes the name of Object#extend, so instances of this
  # class cannot be extended with modules through the usual call.
  # @param session_id [String] session identifier
  # @param duration_seconds [Integer] value sent as "ttl_seconds" (default 300)
  def extend(session_id, duration_seconds: 300)
    endpoint = @v1.sessions.by_profile__id(session_id).extend
    ttl_body = AdditionalDataBody.new({ "ttl_seconds" => duration_seconds })
    ResponseHelper.to_hash(endpoint.post(ttl_body))
  end

  # Release a session back to the pool.
  # Delegates to POST /v1/sessions/{id}/release via the generated builder.
  # @param session_id [String] session identifier
  def release(session_id)
    pending = @v1.sessions.by_profile__id(session_id).release.post
    ResponseHelper.to_hash(pending)
  end
end
|
|
361
|
+
|
|
362
|
+
# Sub-client for identity profiles — /v1/profiles.
class ProfilesClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all profiles.
  # Delegates to GET /v1/profiles via the generated ProfilesRequestBuilder.
  def list
    pending = @v1.profiles.get
    ResponseHelper.to_hash(pending)
  end

  # Get a profile by name.
  # Delegates to GET /v1/profiles/{name} via the generated builder.
  # @param name [String] profile name
  def get(name)
    pending = @v1.profiles.by_name(name).get
    ResponseHelper.to_hash(pending)
  end

  # Create a new profile.
  # Delegates to POST /v1/profiles via the generated ProfilesRequestBuilder.
  # Extra keyword options are stringified and merged after "name".
  # @param name [String] profile name
  def create(name:, **config)
    fields = { "name" => name }
    request_body = AdditionalDataBody.new(fields.merge(config.transform_keys(&:to_s)))
    ResponseHelper.to_hash(@v1.profiles.post(request_body))
  end

  # Update a profile.
  # Delegates to PUT /v1/profiles/{name} via the generated builder.
  # The keyword options, with stringified keys, form the whole request body.
  # @param name [String] profile name
  def update(name, **config)
    endpoint = @v1.profiles.by_name(name)
    changes = config.transform_keys(&:to_s)
    ResponseHelper.to_hash(endpoint.put(AdditionalDataBody.new(changes)))
  end

  # Delete a profile.
  # Delegates to DELETE /v1/profiles/{name} via the generated builder.
  # @param name [String] profile name
  def delete(name)
    pending = @v1.profiles.by_name(name).delete
    ResponseHelper.to_hash(pending)
  end
end
|
|
399
|
+
|
|
400
|
+
# Manage webhooks — /v1/webhooks.
class WebhooksClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all webhooks.
  # Delegates to GET /v1/webhooks via the generated WebhooksRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.webhooks.get)
  end

  # Get a webhook by ID.
  # Delegates to GET /v1/webhooks/{id} via the generated builder.
  # @param webhook_id [String] webhook identifier
  def get(webhook_id)
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).get)
  end

  # Register a new webhook endpoint.
  # Delegates to POST /v1/webhooks via the generated WebhooksRequestBuilder.
  # @param url [String] endpoint URL, sent as "url"
  # @param event_types [Object, nil] value sent as "event_types"
  # @param events [Object, nil] back-compat alias, used only when
  #   +event_types+ is nil
  def create(url:, event_types: nil, events: nil, **opts)
    data = { "url" => url }.merge(opts.transform_keys(&:to_s))
    # API field is "event_types"; "events" kept as a back-compat alias.
    et = event_types.nil? ? events : event_types
    data["event_types"] = et unless et.nil?
    ResponseHelper.to_hash(@v1.webhooks.post(AdditionalDataBody.new(data)))
  end

  # Delete a webhook.
  # Delegates to DELETE /v1/webhooks/{id} via the generated builder.
  # @param webhook_id [String] webhook identifier
  # @return [Hash] always an empty Hash
  # @raise [Ghostcrawl::GhostcrawlError] when the request fails
  def delete(webhook_id)
    # The builder only returns a pending request; passing it through
    # ResponseHelper.to_hash is what resumes it and translates errors.
    # Discarding the builder result would leave the DELETE unsent.
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).delete)
    {}
  end

  # Rotate the signing secret for a webhook.
  # Delegates to POST /v1/webhooks/{id}/rotate-secret via the generated builder.
  # @param webhook_id [String] webhook identifier
  def rotate_secret(webhook_id)
    ResponseHelper.to_hash(@v1.webhooks.by_webhook_id(webhook_id).rotate_secret.post)
  end
end
|
|
441
|
+
|
|
442
|
+
# Manage schedules — /v1/schedules.
class SchedulesClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all schedules.
  # Delegates to GET /v1/schedules via the generated SchedulesRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.schedules.get)
  end

  # Get a schedule by ID.
  # Delegates to GET /v1/schedules/{id} via the generated builder.
  # @param schedule_id [String] schedule identifier
  def get(schedule_id)
    ResponseHelper.to_hash(@v1.schedules.by_schedule_id(schedule_id).get)
  end

  # Create a new schedule.
  # Delegates to POST /v1/schedules via the generated SchedulesRequestBuilder.
  # Extra keyword options are stringified and merged after the two fields.
  # @param cron [String] cron expression, sent as "cron_expr"
  # @param task [Object] task definition, sent as "task"
  def create(cron:, task:, **opts)
    data = { "cron_expr" => cron, "task" => task }.merge(opts.transform_keys(&:to_s))
    ResponseHelper.to_hash(@v1.schedules.post(AdditionalDataBody.new(data)))
  end

  # Delete a schedule.
  # Delegates to DELETE /v1/schedules/{id} via the generated builder.
  # @param schedule_id [String] schedule identifier
  # @return [Hash] always an empty Hash
  # @raise [Ghostcrawl::GhostcrawlError] when the request fails
  def delete(schedule_id)
    # The builder only returns a pending request; passing it through
    # ResponseHelper.to_hash is what resumes it and translates errors.
    # Discarding the builder result would leave the DELETE unsent.
    ResponseHelper.to_hash(@v1.schedules.by_schedule_id(schedule_id).delete)
    {}
  end
end
|
|
474
|
+
|
|
475
|
+
# Sub-client for datasets — /v1/datasets.
class DatasetsClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all datasets.
  # Delegates to GET /v1/datasets via the generated DatasetsRequestBuilder.
  def list
    pending = @v1.datasets.get
    ResponseHelper.to_hash(pending)
  end

  # Get a dataset by name.
  # Delegates to GET /v1/datasets/{name} via the generated builder.
  # @param name [String] dataset name
  def get(name)
    pending = @v1.datasets.by_name(name).get
    ResponseHelper.to_hash(pending)
  end

  # Create a new dataset.
  # Delegates to POST /v1/datasets via the generated DatasetsRequestBuilder.
  # Extra keyword options are stringified and merged after "name".
  # @param name [String] dataset name
  def create(name:, **opts)
    fields = { "name" => name }
    request_body = AdditionalDataBody.new(fields.merge(opts.transform_keys(&:to_s)))
    ResponseHelper.to_hash(@v1.datasets.post(request_body))
  end

  # Delete a dataset.
  # Delegates to DELETE /v1/datasets/{name} via the generated builder.
  # @param name [String] dataset name
  def delete(name)
    pending = @v1.datasets.by_name(name).delete
    ResponseHelper.to_hash(pending)
  end

  # Get rows from a dataset.
  # Delegates to GET /v1/datasets/{name}/rows via the generated builder.
  # @param name [String] dataset name
  def rows(name)
    pending = @v1.datasets.by_name(name).rows.get
    ResponseHelper.to_hash(pending)
  end

  # Append rows to a dataset.
  # Delegates to POST /v1/datasets/{name}/rows/append via the generated builder.
  # @param name [String] dataset name
  # @param rows [Object] value sent under the "rows" key
  def append(name, rows)
    request_body = AdditionalDataBody.new({ "rows" => rows })
    endpoint = @v1.datasets.by_name(name).rows.append
    ResponseHelper.to_hash(endpoint.post(request_body))
  end
end
|
|
519
|
+
|
|
520
|
+
# Manage session recordings — /v1/recordings.
class RecordingsClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # List all recordings.
  # Delegates to GET /v1/recordings via the generated RecordingsRequestBuilder.
  def list
    ResponseHelper.to_hash(@v1.recordings.get)
  end

  # Get a recording by ID.
  # Delegates to GET /v1/recordings/{id} via the generated builder.
  # @param recording_id [String] recording identifier
  def get(recording_id)
    ResponseHelper.to_hash(@v1.recordings.by_recording__id(recording_id).get)
  end

  # Delete a recording.
  # Delegates to DELETE /v1/recordings/{id} via the generated builder.
  # @param recording_id [String] recording identifier
  # @return [Hash] always an empty Hash
  # @raise [Ghostcrawl::GhostcrawlError] when the request fails
  def delete(recording_id)
    # The builder only returns a pending request; passing it through
    # ResponseHelper.to_hash is what resumes it and translates errors.
    # Discarding the builder result would leave the DELETE unsent.
    ResponseHelper.to_hash(@v1.recordings.by_recording__id(recording_id).delete)
    {}
  end
end
|
|
545
|
+
|
|
546
|
+
# Sub-client for the key-value store — /v1/kv.
class KVClient
  # @param v1 [Object] root of the generated v1 request builders
  def initialize(v1)
    @v1 = v1
  end

  # Get a value by key.
  # Delegates to GET /v1/kv/{key} via the generated KvRequestBuilder.
  # @param key [String] entry key
  def get(key)
    pending = @v1.kv.by_key(key).get
    ResponseHelper.to_hash(pending)
  end

  # Set a key-value pair.
  # Delegates to PUT /v1/kv/{key} via the generated builder.
  # @param key [String] entry key
  # @param value [Object] payload sent under the "value" key
  def set(key, value)
    request_body = AdditionalDataBody.new({ "value" => value })
    endpoint = @v1.kv.by_key(key)
    ResponseHelper.to_hash(endpoint.put(request_body))
  end

  # Delete a key.
  # Delegates to DELETE /v1/kv/{key} via the generated builder.
  # @param key [String] entry key
  def delete(key)
    pending = @v1.kv.by_key(key).delete
    ResponseHelper.to_hash(pending)
  end
end
|
|
571
|
+
|
|
572
|
+
# ---------------------------------------------------------------------------
|
|
573
|
+
# Main facade — Client
|
|
574
|
+
# ---------------------------------------------------------------------------
|
|
575
|
+
|
|
576
|
+
# GhostCrawl idiomatic API client.
#
# Delegates all HTTP transport, URL routing, serialization, and auth to the
# Kiota-generated canonical core (_generated/). This facade is the shipped API.
#
# @example
#   require "ghostcrawl"
#   client = Ghostcrawl::Client.new(token: "gck_live_YOUR_KEY")
#   result = client.scrape(url: "https://example.com")
class Client
  # Default read timeout (seconds). Browser-rendered scrapes/crawls are slow,
  # and the underlying net/http default (60s) is too short for them.
  DEFAULT_TIMEOUT = 300

  # Blank values (nil, "", whitespace-only) for +base_url+ / +timeout+ and
  # their environment variables are treated as "not set" and fall through to
  # the next source, so an exported-but-empty variable cannot produce an empty
  # base URL or silently disable the timeout default.
  #
  # @param token [String, nil] API key. Falls back to +GHOSTCRAWL_API_KEY+ env var.
  # @param base_url [String, nil] Override API base URL. Falls back to
  #   +GHOSTCRAWL_BASE_URL+ env var, then the default base URL. Surrounding
  #   whitespace and trailing slashes are removed.
  # @param timeout [Integer, nil] Per-request read timeout in seconds. Falls back
  #   to +GHOSTCRAWL_TIMEOUT+ env var, then {DEFAULT_TIMEOUT}. A value that is
  #   not positive leaves the connection's own timeout untouched.
  # @raise [ArgumentError] when no token is given and the env var is unset or empty
  def initialize(token: nil, base_url: nil, timeout: nil)
    resolved_token = token || ENV.fetch("GHOSTCRAWL_API_KEY", nil)
    if resolved_token.nil? || resolved_token.empty?
      raise ArgumentError,
            "token is required — pass token: or set GHOSTCRAWL_API_KEY. " \
            "Get your key at https://ghostcrawl.io"
    end

    resolved_base = first_present(base_url,
                                  ENV.fetch("GHOSTCRAWL_BASE_URL", nil),
                                  DEFAULT_BASE_URL).to_s.strip.sub(%r{/+\z}, "")

    resolved_timeout = first_present(timeout,
                                     ENV.fetch("GHOSTCRAWL_TIMEOUT", nil),
                                     DEFAULT_TIMEOUT).to_i

    # Build the Kiota core via BaseBearerTokenAuthenticationProvider + FaradayRequestAdapter.
    # All HTTP, auth, serialization, and URL routing delegate to the generated core.
    auth_provider = MicrosoftKiotaAbstractions::BaseBearerTokenAuthenticationProvider.new(
      StaticTokenProvider.new(resolved_token)
    )
    adapter = MicrosoftKiotaFaraday::FaradayRequestAdapter.new(auth_provider)
    adapter.set_base_url(resolved_base)

    # The default Faraday connection sets no timeout, so it inherits net/http's
    # 60s read timeout — too short for browser-rendered work. Raise it.
    if adapter.client.respond_to?(:options) && resolved_timeout.positive?
      adapter.client.options.timeout = resolved_timeout
      adapter.client.options.open_timeout = 30
    end

    @core = Ghostcrawl::GhostcrawlClient.new(adapter)
    @v1 = @core.v1
    # Kept so #me can re-issue the GET /v1/me request with the raw-JSON
    # (Binary) response factory instead of the typed MeResponse model.
    @adapter = adapter

    @crawl_runs = CrawlRunsClient.new(@v1)
    @sessions = SessionsClient.new(@v1)
    @profiles = ProfilesClient.new(@v1)
    @webhooks = WebhooksClient.new(@v1)
    @schedules = SchedulesClient.new(@v1)
    @datasets = DatasetsClient.new(@v1)
    @recordings = RecordingsClient.new(@v1)
    @kv = KVClient.new(@v1)
  end

  # @return [CrawlRunsClient]
  attr_reader :crawl_runs
  # @return [SessionsClient]
  attr_reader :sessions
  # @return [ProfilesClient]
  attr_reader :profiles
  # @return [WebhooksClient]
  attr_reader :webhooks
  # @return [SchedulesClient]
  attr_reader :schedules
  # @return [DatasetsClient]
  attr_reader :datasets
  # @return [RecordingsClient]
  attr_reader :recordings
  # @return [KVClient]
  attr_reader :kv

  # ---------------------------------------------------------------------------
  # Top-level facade methods — delegate to generated builders
  # ---------------------------------------------------------------------------

  # Scrape a single URL and return the rendered content.
  # Delegates to POST /v1/scrape via the generated ScrapeRequestBuilder.
  # @param url [String] target URL
  # @param format [String] output format: "markdown" (default), "html", "text"
  # @param engine [String] browser engine: "auto" (default), "chrome", "firefox", "webkit"
  # @param javascript [Boolean] enable JavaScript rendering (default true)
  # @param extract_schema [Hash, nil] JSON Schema for structured extraction
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] response with +markdown+, +status+, and other fields
  def scrape(url:, format: "markdown", engine: "auto", javascript: true, extract_schema: nil,
             raise_on_result_error: true, **opts)
    # Use AdditionalDataBody to send only the fields we specify — the generated
    # ScrapeRequest model would serialize typed defaults (nulls + empty enums) that
    # cause 422 validation errors on the server.
    data = { "url" => url, "format" => format, "engine" => engine,
             "javascript_enabled" => javascript }.merge(opts.transform_keys(&:to_s))
    data["extract_schema"] = extract_schema unless extract_schema.nil?
    hash = ResponseHelper.to_hash(@v1.scrape.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Search the web and return results.
  # Delegates to POST /v1/search via the generated SearchRequestBuilder.
  #
  # /v1/search requires your own search-backend API key (BYO; GhostCrawl
  # charges no markup). Pass it as +provider_key+ — it is sent as the
  # +X-Provider-Authorization: Bearer <provider_key>+ header the backend
  # requires. Without it the API replies 401 search_backend_key_missing.
  # @param query [String] search query
  # @param engine [String] search engine: "google" (default), "bing", "duckduckgo"
  # @param limit [Integer] maximum results (default 10)
  # @param provider_key [String, nil] BYO search-backend key (sent as X-Provider-Authorization)
  # @return [Hash] response with +results+ list
  def search(query:, engine: "google", limit: 10, provider_key: nil, **opts)
    data = { "query" => query, "engine" => engine,
             "limit" => limit }.merge(opts.transform_keys(&:to_s))
    # A request configuration is only built when there is a header to attach.
    config = nil
    unless provider_key.nil?
      config = MicrosoftKiotaAbstractions::RequestConfiguration.new
      headers = MicrosoftKiotaAbstractions::RequestHeaders.new
      headers.add("X-Provider-Authorization", "Bearer #{provider_key}")
      config.headers = headers
    end
    ResponseHelper.to_hash(@v1.search.post(AdditionalDataBody.new(data), config))
  end

  # Extract structured data from a URL using a JSON Schema.
  # Delegates to POST /v1/extract via the generated ExtractRequestBuilder.
  # @param url [String] target URL
  # @param schema [Hash] JSON Schema describing the shape to extract
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] extracted data
  def extract(url:, schema:, raise_on_result_error: true, **opts)
    data = { "url" => url, "schema" => schema }.merge(opts.transform_keys(&:to_s))
    hash = ResponseHelper.to_hash(@v1.extract.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Start a deep crawl from a seed URL.
  # Delegates to POST /v1/crawl/deep via the generated CrawlDeepRequestBuilder.
  # The single +url+ is sent as a one-element +seed_urls+ array and
  # +max_pages+ is sent under the "max_urls" key.
  # @param url [String] seed URL
  # @param max_depth [Integer] maximum crawl depth (default 2)
  # @param max_pages [Integer] maximum pages (default 100)
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
  #   target-side (HTTP-200) failure instead of returning the raw hash (default true)
  # @return [Hash] crawl run record
  def crawl(url:, max_depth: 2, max_pages: 100, raise_on_result_error: true, **opts)
    data = { "seed_urls" => [url], "max_depth" => max_depth,
             "max_urls" => max_pages }.merge(opts.transform_keys(&:to_s))
    hash = ResponseHelper.to_hash(@v1.crawl.deep.post(AdditionalDataBody.new(data)))
    raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
  end

  # Map all URLs reachable from a seed URL.
  # Delegates to POST /v1/map via the generated MapRequestBuilder.
  # @param url [String] seed URL
  # @return [Hash] response with +urls+ list
  def map(url:, **opts)
    data = { "url" => url }.merge(opts.transform_keys(&:to_s))
    ResponseHelper.to_hash(@v1.map.post(AdditionalDataBody.new(data)))
  end

  # Get the current account's profile.
  # Delegates to GET /v1/me via the generated MeRequestBuilder.
  #
  # The generated +@v1.me.get+ deserializes into the typed +MeResponse+ model,
  # whose +created_at+ composed-type member fails under the pinned Kiota JSON
  # parser ("Error during deserialization"). To avoid that, we re-issue the
  # SAME request (reusing the builder's URL/auth/header wiring via
  # +to_get_request_information+) with the +Binary+ response factory — the
  # exact raw-JSON path the +scrape+ builder already uses (see KiotaParseNodeFix).
  # @return [Hash] account info with +user_id+, +email+, +created_at+, +primary_team+
  def me
    request_info = @v1.me.to_get_request_information(nil)
    ResponseHelper.to_hash(@adapter.send_async(request_info, Ghostcrawl::V1::Binary, {}))
  end

  private

  # Returns the first candidate that is neither nil nor blank once converted
  # to a String and stripped; nil when every candidate is blank.
  def first_present(*candidates)
    candidates.find { |candidate| !candidate.nil? && !candidate.to_s.strip.empty? }
  end
end
|
|
761
|
+
end
|