crawlora 1.8.0.pre.sdk.1 → 1.8.0.pre.sdk.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/crawlora/version.rb +1 -1
- data/openapi/public.json +62 -9
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a055f6f58608503ed3b6e3aeaee2df8c354e3a3dd545ee0c0d1a78f426751fdf
|
|
4
|
+
data.tar.gz: ec2164216f5226fcd2478ec9b2ef6145e56b6f09a2d6ede5bcf8626a263cf936
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c915cda57689bdd0708f50f54a432af81938fe0cbbba731d6b2cc3e0995d604e7dae7507410455a68795fd9890a3b69c53153576f74b9bdd65bb6aa61dbe8628
|
|
7
|
+
data.tar.gz: 360e1bb791ab1cea375d31514fd8fe6a8214f254b4114c2eb7e7e6e60e087be70acaeaabbefd85dfeb7b0d1c85cb15050cad07a1c37d7c82f4c76e98d7c1ab8a
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## v1.8.0-sdk.2
|
|
4
|
+
|
|
5
|
+
- Regenerated from the public API contract (499 operations, unchanged). Enriches
|
|
6
|
+
the Web `antibot-check` diagnostic response with additional fields:
|
|
7
|
+
`block_reason`, `block_detail`, `auth_required`, `captcha_type`,
|
|
8
|
+
`captcha_types`, `captcha_mode`, `confidence_score`, `custom_vm`, and
|
|
9
|
+
`vm_vendor`.
|
|
10
|
+
- Clarified the `google-search` and datasets `google-map-businesses/search`
|
|
11
|
+
endpoint descriptions (wording only; no behavior change).
|
|
12
|
+
|
|
3
13
|
## v1.8.0-sdk.1
|
|
4
14
|
|
|
5
15
|
- Added two new platforms, regenerated from the public API contract (now 499
|
data/lib/crawlora/version.rb
CHANGED
|
@@ -5,5 +5,5 @@ module Crawlora
|
|
|
5
5
|
# as the Go/Java/PHP SDKs). RubyGems treats it as a prerelease and normalizes
|
|
6
6
|
# the published gem version (the `-` becomes `.pre.`, e.g. `1.5.0.pre.sdk.N`).
|
|
7
7
|
# Bumped across all SDK repos by the API repo's tools/sdkgen/bump_version.py.
|
|
8
|
-
VERSION = "1.8.0-sdk.1"
|
|
8
|
+
VERSION = "1.8.0-sdk.2"
|
|
9
9
|
end
|
data/openapi/public.json
CHANGED
|
@@ -485,10 +485,25 @@
|
|
|
485
485
|
},
|
|
486
486
|
"antibot.Protection": {
|
|
487
487
|
"properties": {
|
|
488
|
+
"captcha_mode": {
|
|
489
|
+
"type": "string"
|
|
490
|
+
},
|
|
491
|
+
"captcha_type": {
|
|
492
|
+
"description": "CaptchaType / CaptchaMode sub-classify a kind=captcha detection\n(e.g. recaptcha_v3 / score, turnstile / managed).",
|
|
493
|
+
"type": "string"
|
|
494
|
+
},
|
|
488
495
|
"confidence": {
|
|
489
496
|
"description": "low | medium | high",
|
|
490
497
|
"type": "string"
|
|
491
498
|
},
|
|
499
|
+
"confidence_score": {
|
|
500
|
+
"description": "ConfidenceScore is the numeric (0-100) additive confidence over this\ndetection's own evidence (Wappalyzer-style); Confidence is its bucketed label.",
|
|
501
|
+
"type": "integer"
|
|
502
|
+
},
|
|
503
|
+
"custom_vm": {
|
|
504
|
+
"description": "CustomVM marks the proprietary signed-payload / closed-VM class (Kasada,\nF5 Shape, TikTok) whose anti-bot is a closed in-browser bytecode VM rather\nthan generic transport detection; VMVendor names it.",
|
|
505
|
+
"type": "boolean"
|
|
506
|
+
},
|
|
492
507
|
"evidence": {
|
|
493
508
|
"items": {
|
|
494
509
|
"type": "string"
|
|
@@ -501,6 +516,9 @@
|
|
|
501
516
|
},
|
|
502
517
|
"vendor": {
|
|
503
518
|
"type": "string"
|
|
519
|
+
},
|
|
520
|
+
"vm_vendor": {
|
|
521
|
+
"type": "string"
|
|
504
522
|
}
|
|
505
523
|
},
|
|
506
524
|
"type": "object"
|
|
@@ -542,10 +560,35 @@
|
|
|
542
560
|
},
|
|
543
561
|
"antibot.Verdict": {
|
|
544
562
|
"properties": {
|
|
563
|
+
"auth_required": {
|
|
564
|
+
"type": "boolean"
|
|
565
|
+
},
|
|
566
|
+
"block_detail": {
|
|
567
|
+
"type": "string"
|
|
568
|
+
},
|
|
569
|
+
"block_reason": {
|
|
570
|
+
"description": "BlockReason is WHY the representative request was blocked (or \"ok\"):\nok | bot_challenge | captcha_required | rate_limited | ip_blocked |\ngeo_blocked | auth_required | payment_required | service_unavailable |\nforbidden | other | unreachable. Different reasons call for different fixes\n(rotate IPs vs run a browser vs log in), so they are surfaced distinctly.",
|
|
571
|
+
"type": "string"
|
|
572
|
+
},
|
|
573
|
+
"captcha_types": {
|
|
574
|
+
"description": "CaptchaTypes lists the distinct CAPTCHA types detected (e.g. recaptcha_v3,\nturnstile) for easy aggregation.",
|
|
575
|
+
"items": {
|
|
576
|
+
"type": "string"
|
|
577
|
+
},
|
|
578
|
+
"type": "array"
|
|
579
|
+
},
|
|
545
580
|
"coverage": {
|
|
546
581
|
"description": "Coverage is `full` when every transport tier ran, or `partial` when some\nwere skipped (e.g. browser sidecars unconfigured in this environment), in\nwhich case a \"blocked\" verdict may understate what a browser could do.",
|
|
547
582
|
"type": "string"
|
|
548
583
|
},
|
|
584
|
+
"custom_vm": {
|
|
585
|
+
"description": "CustomVM / VMVendor are set when any detected protection is the proprietary\nclosed-VM class (Kasada / F5 Shape / TikTok).",
|
|
586
|
+
"type": "boolean"
|
|
587
|
+
},
|
|
588
|
+
"detection_confidence_score": {
|
|
589
|
+
"description": "ConfidenceScore is the strongest detection's numeric confidence (0-100), or\n0 when nothing was detected (confidence describes a detection, not a negative).",
|
|
590
|
+
"type": "integer"
|
|
591
|
+
},
|
|
549
592
|
"difficulty_band": {
|
|
550
593
|
"$ref": "#/definitions/antibot.Band"
|
|
551
594
|
},
|
|
@@ -556,6 +599,10 @@
|
|
|
556
599
|
"easiest_working_transport": {
|
|
557
600
|
"type": "string"
|
|
558
601
|
},
|
|
602
|
+
"enforcement": {
|
|
603
|
+
"description": "Enforcement separates a managed vendor actively challenging/blocking us from\none merely sitting in front as passive CDN/edge:\nopen | passive_edge | active_challenge | active_block.",
|
|
604
|
+
"type": "string"
|
|
605
|
+
},
|
|
559
606
|
"gated_layers": {
|
|
560
607
|
"description": "GatedLayers names the anti-bot detection layers that block a plain client,\nderived empirically from which probe tiers were rejected: `tls` (a plain\nHTTP client's TLS/header fingerprint was rejected, but browser-impersonation\nor a browser got through), `js` (HTTP transports failed; JavaScript\nrendering / a real browser was required), `behavior` (a plain headless\nbrowser was also rejected; an anti-detect/stealth browser was needed).",
|
|
561
608
|
"items": {
|
|
@@ -581,6 +628,9 @@
|
|
|
581
628
|
"recommended_profile": {
|
|
582
629
|
"type": "string"
|
|
583
630
|
},
|
|
631
|
+
"retry_after": {
|
|
632
|
+
"type": "string"
|
|
633
|
+
},
|
|
584
634
|
"scrapeable": {
|
|
585
635
|
"type": "boolean"
|
|
586
636
|
},
|
|
@@ -592,6 +642,9 @@
|
|
|
592
642
|
},
|
|
593
643
|
"url": {
|
|
594
644
|
"type": "string"
|
|
645
|
+
},
|
|
646
|
+
"vm_vendor": {
|
|
647
|
+
"type": "string"
|
|
595
648
|
}
|
|
596
649
|
},
|
|
597
650
|
"type": "object"
|
|
@@ -43442,7 +43495,7 @@
|
|
|
43442
43495
|
"consumes": [
|
|
43443
43496
|
"application/json"
|
|
43444
43497
|
],
|
|
43445
|
-
"description": "Fetches a domain's homepage and extracts a normalized brand profile (title, description, colors, logos, backdrops, socials, links, and any schema.org organization data). Enrichment-only fields that are not present in the page markup are returned as null.",
|
|
43498
|
+
"description": "Fetches a domain's homepage and Web App Manifest and extracts a normalized brand profile (title, description, brand colors normalized to hex, logos and icons ranked best-first, backdrops, socials, links, and any schema.org organization data). Enrichment-only fields that are not present in the page markup are returned as null.",
|
|
43446
43499
|
"operationId": "brand-retrieve",
|
|
43447
43500
|
"parameters": [
|
|
43448
43501
|
{
|
|
@@ -47200,7 +47253,7 @@
|
|
|
47200
47253
|
"consumes": [
|
|
47201
47254
|
"application/json"
|
|
47202
47255
|
],
|
|
47203
|
-
"description": "Searches Google Maps business records stored in
|
|
47256
|
+
"description": "Searches Google Maps business records stored in a search index. Sort enum: `relevance`, `updated_at_desc`, `rating_desc`, `review_count_desc`, `distance_asc`.",
|
|
47204
47257
|
"operationId": "datasets-google-map-businesses-search",
|
|
47205
47258
|
"parameters": [
|
|
47206
47259
|
{
|
|
@@ -47896,8 +47949,8 @@
|
|
|
47896
47949
|
"$ref": "#/definitions/app.Response"
|
|
47897
47950
|
}
|
|
47898
47951
|
},
|
|
47899
|
-
"
|
|
47900
|
-
"description": "
|
|
47952
|
+
"502": {
|
|
47953
|
+
"description": "Bad Gateway",
|
|
47901
47954
|
"schema": {
|
|
47902
47955
|
"$ref": "#/definitions/app.Response"
|
|
47903
47956
|
}
|
|
@@ -47995,8 +48048,8 @@
|
|
|
47995
48048
|
"$ref": "#/definitions/app.Response"
|
|
47996
48049
|
}
|
|
47997
48050
|
},
|
|
47998
|
-
"
|
|
47999
|
-
"description": "
|
|
48051
|
+
"502": {
|
|
48052
|
+
"description": "Bad Gateway",
|
|
48000
48053
|
"schema": {
|
|
48001
48054
|
"$ref": "#/definitions/app.Response"
|
|
48002
48055
|
}
|
|
@@ -48128,8 +48181,8 @@
|
|
|
48128
48181
|
"$ref": "#/definitions/app.Response"
|
|
48129
48182
|
}
|
|
48130
48183
|
},
|
|
48131
|
-
"
|
|
48132
|
-
"description": "
|
|
48184
|
+
"502": {
|
|
48185
|
+
"description": "Bad Gateway",
|
|
48133
48186
|
"schema": {
|
|
48134
48187
|
"$ref": "#/definitions/app.Response"
|
|
48135
48188
|
}
|
|
@@ -49588,7 +49641,7 @@
|
|
|
49588
49641
|
"consumes": [
|
|
49589
49642
|
"application/json"
|
|
49590
49643
|
],
|
|
49591
|
-
"description": "Returns normalized Google web search results. Results are fetched through
|
|
49644
|
+
"description": "Returns normalized Google web search results. Results are fetched through proxied browser renderers that race several concurrent renders per request and return the first clean result, with stale-cache fallback when available. The endpoint returns 503 when Google serves a challenge page or unusable HTML. Rate limit is enforced at 1 request per second, and if the limit is exceeded a 429 status code is returned with rate limit headers.",
|
|
49592
49645
|
"operationId": "google-search",
|
|
49593
49646
|
"parameters": [
|
|
49594
49647
|
{
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: crawlora
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.8.0.pre.sdk.1
|
|
4
|
+
version: 1.8.0.pre.sdk.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Crawlora
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: 'Crawlora is a web-scraping API for structured public web data — search,
|
|
14
14
|
marketplace, social, finance, media, reviews, and geodata — without running your
|