ghostcrawl 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49310560652f43f54a4ce7d69b0792e22675e6d5d23923ec7960f9f4be0cd52d
4
- data.tar.gz: 6245e25155ae69a24bb442775639f8872526f5988b69b1db3a35541ba388f822
3
+ metadata.gz: ff09081da25b9c78b261437d5bc26c2a4b25f3e727a53e8393f1adb00aa6536c
4
+ data.tar.gz: ad0869fe147e1c33a49fadd364bc93ce5857fd73eb03b584d4c1ce618e1092a9
5
5
  SHA512:
6
- metadata.gz: b2c7fcaced06f2628f77d2c574ad34714f064d91660037fdcb5f06bda80e33e446dba4717c169216a3585762ee83ed7a94c708dd316902cde0a21bcfd2b9ff29
7
- data.tar.gz: 618777d4e590acf5828ca2999028af7f3c43b71469ba65caf6e6970a62c551fa8aeba35ddda7e5febef22b75249cc77b5e7ad380e23515b25bbd392c6be89b94
6
+ metadata.gz: '085820b153fb6b70ea26f6e866787388835cfec7e4cce86498f83551abe6b4c8800b5eb14f86203c59ca77b7e70cffbd459d0d84f7a75fc69f5337914bcf60e6'
7
+ data.tar.gz: 11a00207fc7469d02bec976018c8e627ea11c0da0ce78d4dc5a6d5b76fced17a5f3a37053159e0bde94dd0c44e4d3e273e7cf57cdac1d652a410bcba4da814ff
@@ -668,7 +668,7 @@ module Ghostcrawl
668
668
  # @param extract_schema [Hash, nil] JSON Schema for structured extraction
669
669
  # @param raise_on_result_error [Boolean] raise {Ghostcrawl::ScrapeError} on a
670
670
  # target-side (HTTP-200) failure instead of returning the raw hash (default true)
671
- # @return [Hash] response with +markdown+, +status+, and other fields
671
+ # @return [Hash] response with +content+, +markdown+, +status+, and other fields
672
672
  def scrape(url:, format: "markdown", engine: "auto", javascript: true, extract_schema: nil,
673
673
  raise_on_result_error: true, **opts)
674
674
  # Use AdditionalDataBody to send only the fields we specify — the generated
@@ -678,6 +678,7 @@ module Ghostcrawl
678
678
  "javascript_enabled" => javascript }.merge(opts.transform_keys(&:to_s))
679
679
  data["extract_schema"] = extract_schema unless extract_schema.nil?
680
680
  hash = ResponseHelper.to_hash(@v1.scrape.post(AdditionalDataBody.new(data)))
681
+ normalize_scrape_content(hash)
681
682
  raise_on_result_error ? ResponseHelper.raise_on_result_error!(hash) : hash
682
683
  end
683
684
 
@@ -757,5 +758,42 @@ module Ghostcrawl
757
758
  request_info = @v1.me.to_get_request_information(nil)
758
759
  ResponseHelper.to_hash(@adapter.send_async(request_info, Ghostcrawl::V1::Binary, {}))
759
760
  end
761
+
762
+ private
763
+
764
# Mirrors the rendered page body onto a +"content"+ key of a decoded scrape
# response.
#
# The API returns the rendered page under a format-specific key
# (+"markdown"+, +"html"+, or +"text"+), while the documented quickstart reads
# +result["content"]+. The chosen value is copied onto +"content"+ in place;
# the format-specific key is left intact, so existing callers keep working.
#
# Lookup order: the field named by +result["format"]+ (only when +"format"+
# is a String), then +"markdown"+, +"html"+, +"text"+. The first of those
# fields holding a String wins. Nothing is written when none of them does.
#
# @param result [Object] the decoded response; mutated only when it is a Hash
#   that does not already carry a +"content"+ key
# @return [Object] the same +result+ reference that was passed in
# @api private
def normalize_scrape_content(result)
  return result unless result.is_a?(Hash) && !result.key?("content")

  # Fallback keys in priority order; the declared format, if any, goes first.
  candidate_keys = %w[markdown html text]
  declared_format = result["format"]
  candidate_keys.unshift(declared_format) if declared_format.is_a?(String)

  # Non-String values (nil, Hash, Integer, ...) are never mirrored.
  source_key = candidate_keys.find { |key| result[key].is_a?(String) }
  result["content"] = result[source_key] if source_key
  result
end
760
798
  end
761
799
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ghostcrawl
4
- VERSION = "2.2.1"
4
+ VERSION = "2.2.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ghostcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - GhostCrawl