css_parser 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d6f16251b0848f62bfa8dcbd58cba5b17af6e799e23dcfd9c03276b6af5a2b9c
4
- data.tar.gz: a9a952385360606bca1250f5a76b8d04ea707157665de05b8a394f032512c9b1
3
+ metadata.gz: 7286523850595059190f244936748ce609fc7d22de85ef56d5c5c229bd9a4ba4
4
+ data.tar.gz: 64fe512e21e1687c4221be94793eedbf603bd5723ef32a804177f70ba96e41b7
5
5
  SHA512:
6
- metadata.gz: 9def82ae68f5905e14f6b66e649d90ba454d5a93bc041163253e6715adbcc936c6c7bc64ee511117190e54337ce87470fb89b75b786dd813e13e1655b34ec180
7
- data.tar.gz: 7d54f7b5ebe428725ee6ef434790e365f93b938fa73e315c039833ecdf42da52ffeff59bc49afe0c83b3b34093c54da30d7625220634c0376187d652939dd9b6
6
+ metadata.gz: 63fa1631ae27fb97c375eb50d2e2b29f99e029872be0ad82191834818ad989470517c7d52cb77291a6a2a58fd920c3d1aa10a6250b5b569641016c77862b57cc
7
+ data.tar.gz: e6c24be4780b12aff38d866e5f0bc6b2b17deb78a94d2bb5072ec23bd82fcd70e1bcab63e4e02e915d2796e09dcaf9f5a239614c5c792d33003bb8ef0ca89d00
@@ -17,6 +17,8 @@ module CssParser
17
17
  # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
18
18
  # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
19
19
  # [<tt>io_exceptions</tt>] Throw an exception if a link cannot be found. Boolean, default is <tt>true</tt>.
20
+ # [<tt>allow_local_network</tt>] Permit http(s) fetches against loopback / private / link-local / cloud-metadata addresses. Boolean, default is <tt>false</tt>. When <tt>false</tt> (the default), outbound HTTP requests are routed through <tt>ssrf_filter</tt>, which resolves the host and rejects unsafe IP ranges. Set to <tt>true</tt> only when the destination is known to be safe (e.g. local fixture servers in tests). Independent of <tt>allow_file_uris</tt>.
21
+ # [<tt>allow_file_uris</tt>] Permit <tt>file://</tt> URIs via <tt>load_uri!</tt>. Boolean, default is <tt>false</tt>. When <tt>false</tt> (the default), a caller that passes a <tt>file://</tt> URI to <tt>load_uri!</tt> — directly or via a CSS <tt>@import</tt> resolved against a <tt>file://</tt> base_uri — is refused, closing the local-file-disclosure vector when the URI is influenced by user input. <tt>load_file!</tt> is unaffected: it is the explicit local-file API and takes a caller-supplied path. Independent of <tt>allow_local_network</tt>.
20
22
  class Parser
21
23
  USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze
22
24
  RULESET_TOKENIZER_RX = /\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/.freeze
@@ -28,8 +30,15 @@ module CssParser
28
30
 
29
31
  MAX_REDIRECTS = 3
30
32
 
33
+ # Schemes accepted by `read_remote_file`. `file://` is intentionally
34
+ # NOT in this list — local files are handled directly by `load_uri!`
35
+ # and `load_file!`. Keeping `file://` out of the remote read path
36
+ # closes the cross-scheme redirect (HTTP 3xx → `file://`) vector that
37
+ # was GHSA-9pmc-p236-855h.
38
+ REMOTE_ALLOWED_SCHEMES = %w[http https].freeze
39
+
31
40
  # Array of CSS files that have been loaded.
32
- attr_reader :loaded_uris
41
+ attr_reader :loaded_uris
33
42
 
34
43
  def initialize(options = {})
35
44
  @options = {
@@ -38,14 +47,14 @@ module CssParser
38
47
  io_exceptions: true,
39
48
  rule_set_exceptions: true,
40
49
  capture_offsets: false,
41
- user_agent: USER_AGENT
50
+ user_agent: USER_AGENT,
51
+ allow_local_network: false,
52
+ allow_file_uris: false
42
53
  }.merge(options)
43
54
 
44
55
  # array of RuleSets
45
56
  @rules = []
46
57
 
47
- @redirect_count = nil
48
-
49
58
  @loaded_uris = []
50
59
 
51
60
  # unprocessed blocks of CSS
@@ -510,7 +519,28 @@ module CssParser
510
519
  # pass on the uri if we are capturing file offsets
511
520
  opts[:filename] = uri.to_s if opts[:capture_offsets]
512
521
 
513
- src, = read_remote_file(uri) # skip charset
522
+ # file:// is handled here, not inside read_remote_file. The
523
+ # remote-read path must never service file:// URIs, so a 3xx
524
+ # `Location: file://...` redirect cannot be turned into a local
525
+ # File.read.
526
+ #
527
+ # file:// via `load_uri!` is also gated by `allow_file_uris`:
528
+ # an attacker who can influence a URI passed here (e.g. via a CSS
529
+ # @import resolved against an attacker-controlled base_uri) could
530
+ # otherwise turn it into arbitrary local file disclosure. Callers
531
+ # that legitimately need to load local files should use
532
+ # `load_file!` (the explicit local-file API).
533
+ src = if uri.scheme == 'file'
534
+ unless @options[:allow_file_uris]
535
+ raise RemoteFileError, uri.to_s if @options[:io_exceptions]
536
+
537
+ return
538
+ end
539
+ read_local_file(uri)
540
+ else
541
+ src_and_charset, = read_remote_file(uri) # skip charset
542
+ src_and_charset
543
+ end
514
544
 
515
545
  add_block!(src, opts) if src
516
546
  end
@@ -604,86 +634,124 @@ module CssParser
604
634
  utf8_block
605
635
  end
606
636
 
607
- # Download a file into a string.
637
+ # Read a local file:// URI. Called only from `load_uri!` — never
638
+ # from the remote read path — so an HTTP redirect cannot reach this
639
+ # branch (GHSA-9pmc-p236-855h).
640
+ def read_local_file(uri) # :nodoc:
641
+ # Internal invariant: this method is the implementation of the
642
+ # `allow_file_uris: true` branch of `load_uri!`. If it is ever
643
+ # reached without that flag set, a future change has bypassed the
644
+ # LFI gate; refuse to read rather than silently leak.
645
+ unless @options[:allow_file_uris]
646
+ raise "BUG: #{self.class}##{__method__} reached with " \
647
+ 'allow_file_uris=false (LFI gate bypassed)'
648
+ end
649
+
650
+ return nil unless circular_reference_check(uri.to_s)
651
+
652
+ path = uri.path
653
+ path.gsub!(%r{^/}, '') if Gem.win_platform?
654
+ File.read(path, mode: 'rb')
655
+ rescue
656
+ raise RemoteFileError, uri.to_s if @options[:io_exceptions]
657
+
658
+ nil
659
+ end
660
+
661
+ # Download a remote http(s) file into a string.
608
662
  #
609
663
  # Returns the file's data and character set in an array.
664
+ #
665
+ # In the default (secure) configuration, requests are issued via
666
+ # `SsrfFilter.get`, which:
667
+ # - rejects any scheme other than http/https (defeats redirect-to-
668
+ # `file://` / `gopher://` / `dict://` etc.);
669
+ # - resolves the hostname with `Resolv` and rejects requests whose
670
+ # resolved IP is loopback, RFC-1918, link-local, multicast, or any
671
+ # other range typically used for internal services (defeats SSRF
672
+ # via literal IPs and via CNAME / attacker-controlled A records);
673
+ # - re-validates scheme and IP on every redirect hop.
674
+ #
675
+ # When `allow_local_network: true` is set on the Parser, the SSRF
676
+ # check is bypassed and plain `Net::HTTP` is used — but the scheme
677
+ # is still validated on every redirect hop, so cross-scheme
678
+ # redirect to `file://` (the original GHSA-9pmc-p236-855h sink)
679
+ # remains closed even on this opt-in path.
610
680
  #--
611
681
  # TODO: add option to fail silently or throw an exception on a 404
612
682
  #++
613
683
  def read_remote_file(uri) # :nodoc:
614
- if @redirect_count.nil?
615
- @redirect_count = 0
616
- else
617
- @redirect_count += 1
618
- end
684
+ uri = Addressable::URI.parse(uri.to_s)
619
685
 
620
686
  unless circular_reference_check(uri.to_s)
621
- @redirect_count = nil
622
687
  return nil, nil
623
688
  end
624
689
 
625
- if @redirect_count > MAX_REDIRECTS
626
- @redirect_count = nil
690
+ unless REMOTE_ALLOWED_SCHEMES.include?(uri.scheme)
691
+ raise RemoteFileError, uri.to_s if @options[:io_exceptions]
692
+
627
693
  return nil, nil
628
694
  end
629
695
 
630
- src = '', charset = nil
631
-
632
696
  begin
633
- uri = Addressable::URI.parse(uri.to_s)
634
-
635
- if uri.scheme == 'file'
636
- # local file
637
- path = uri.path
638
- path.gsub!(%r{^/}, '') if Gem.win_platform?
639
- src = File.read(path, mode: 'rb')
640
- else
641
- # remote file
642
- if uri.scheme == 'https'
643
- uri.port = 443 unless uri.port
644
- http = Net::HTTP.new(uri.host, uri.port)
645
- http.use_ssl = true
646
- else
647
- http = Net::HTTP.new(uri.host, uri.port)
648
- end
649
-
650
- res = http.get(uri.request_uri, {'User-Agent' => @options[:user_agent], 'Accept-Encoding' => 'gzip'})
651
- src = res.body
652
- charset = res.respond_to?(:charset) ? res.encoding : 'utf-8'
653
-
654
- if res.code.to_i >= 400
655
- @redirect_count = nil
656
- raise RemoteFileError, uri.to_s if @options[:io_exceptions]
697
+ res = if @options[:allow_local_network]
698
+ fetch_via_net_http(uri)
699
+ else
700
+ SsrfFilter.get(
701
+ uri.to_s,
702
+ scheme_whitelist: REMOTE_ALLOWED_SCHEMES,
703
+ max_redirects: MAX_REDIRECTS,
704
+ headers: {'User-Agent' => @options[:user_agent]}
705
+ )
706
+ end
657
707
 
658
- return '', nil
659
- elsif res.code.to_i >= 300 and res.code.to_i < 400
660
- unless res['Location'].nil?
661
- return read_remote_file Addressable::URI.parse(Addressable::URI.escape(res['Location']))
662
- end
663
- end
708
+ if res.code.to_i >= 400
709
+ raise RemoteFileError, uri.to_s if @options[:io_exceptions]
664
710
 
665
- case res['content-encoding']
666
- when 'gzip'
667
- io = Zlib::GzipReader.new(StringIO.new(res.body))
668
- src = io.read
669
- when 'deflate'
670
- io = Zlib::Inflate.new
671
- src = io.inflate(res.body)
672
- end
711
+ return '', nil
673
712
  end
674
713
 
675
- if charset
676
- src.encode!('UTF-8', charset)
677
- end
714
+ charset = res.respond_to?(:charset) ? res.encoding : 'utf-8'
715
+ src = res.body
716
+ src.encode!('UTF-8', charset) if charset
717
+
718
+ [src, charset]
678
719
  rescue
679
- @redirect_count = nil
680
720
  raise RemoteFileError, uri.to_s if @options[:io_exceptions]
681
721
 
682
- return nil, nil
722
+ [nil, nil]
723
+ end
724
+ end
725
+
726
+ # Net::HTTP path used only when `allow_local_network: true`. Validates
727
+ # the URI scheme on every redirect hop so a `Location: file://...`
728
+ # cannot be followed even on this opt-in code path.
729
+ def fetch_via_net_http(uri, redirect_count = 0) # :nodoc:
730
+ # Internal invariant: this method is the implementation of the
731
+ # `allow_local_network: true` branch of `read_remote_file`. If it
732
+ # is ever reached without that flag set, a future change has
733
+ # bypassed the SSRF gate; refuse to fetch rather than silently
734
+ # connect. The recursive call on a redirect inherits this guard
735
+ # because the option does not change mid-request.
736
+ unless @options[:allow_local_network]
737
+ raise "BUG: #{self.class}##{__method__} reached with " \
738
+ 'allow_local_network=false (SSRF gate bypassed)'
739
+ end
740
+
741
+ raise RemoteFileError, uri.to_s unless REMOTE_ALLOWED_SCHEMES.include?(uri.scheme)
742
+ raise RemoteFileError, uri.to_s if redirect_count > MAX_REDIRECTS
743
+
744
+ http = Net::HTTP.new(uri.host, uri.port || uri.default_port)
745
+ http.use_ssl = (uri.scheme == 'https')
746
+
747
+ res = http.get(uri.request_uri, {'User-Agent' => @options[:user_agent]})
748
+
749
+ if res.code.to_i >= 300 && res.code.to_i < 400 && res['Location']
750
+ redirect_uri = Addressable::URI.parse(Addressable::URI.escape(res['Location']))
751
+ return fetch_via_net_http(redirect_uri, redirect_count + 1)
683
752
  end
684
753
 
685
- @redirect_count = nil
686
- [src, charset]
754
+ res
687
755
  end
688
756
 
689
757
  private
@@ -259,8 +259,10 @@ module CssParser
259
259
  inherit
260
260
  currentColor
261
261
  ].freeze
262
- RE_COLOUR_NUMERIC = /\b(hsl|rgb)\s*\(-?\s*-?\d+(\.\d+)?%?\s*%?,-?\s*-?\d+(\.\d+)?%?\s*%?,-?\s*-?\d+(\.\d+)?%?\s*%?\)/i.freeze
263
- RE_COLOUR_NUMERIC_ALPHA = /\b(hsla|rgba)\s*\(-?\s*-?\d+(\.\d+)?%?\s*%?,-?\s*-?\d+(\.\d+)?%?\s*%?,-?\s*-?\d+(\.\d+)?%?\s*%?,-?\s*-?\d+(\.\d+)?%?\s*%?\)/i.freeze
262
+ # CSS <number> allows the integer part to be omitted (e.g. `.1`), per CSS Values & Units.
263
+ # `(?:\d*\.)?\d+` accepts `1`, `1.5`, and `.5` while still rejecting bare `1.`.
264
+ RE_COLOUR_NUMERIC = /\b(hsl|rgb)\s*\(-?\s*-?(?:\d*\.)?\d+%?\s*%?,-?\s*-?(?:\d*\.)?\d+%?\s*%?,-?\s*-?(?:\d*\.)?\d+%?\s*%?\)/i.freeze
265
+ RE_COLOUR_NUMERIC_ALPHA = /\b(hsla|rgba)\s*\(-?\s*-?(?:\d*\.)?\d+%?\s*%?,-?\s*-?(?:\d*\.)?\d+%?\s*%?,-?\s*-?(?:\d*\.)?\d+%?\s*%?,-?\s*-?(?:\d*\.)?\d+%?\s*%?\)/i.freeze
264
266
  RE_COLOUR_HEX = /\s*#([0-9a-fA-F]{6}|[0-9a-fA-F]{3})\b/.freeze
265
267
  RE_COLOUR_NAMED = /\s*\b(#{NAMED_COLOURS.join('|')})\b/i.freeze
266
268
  RE_COLOUR = Regexp.union(RE_COLOUR_NUMERIC, RE_COLOUR_NUMERIC_ALPHA, RE_COLOUR_HEX, RE_COLOUR_NAMED)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CssParser
4
- VERSION = '2.1.0'.freeze
4
+ VERSION = '3.0.0'.freeze
5
5
  end
data/lib/css_parser.rb CHANGED
@@ -4,8 +4,7 @@ require 'addressable/uri'
4
4
  require 'uri'
5
5
  require 'net/https'
6
6
  require 'digest/md5'
7
- require 'zlib'
8
- require 'stringio'
7
+ require 'ssrf_filter'
9
8
 
10
9
  require 'css_parser/version'
11
10
  require 'css_parser/rule_set'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: css_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dunae
@@ -23,6 +23,20 @@ dependencies:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
25
  version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: ssrf_filter
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.5'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.5'
26
40
  description: A set of classes for parsing CSS in Ruby.
27
41
  email: code@dunae.ca
28
42
  executables: []