safe_yaml 0.1 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +3 -0
  3. data/.travis.yml +48 -0
  4. data/CHANGES.md +154 -0
  5. data/Gemfile +3 -1
  6. data/LICENSE.txt +22 -0
  7. data/README.md +191 -0
  8. data/Rakefile +22 -2
  9. data/bin/safe_yaml +75 -0
  10. data/bundle_install_all_ruby_versions.sh +11 -0
  11. data/lib/safe_yaml.rb +90 -6
  12. data/lib/safe_yaml/deep.rb +34 -0
  13. data/lib/safe_yaml/libyaml_checker.rb +36 -0
  14. data/lib/safe_yaml/load.rb +181 -0
  15. data/lib/safe_yaml/parse/date.rb +37 -0
  16. data/lib/safe_yaml/parse/hexadecimal.rb +12 -0
  17. data/lib/safe_yaml/parse/sexagesimal.rb +26 -0
  18. data/lib/safe_yaml/psych_handler.rb +99 -0
  19. data/lib/safe_yaml/psych_resolver.rb +52 -0
  20. data/lib/safe_yaml/resolver.rb +94 -0
  21. data/lib/safe_yaml/safe_to_ruby_visitor.rb +29 -0
  22. data/lib/safe_yaml/store.rb +39 -0
  23. data/lib/safe_yaml/syck_hack.rb +36 -0
  24. data/lib/safe_yaml/syck_node_monkeypatch.rb +43 -0
  25. data/lib/safe_yaml/syck_resolver.rb +38 -0
  26. data/lib/safe_yaml/transform.rb +41 -0
  27. data/lib/safe_yaml/transform/to_boolean.rb +21 -0
  28. data/lib/safe_yaml/transform/to_date.rb +13 -0
  29. data/lib/safe_yaml/transform/to_float.rb +33 -0
  30. data/lib/safe_yaml/transform/to_integer.rb +26 -0
  31. data/lib/safe_yaml/transform/to_nil.rb +18 -0
  32. data/lib/safe_yaml/transform/to_symbol.rb +17 -0
  33. data/lib/safe_yaml/transform/transformation_map.rb +47 -0
  34. data/lib/{version.rb → safe_yaml/version.rb} +1 -1
  35. data/run_specs_all_ruby_versions.sh +38 -0
  36. data/safe_yaml.gemspec +11 -8
  37. data/spec/exploit.1.9.2.yaml +2 -0
  38. data/spec/exploit.1.9.3.yaml +2 -0
  39. data/spec/issue48.txt +20 -0
  40. data/spec/issue49.yml +0 -0
  41. data/spec/libyaml_checker_spec.rb +69 -0
  42. data/spec/psych_resolver_spec.rb +10 -0
  43. data/spec/resolver_specs.rb +278 -0
  44. data/spec/safe_yaml_spec.rb +697 -23
  45. data/spec/spec_helper.rb +37 -2
  46. data/spec/store_spec.rb +57 -0
  47. data/spec/support/exploitable_back_door.rb +13 -7
  48. data/spec/syck_resolver_spec.rb +10 -0
  49. data/spec/transform/base64_spec.rb +11 -0
  50. data/spec/transform/to_date_spec.rb +60 -0
  51. data/spec/transform/to_float_spec.rb +42 -0
  52. data/spec/transform/to_integer_spec.rb +64 -0
  53. data/spec/transform/to_symbol_spec.rb +51 -0
  54. data/spec/yaml_spec.rb +15 -0
  55. metadata +78 -24
  56. data/Gemfile.lock +0 -28
  57. data/lib/handler.rb +0 -86
  58. data/spec/handler_spec.rb +0 -108
@@ -0,0 +1,12 @@
1
module SafeYAML
  class Parse
    # Recognizes and converts hexadecimal integer literals such as "0x1F",
    # "-0xff" or "0xdead_beef".
    class Hexadecimal
      # Optional sign, a "0x" prefix, then hex digits and underscore separators.
      # At least one real hex digit is required: a value made only of
      # underscores (e.g. "0x_") would otherwise pass validation and then make
      # Integer() raise an ArgumentError in .value.
      MATCHER = /\A[-+]?0x_*[0-9a-fA-F][0-9a-fA-F_]*\Z/.freeze

      # Converts a string that matches MATCHER into an Integer.
      #
      # @param value [String] a hexadecimal literal already validated against MATCHER
      # @return [Integer] the numeric value, sign included
      # @raise [ArgumentError] if the string is not a valid hexadecimal literal
      def self.value(value)
        # Underscores are only visual separators; Integer() understands the
        # sign and the "0x" prefix once they are removed.
        Integer(value.delete("_"))
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module SafeYAML
  class Parse
    # Recognizes and converts base-60 ("sexagesimal") literals such as
    # "1:30" (=> 90.0) or "190:20:30.15" (=> 685230.15).
    class Sexagesimal
      INTEGER_MATCHER = /\A[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+\Z/.freeze
      FLOAT_MATCHER = /\A[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+\.[0-9_]*\Z/.freeze

      # Converts a sexagesimal literal into its numeric value.
      #
      # The sign is stripped first and applied once to the final magnitude.
      # Parsing the sign as part of the leading component and negating the
      # total again yields wrong results (e.g. "-1:30" => 30.0 instead of -90.0).
      #
      # @param value [String] a literal matching INTEGER_MATCHER or FLOAT_MATCHER
      # @return [Float] the value; always a Float, even for integer literals
      # @raise [ArgumentError] if a component cannot be parsed as a number
      def self.value(value)
        negative = value.start_with?("-")

        # Underscores are visual separators only. Removing them up front also
        # covers placements Float() rejects ("1_:30", "1:30.5_").
        unsigned = value.sub(/\A[-+]/, "").delete("_")
        whole, fraction = unsigned.split(".")

        # Horner evaluation of the colon-separated base-60 digits.
        magnitude = whole.split(":").inject(0.0) do |total, component|
          total * 60 + Float(component)
        end

        magnitude += Float("0.#{fraction}") unless fraction.nil? || fraction.empty?

        negative ? -magnitude : magnitude
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ require "psych"
2
+ require "base64"
3
+
4
module SafeYAML
  # Event-driven Psych handler that builds a plain Ruby structure (hashes,
  # arrays and transformed scalars) directly from parser events.
  class PsychHandler < Psych::Handler
    # @param options [Hash, nil] per-call options, merged over SafeYAML::OPTIONS
    # @param block [Proc] called with the built result at the end of each document
    def initialize(options, &block)
      @options      = SafeYAML::OPTIONS.merge(options || {})
      @block        = block
      @initializers = @options[:custom_initializers] || {}
      @anchors      = {}
      @stack        = []
      @current_key  = nil
      # Tracks whether a mapping key has been read and is waiting for its
      # value. A separate flag is needed because nil is itself a legal key.
      @key_pending  = false
      @result       = nil
      @begun        = false
    end

    # @return [Object, false] the root value, or false if no node was seen
    def result
      @begun ? @result : false
    end

    # Transforms a raw node value to its Ruby type, records it under its
    # anchor (if any) and inserts it into the structure being built.
    #
    # @param value [Object] raw scalar string, or a freshly created container
    # @param anchor [String, nil] anchor name to register the value under
    # @param quoted [Boolean, nil] whether the scalar was quoted in the source
    # @param tag [String, nil] explicit tag of the node
    def add_to_current_structure(value, anchor=nil, quoted=nil, tag=nil)
      value = Transform.to_proper_type(value, quoted, tag, @options)

      @anchors[anchor] = value if anchor

      insert_into_current_structure(value)
    end

    # Closes the innermost container and resumes filling its parent.
    def end_current_structure
      @stack.pop
      @current_structure = @stack.last
    end

    def streaming?
      true
    end

    # event handlers

    # The anchored value was already transformed when it was first seen, so it
    # is inserted as-is. Transforming it again without the original quoting
    # and tag information could change it.
    def alias(anchor)
      insert_into_current_structure(@anchors[anchor])
    end

    def scalar(value, anchor, tag, plain, quoted, style)
      add_to_current_structure(value, anchor, quoted, tag)
    end

    def end_document(implicit)
      @block.call(@result)
    end

    def start_mapping(anchor, tag, implicit, style)
      map = @initializers.include?(tag) ? @initializers[tag].call : {}
      self.add_to_current_structure(map, anchor)
      @current_structure = map
      @stack.push(map)
    end

    def end_mapping
      self.end_current_structure()
    end

    def start_sequence(anchor, tag, implicit, style)
      seq = @initializers.include?(tag) ? @initializers[tag].call : []
      self.add_to_current_structure(seq, anchor)
      @current_structure = seq
      @stack.push(seq)
    end

    def end_sequence
      self.end_current_structure()
    end

    private

    # Places an already-transformed value at the current position: as the
    # document root, as a sequence element, or as a mapping key/value.
    def insert_into_current_structure(value)
      if !@begun
        @begun = true
        @result = value
        @current_structure = @result
        return
      end

      if @current_structure.respond_to?(:<<)
        @current_structure << value

      elsif @current_structure.respond_to?(:[]=)
        if !@key_pending
          # First half of a pair: remember the key and wait for its value.
          @current_key = value
          @key_pending = true

        else
          if @current_key == "<<"
            @current_structure.merge!(value)
          else
            @current_structure[@current_key] = value
          end

          @current_key = nil
          @key_pending = false
        end

      else
        raise "Don't know how to add to a #{@current_structure.class}!"
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module SafeYAML
  # Resolver that walks a Psych node tree. It tells the generic Resolver how
  # to read the type, tag, value and quoting of Psych nodes, and remembers
  # anchored nodes so that aliases can be resolved later.
  class PsychResolver < Resolver
    # Maps Psych node classes to the node types used by Resolver#resolve_node.
    NODE_TYPES = {
      Psych::Nodes::Document => :root,
      Psych::Nodes::Mapping  => :map,
      Psych::Nodes::Sequence => :seq,
      Psych::Nodes::Scalar   => :scalar,
      Psych::Nodes::Alias    => :alias
    }.freeze

    def initialize(options={})
      super(options)
      @aliased_nodes = {}
    end

    # A document is resolved like a sequence of its children; the first
    # resolved child is the document's value.
    def resolve_root(root)
      resolved_children = resolve_seq(root)
      resolved_children.first
    end

    # Resolves the node previously recorded under the alias' anchor name.
    def resolve_alias(node)
      target = @aliased_nodes[node.anchor]
      resolve_node(target)
    end

    # Hands the node to a lazily created SafeToRubyVisitor.
    def native_resolve(node)
      @visitor ||= SafeYAML::SafeToRubyVisitor.new(self)
      @visitor.accept(node)
    end

    def get_node_type(node)
      NODE_TYPES[node.class]
    end

    def get_node_tag(node)
      node.tag
    end

    # Returns the children of container nodes or the string value of scalars
    # (nil for any other node type). As a side effect, records the node under
    # its anchor name when it has one.
    def get_node_value(node)
      anchor = node.respond_to?(:anchor) ? node.anchor : nil
      @aliased_nodes[anchor] = node if anchor

      node_type = get_node_type(node)
      if node_type == :scalar
        node.value
      elsif node_type == :root || node_type == :map || node_type == :seq
        node.children
      end
    end

    def value_is_quoted?(node)
      node.quoted
    end
  end
end
@@ -0,0 +1,94 @@
1
module SafeYAML
  # Engine-independent tree resolver. Subclasses supply the node accessors
  # this class calls (get_node_type, get_node_tag, get_node_value,
  # value_is_quoted?, native_resolve and, where relevant, resolve_root and
  # resolve_alias); this class turns the node tree into hashes, arrays and
  # transformed scalars.
  class Resolver
    # @param options [Hash, nil] per-call options, merged over SafeYAML::OPTIONS
    def initialize(options)
      @options              = SafeYAML::OPTIONS.merge(options || {})
      @whitelist            = @options[:whitelisted_tags] || []
      @initializers         = @options[:custom_initializers] || {}
      @raise_on_unknown_tag = @options[:raise_on_unknown_tag]
    end

    # Resolves a single node according to its type.
    #
    # @param node [Object, nil] an engine-specific node
    # @return [Object] the resolved value; a falsy node is returned unchanged
    # @raise [RuntimeError] if the node type is not recognized
    def resolve_node(node)
      return node if !node
      # Nodes with a whitelisted tag are delegated to the engine's native resolution.
      return self.native_resolve(node) if tag_is_whitelisted?(self.get_node_tag(node))

      case self.get_node_type(node)
      when :root
        resolve_root(node)
      when :map
        resolve_map(node)
      when :seq
        resolve_seq(node)
      when :scalar
        resolve_scalar(node)
      when :alias
        resolve_alias(node)
      else
        raise "Don't know how to resolve this node: #{node.inspect}"
      end
    end

    # Resolves a mapping node into a Hash (or the object built by a custom
    # initializer registered for the node's tag).
    def resolve_map(node)
      tag  = get_and_check_node_tag(node)
      hash = @initializers.include?(tag) ? @initializers[tag].call : {}
      map  = normalize_map(self.get_node_value(node))

      # Take the "<<" key nodes first, as these are meant to approximate a form of inheritance.
      inheritors = map.select { |key_node, value_node| resolve_node(key_node) == "<<" }
      inheritors.each do |key_node, value_node|
        merge_into_hash(hash, resolve_node(value_node))
      end

      # All that's left should be normal (non-"<<") nodes; they override merged keys.
      (map - inheritors).each do |key_node, value_node|
        hash[resolve_node(key_node)] = resolve_node(value_node)
      end

      return hash
    end

    # Resolves a sequence node into an Array (or the object built by a custom
    # initializer registered for the node's tag).
    def resolve_seq(node)
      seq = self.get_node_value(node)

      tag = get_and_check_node_tag(node)
      arr = @initializers.include?(tag) ? @initializers[tag].call : []

      seq.inject(arr) { |array, n| array << resolve_node(n) }
    end

    # Resolves a scalar node through Transform.to_proper_type.
    def resolve_scalar(node)
      Transform.to_proper_type(self.get_node_value(node), self.value_is_quoted?(node), get_and_check_node_tag(node), @options)
    end

    # Returns the node's tag after running it through SafeYAML.tag_safety_check!.
    def get_and_check_node_tag(node)
      tag = self.get_node_tag(node)
      SafeYAML.tag_safety_check!(tag, @options)
      tag
    end

    def tag_is_whitelisted?(tag)
      @whitelist.include?(tag)
    end

    def options
      @options
    end

    private

    # Returns the mapping's content as an array of [key_node, value_node] pairs.
    def normalize_map(map)
      # Syck creates Hashes from maps.
      if map.is_a?(Hash)
        map.inject([]) { |arr, key_and_value| arr << key_and_value }

      # Psych is really weird; it flattens out a Hash completely into: [key, value, key, value, ...]
      else
        map.each_slice(2).to_a
      end
    end

    # Copies the entries of a resolved "<<" value into +hash+.
    #
    # The value may be a single mapping, or a sequence of mappings
    # ("<<: [*a, *b]"). For a sequence, earlier mappings take precedence over
    # later ones, so the list is applied back to front. Any other enumerable
    # of [key, value] pairs is copied entry by entry.
    def merge_into_hash(hash, source)
      list_of_maps = source.is_a?(Array) && !source.empty? && source.all? { |item| item.is_a?(Hash) }
      sources = list_of_maps ? source.reverse : [source]

      sources.each do |pairs|
        pairs.each do |key, value|
          hash[key] = value
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ module SafeYAML
2
+ class SafeToRubyVisitor < Psych::Visitors::ToRuby
3
+ INITIALIZE_ARITY = superclass.instance_method(:initialize).arity
4
+
5
+ def initialize(resolver)
6
+ case INITIALIZE_ARITY
7
+ when 2
8
+ # https://github.com/tenderlove/psych/blob/v2.0.0/lib/psych/visitors/to_ruby.rb#L14-L28
9
+ loader = Psych::ClassLoader.new
10
+ scanner = Psych::ScalarScanner.new(loader)
11
+ super(scanner, loader)
12
+
13
+ else
14
+ super()
15
+ end
16
+
17
+ @resolver = resolver
18
+ end
19
+
20
+ def accept(node)
21
+ if node.tag
22
+ SafeYAML.tag_safety_check!(node.tag, @resolver.options)
23
+ return super
24
+ end
25
+
26
+ @resolver.resolve_node(node)
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,39 @@
1
+ require 'safe_yaml/load'
2
+ require 'yaml/store'
3
+
4
module SafeYAML

  # A YAML::Store whose content is deserialized through SafeYAML.load.
  class Store < YAML::Store

    # Same as YAML::Store#initialize, plus a third argument.
    #
    # +safe_yaml_opts+ is kept and passed to SafeYAML.load on every #load.
    def initialize(file_name, yaml_opts = {}, safe_yaml_opts = {})
      @safe_yaml_opts = safe_yaml_opts
      super(file_name, yaml_opts)
    end

    # Replaces YAML::Store#load so that the content is read through
    # SafeYAML.load (see #safe_yaml_load) rather than YAML.load.
    # A +false+ result from the loader is turned into an empty Hash.
    #--
    # PStore#load is private, while YAML::Store#load is public.
    #++
    def load(content)
      loaded = safe_yaml_load(content)
      return {} if loaded == false

      loaded
    end

    private

    # The argument list passed to SafeYAML.load depends on the YAML engine,
    # so the helper is defined once, when the class body is evaluated.
    # NOTE(review): the extra nil passed for psych is presumably a filename
    # slot - confirm against SafeYAML.load.
    if SafeYAML::YAML_ENGINE == 'psych'
      def safe_yaml_load(content)
        SafeYAML.load(content, nil, @safe_yaml_opts)
      end
    else
      def safe_yaml_load(content)
        SafeYAML.load(content, @safe_yaml_opts)
      end
    end

  end

end
@@ -0,0 +1,36 @@
1
# Hack for JRuby 1.8's YAML parser, Yecht.
#
# This file is always loaded AFTER either syck or psych is already
# loaded. It then looks at which constants are available and creates
# a consistent view across all Rubies.
#
# Taken from rubygems and modified.
# See https://github.com/rubygems/rubygems/blob/master/lib/rubygems/syck_hack.rb

module YAML
  # In newer 1.9.2, there is a Syck toplevel constant instead of it
  # being underneath YAML. If so, reference it back under YAML as
  # well.
  if defined? ::Syck
    # for tests that change YAML::ENGINE
    # 1.8 does not support the second argument to const_defined?
    # Any error from remove_const (e.g. the constant is not defined here)
    # is deliberately swallowed by the rescue modifier.
    remove_const :Syck rescue nil

    Syck = ::Syck

  # JRuby's "Syck" is called "Yecht"
  elsif defined? YAML::Yecht
    Syck = YAML::Yecht
  end
end

# Sometime in the 1.9 dev cycle, the Syck constant was moved from under YAML
# to be a toplevel constant. So gemspecs created under these versions of Syck
# will have references to Syck::DefaultKey.
#
# So we need to be sure that we reference Syck at the toplevel too so that
# we can always load these kinds of gemspecs.
#
# NOTE(review): this assumes YAML::Syck exists whenever the toplevel Syck does
# not; if neither is defined the assignment below raises a NameError. Confirm
# this file is only loaded when a Syck-compatible engine is present.
if !defined?(Syck)
  Syck = YAML::Syck
end
@@ -0,0 +1,43 @@
1
# This is, admittedly, pretty insane. Fundamentally the challenge here is this: if we want to allow
# whitelisting of tags (while still leveraging Syck's internal functionality), then we have to
# change how Syck::Node#transform works. But since we (SafeYAML) do not control instantiation of
# Syck::Node objects, we cannot, for example, subclass Syck::Node and override #transform the "easy"
# way. So the only choice is to monkeypatch, like this. And the only way to make this work
# recursively with potentially call-specific options (that my feeble brain can think of) is to set
# pseudo-global options on the first call and unset them once the recursive stack has fully unwound.

# Source of the patch, kept as a string so it can be evaluated inside whichever module holds the
# Syck Node class (see the bottom of this file).
#
# What the evaluated code does:
# * Node#safe_transform tracks its nesting depth and a whitelist in class variables; the whitelist
#   is taken from the options of the first call that provides one and is reset to nil once the
#   depth returns to 0 (in the ensure clause, so also when an error is raised).
# * A node with a type_id is run through SafeYAML.tag_safety_check! and, if its type_id is in the
#   whitelist, handed to the original transform (kept as unsafe_transform).
# * Every other node is resolved by a new SafeYAML::SyckResolver.
# * Node#transform is re-pointed at safe_transform.
#
# NOTE(review): if the outermost call carries no :whitelisted_tags (e.g. transform is called without
# arguments) the whitelist stays nil, and the include? check would raise NoMethodError for a node
# that has a type_id - confirm callers always pass options that include :whitelisted_tags.
monkeypatch = <<-EORUBY
  class Node
    @@safe_transform_depth = 0
    @@safe_transform_whitelist = nil

    def safe_transform(options={})
      begin
        @@safe_transform_depth += 1
        @@safe_transform_whitelist ||= options[:whitelisted_tags]

        if self.type_id
          SafeYAML.tag_safety_check!(self.type_id, options)
          return unsafe_transform if @@safe_transform_whitelist.include?(self.type_id)
        end

        SafeYAML::SyckResolver.new.resolve_node(self)

      ensure
        @@safe_transform_depth -= 1
        if @@safe_transform_depth == 0
          @@safe_transform_whitelist = nil
        end
      end
    end

    alias_method :unsafe_transform, :transform
    alias_method :transform, :safe_transform
  end
EORUBY

# Apply the patch inside the module that holds Node: YAML::Syck when that is where Node lives,
# otherwise the toplevel Syck.
if defined?(YAML::Syck::Node)
  YAML::Syck.module_eval monkeypatch
else
  Syck.module_eval monkeypatch
end
@@ -0,0 +1,38 @@
1
module SafeYAML
  # Resolver for Syck node trees. It tells the generic Resolver how to read
  # the type, tag, value and quoting of Syck nodes.
  class SyckResolver < Resolver
    # Values of a node's @style that mark a quoted scalar.
    QUOTE_STYLES = [:quote1, :quote2].freeze

    # Maps the class of a node's value to the node types used by Resolver#resolve_node.
    NODE_TYPES = {
      Hash   => :map,
      Array  => :seq,
      String => :scalar
    }.freeze

    # Unlike Resolver#initialize, the options argument is optional here.
    def initialize(options={})
      super(options)
    end

    # Lets the node transform itself, passing along this resolver's options.
    def native_resolve(node)
      node.transform(options)
    end

    def get_node_type(node)
      value_class = node.value.class
      NODE_TYPES[value_class]
    end

    def get_node_tag(node)
      node.type_id
    end

    def get_node_value(node)
      node.value
    end

    # The style is read straight from the node's @style instance variable.
    def value_is_quoted?(node)
      style = node.instance_variable_get(:@style)
      QUOTE_STYLES.include?(style)
    end
  end
end