psych 3.0.0.beta2-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +16 -0
 - data/.travis.yml +20 -0
 - data/CHANGELOG.rdoc +576 -0
 - data/Gemfile +3 -0
 - data/Mavenfile +7 -0
 - data/README.md +73 -0
 - data/Rakefile +46 -0
 - data/bin/console +7 -0
 - data/bin/setup +6 -0
 - data/ext/psych/.gitignore +11 -0
 - data/ext/psych/depend +3 -0
 - data/ext/psych/extconf.rb +39 -0
 - data/ext/psych/psych.c +34 -0
 - data/ext/psych/psych.h +17 -0
 - data/ext/psych/psych_emitter.c +554 -0
 - data/ext/psych/psych_emitter.h +8 -0
 - data/ext/psych/psych_parser.c +568 -0
 - data/ext/psych/psych_parser.h +6 -0
 - data/ext/psych/psych_to_ruby.c +39 -0
 - data/ext/psych/psych_to_ruby.h +8 -0
 - data/ext/psych/psych_yaml_tree.c +24 -0
 - data/ext/psych/psych_yaml_tree.h +8 -0
 - data/ext/psych/yaml/LICENSE +19 -0
 - data/ext/psych/yaml/api.c +1392 -0
 - data/ext/psych/yaml/config.h +10 -0
 - data/ext/psych/yaml/dumper.c +394 -0
 - data/ext/psych/yaml/emitter.c +2329 -0
 - data/ext/psych/yaml/loader.c +444 -0
 - data/ext/psych/yaml/parser.c +1374 -0
 - data/ext/psych/yaml/reader.c +469 -0
 - data/ext/psych/yaml/scanner.c +3576 -0
 - data/ext/psych/yaml/writer.c +141 -0
 - data/ext/psych/yaml/yaml.h +1971 -0
 - data/ext/psych/yaml/yaml_private.h +662 -0
 - data/lib/psych.rb +511 -0
 - data/lib/psych/class_loader.rb +102 -0
 - data/lib/psych/coder.rb +95 -0
 - data/lib/psych/core_ext.rb +19 -0
 - data/lib/psych/exception.rb +14 -0
 - data/lib/psych/handler.rb +250 -0
 - data/lib/psych/handlers/document_stream.rb +23 -0
 - data/lib/psych/handlers/recorder.rb +40 -0
 - data/lib/psych/json/ruby_events.rb +20 -0
 - data/lib/psych/json/stream.rb +17 -0
 - data/lib/psych/json/tree_builder.rb +13 -0
 - data/lib/psych/json/yaml_events.rb +30 -0
 - data/lib/psych/nodes.rb +78 -0
 - data/lib/psych/nodes/alias.rb +19 -0
 - data/lib/psych/nodes/document.rb +61 -0
 - data/lib/psych/nodes/mapping.rb +57 -0
 - data/lib/psych/nodes/node.rb +56 -0
 - data/lib/psych/nodes/scalar.rb +68 -0
 - data/lib/psych/nodes/sequence.rb +82 -0
 - data/lib/psych/nodes/stream.rb +38 -0
 - data/lib/psych/omap.rb +5 -0
 - data/lib/psych/parser.rb +52 -0
 - data/lib/psych/scalar_scanner.rb +149 -0
 - data/lib/psych/set.rb +5 -0
 - data/lib/psych/stream.rb +38 -0
 - data/lib/psych/streaming.rb +28 -0
 - data/lib/psych/syntax_error.rb +22 -0
 - data/lib/psych/tree_builder.rb +97 -0
 - data/lib/psych/versions.rb +9 -0
 - data/lib/psych/visitors.rb +7 -0
 - data/lib/psych/visitors/depth_first.rb +27 -0
 - data/lib/psych/visitors/emitter.rb +52 -0
 - data/lib/psych/visitors/json_tree.rb +25 -0
 - data/lib/psych/visitors/to_ruby.rb +401 -0
 - data/lib/psych/visitors/visitor.rb +20 -0
 - data/lib/psych/visitors/yaml_tree.rb +551 -0
 - data/lib/psych/y.rb +10 -0
 - data/psych.gemspec +64 -0
 - metadata +175 -0
 
| 
         @@ -0,0 +1,469 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
            #include "yaml_private.h"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            /*
         
     | 
| 
      
 5 
     | 
    
         
            +
             * Declarations.
         
     | 
| 
      
 6 
     | 
    
         
            +
             */
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            static int
         
     | 
| 
      
 9 
     | 
    
         
            +
            yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem,
         
     | 
| 
      
 10 
     | 
    
         
            +
                    size_t offset, int value);
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            static int
         
     | 
| 
      
 13 
     | 
    
         
            +
            yaml_parser_update_raw_buffer(yaml_parser_t *parser);
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            static int
         
     | 
| 
      
 16 
     | 
    
         
            +
            yaml_parser_determine_encoding(yaml_parser_t *parser);
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            YAML_DECLARE(int)
         
     | 
| 
      
 19 
     | 
    
         
            +
            yaml_parser_update_buffer(yaml_parser_t *parser, size_t length);
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            /*
         
     | 
| 
      
 22 
     | 
    
         
            +
             * Set the reader error and return 0.
         
     | 
| 
      
 23 
     | 
    
         
            +
             */
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            static int
         
     | 
| 
      
 26 
     | 
    
         
            +
            yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem,
         
     | 
| 
      
 27 
     | 
    
         
            +
                    size_t offset, int value)
         
     | 
| 
      
 28 
     | 
    
         
            +
            {
         
     | 
| 
      
 29 
     | 
    
         
            +
                parser->error = YAML_READER_ERROR;
         
     | 
| 
      
 30 
     | 
    
         
            +
                parser->problem = problem;
         
     | 
| 
      
 31 
     | 
    
         
            +
                parser->problem_offset = offset;
         
     | 
| 
      
 32 
     | 
    
         
            +
                parser->problem_value = value;
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                return 0;
         
     | 
| 
      
 35 
     | 
    
         
            +
            }
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            /*
             * Byte order mark signatures, spelled as raw byte strings.
             *
             * The two UTF-16 marks are two bytes long and the UTF-8 mark is three
             * bytes long; they are listed in the order in which the encoding
             * detection below tests them.
             */

            #define BOM_UTF16LE "\xff\xfe"
            #define BOM_UTF16BE "\xfe\xff"
            #define BOM_UTF8    "\xef\xbb\xbf"
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            /*
         
     | 
| 
      
 46 
     | 
    
         
            +
             * Determine the input stream encoding by checking the BOM symbol. If no BOM is
         
     | 
| 
      
 47 
     | 
    
         
            +
             * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure.
         
     | 
| 
      
 48 
     | 
    
         
            +
             */
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            static int
         
     | 
| 
      
 51 
     | 
    
         
            +
            yaml_parser_determine_encoding(yaml_parser_t *parser)
         
     | 
| 
      
 52 
     | 
    
         
            +
            {
         
     | 
| 
      
 53 
     | 
    
         
            +
                /* Ensure that we had enough bytes in the raw buffer. */
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
                while (!parser->eof
         
     | 
| 
      
 56 
     | 
    
         
            +
                        && parser->raw_buffer.last - parser->raw_buffer.pointer < 3) {
         
     | 
| 
      
 57 
     | 
    
         
            +
                    if (!yaml_parser_update_raw_buffer(parser)) {
         
     | 
| 
      
 58 
     | 
    
         
            +
                        return 0;
         
     | 
| 
      
 59 
     | 
    
         
            +
                    }
         
     | 
| 
      
 60 
     | 
    
         
            +
                }
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                /* Determine the encoding. */
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2
         
     | 
| 
      
 65 
     | 
    
         
            +
                        && !memcmp(parser->raw_buffer.pointer, BOM_UTF16LE, 2)) {
         
     | 
| 
      
 66 
     | 
    
         
            +
                    parser->encoding = YAML_UTF16LE_ENCODING;
         
     | 
| 
      
 67 
     | 
    
         
            +
                    parser->raw_buffer.pointer += 2;
         
     | 
| 
      
 68 
     | 
    
         
            +
                    parser->offset += 2;
         
     | 
| 
      
 69 
     | 
    
         
            +
                }
         
     | 
| 
      
 70 
     | 
    
         
            +
                else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2
         
     | 
| 
      
 71 
     | 
    
         
            +
                        && !memcmp(parser->raw_buffer.pointer, BOM_UTF16BE, 2)) {
         
     | 
| 
      
 72 
     | 
    
         
            +
                    parser->encoding = YAML_UTF16BE_ENCODING;
         
     | 
| 
      
 73 
     | 
    
         
            +
                    parser->raw_buffer.pointer += 2;
         
     | 
| 
      
 74 
     | 
    
         
            +
                    parser->offset += 2;
         
     | 
| 
      
 75 
     | 
    
         
            +
                }
         
     | 
| 
      
 76 
     | 
    
         
            +
                else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 3
         
     | 
| 
      
 77 
     | 
    
         
            +
                        && !memcmp(parser->raw_buffer.pointer, BOM_UTF8, 3)) {
         
     | 
| 
      
 78 
     | 
    
         
            +
                    parser->encoding = YAML_UTF8_ENCODING;
         
     | 
| 
      
 79 
     | 
    
         
            +
                    parser->raw_buffer.pointer += 3;
         
     | 
| 
      
 80 
     | 
    
         
            +
                    parser->offset += 3;
         
     | 
| 
      
 81 
     | 
    
         
            +
                }
         
     | 
| 
      
 82 
     | 
    
         
            +
                else {
         
     | 
| 
      
 83 
     | 
    
         
            +
                    parser->encoding = YAML_UTF8_ENCODING;
         
     | 
| 
      
 84 
     | 
    
         
            +
                }
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
      
 86 
     | 
    
         
            +
                return 1;
         
     | 
| 
      
 87 
     | 
    
         
            +
            }
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
            /*
         
     | 
| 
      
 90 
     | 
    
         
            +
             * Update the raw buffer.
         
     | 
| 
      
 91 
     | 
    
         
            +
             */
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
            static int
         
     | 
| 
      
 94 
     | 
    
         
            +
            yaml_parser_update_raw_buffer(yaml_parser_t *parser)
         
     | 
| 
      
 95 
     | 
    
         
            +
            {
         
     | 
| 
      
 96 
     | 
    
         
            +
                size_t size_read = 0;
         
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
                /* Return if the raw buffer is full. */
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
                if (parser->raw_buffer.start == parser->raw_buffer.pointer
         
     | 
| 
      
 101 
     | 
    
         
            +
                        && parser->raw_buffer.last == parser->raw_buffer.end)
         
     | 
| 
      
 102 
     | 
    
         
            +
                    return 1;
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
                /* Return on EOF. */
         
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
      
 106 
     | 
    
         
            +
                if (parser->eof) return 1;
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                /* Move the remaining bytes in the raw buffer to the beginning. */
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
                if (parser->raw_buffer.start < parser->raw_buffer.pointer
         
     | 
| 
      
 111 
     | 
    
         
            +
                        && parser->raw_buffer.pointer < parser->raw_buffer.last) {
         
     | 
| 
      
 112 
     | 
    
         
            +
                    memmove(parser->raw_buffer.start, parser->raw_buffer.pointer,
         
     | 
| 
      
 113 
     | 
    
         
            +
                            parser->raw_buffer.last - parser->raw_buffer.pointer);
         
     | 
| 
      
 114 
     | 
    
         
            +
                }
         
     | 
| 
      
 115 
     | 
    
         
            +
                parser->raw_buffer.last -=
         
     | 
| 
      
 116 
     | 
    
         
            +
                    parser->raw_buffer.pointer - parser->raw_buffer.start;
         
     | 
| 
      
 117 
     | 
    
         
            +
                parser->raw_buffer.pointer = parser->raw_buffer.start;
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
                /* Call the read handler to fill the buffer. */
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
                if (!parser->read_handler(parser->read_handler_data, parser->raw_buffer.last,
         
     | 
| 
      
 122 
     | 
    
         
            +
                            parser->raw_buffer.end - parser->raw_buffer.last, &size_read)) {
         
     | 
| 
      
 123 
     | 
    
         
            +
                    return yaml_parser_set_reader_error(parser, "input error",
         
     | 
| 
      
 124 
     | 
    
         
            +
                            parser->offset, -1);
         
     | 
| 
      
 125 
     | 
    
         
            +
                }
         
     | 
| 
      
 126 
     | 
    
         
            +
                parser->raw_buffer.last += size_read;
         
     | 
| 
      
 127 
     | 
    
         
            +
                if (!size_read) {
         
     | 
| 
      
 128 
     | 
    
         
            +
                    parser->eof = 1;
         
     | 
| 
      
 129 
     | 
    
         
            +
                }
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                return 1;
         
     | 
| 
      
 132 
     | 
    
         
            +
            }
         
     | 
| 
      
 133 
     | 
    
         
            +
             
     | 
| 
      
 134 
     | 
    
         
            +
            /*
         
     | 
| 
      
 135 
     | 
    
         
            +
             * Ensure that the buffer contains at least `length` characters.
         
     | 
| 
      
 136 
     | 
    
         
            +
             * Return 1 on success, 0 on failure.
         
     | 
| 
      
 137 
     | 
    
         
            +
             *
         
     | 
| 
      
 138 
     | 
    
         
            +
             * The length is supposed to be significantly less than the buffer size.
         
     | 
| 
      
 139 
     | 
    
         
            +
             */
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
            YAML_DECLARE(int)
         
     | 
| 
      
 142 
     | 
    
         
            +
            yaml_parser_update_buffer(yaml_parser_t *parser, size_t length)
         
     | 
| 
      
 143 
     | 
    
         
            +
            {
         
     | 
| 
      
 144 
     | 
    
         
            +
                int first = 1;
         
     | 
| 
      
 145 
     | 
    
         
            +
             
     | 
| 
      
 146 
     | 
    
         
            +
                assert(parser->read_handler);   /* Read handler must be set. */
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                /* If the EOF flag is set and the raw buffer is empty, do nothing. */
         
     | 
| 
      
 149 
     | 
    
         
            +
             
     | 
| 
      
 150 
     | 
    
         
            +
                if (parser->eof && parser->raw_buffer.pointer == parser->raw_buffer.last)
         
     | 
| 
      
 151 
     | 
    
         
            +
                    return 1;
         
     | 
| 
      
 152 
     | 
    
         
            +
             
     | 
| 
      
 153 
     | 
    
         
            +
                /* Return if the buffer contains enough characters. */
         
     | 
| 
      
 154 
     | 
    
         
            +
             
     | 
| 
      
 155 
     | 
    
         
            +
                if (parser->unread >= length)
         
     | 
| 
      
 156 
     | 
    
         
            +
                    return 1;
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
      
 158 
     | 
    
         
            +
                /* Determine the input encoding if it is not known yet. */
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
                if (!parser->encoding) {
         
     | 
| 
      
 161 
     | 
    
         
            +
                    if (!yaml_parser_determine_encoding(parser))
         
     | 
| 
      
 162 
     | 
    
         
            +
                        return 0;
         
     | 
| 
      
 163 
     | 
    
         
            +
                }
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
                /* Move the unread characters to the beginning of the buffer. */
         
     | 
| 
      
 166 
     | 
    
         
            +
             
     | 
| 
      
 167 
     | 
    
         
            +
                if (parser->buffer.start < parser->buffer.pointer
         
     | 
| 
      
 168 
     | 
    
         
            +
                        && parser->buffer.pointer < parser->buffer.last) {
         
     | 
| 
      
 169 
     | 
    
         
            +
                    size_t size = parser->buffer.last - parser->buffer.pointer;
         
     | 
| 
      
 170 
     | 
    
         
            +
                    memmove(parser->buffer.start, parser->buffer.pointer, size);
         
     | 
| 
      
 171 
     | 
    
         
            +
                    parser->buffer.pointer = parser->buffer.start;
         
     | 
| 
      
 172 
     | 
    
         
            +
                    parser->buffer.last = parser->buffer.start + size;
         
     | 
| 
      
 173 
     | 
    
         
            +
                }
         
     | 
| 
      
 174 
     | 
    
         
            +
                else if (parser->buffer.pointer == parser->buffer.last) {
         
     | 
| 
      
 175 
     | 
    
         
            +
                    parser->buffer.pointer = parser->buffer.start;
         
     | 
| 
      
 176 
     | 
    
         
            +
                    parser->buffer.last = parser->buffer.start;
         
     | 
| 
      
 177 
     | 
    
         
            +
                }
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
      
 179 
     | 
    
         
            +
                /* Fill the buffer until it has enough characters. */
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
                while (parser->unread < length)
         
     | 
| 
      
 182 
     | 
    
         
            +
                {
         
     | 
| 
      
 183 
     | 
    
         
            +
                    /* Fill the raw buffer if necessary. */
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
                    if (!first || parser->raw_buffer.pointer == parser->raw_buffer.last) {
         
     | 
| 
      
 186 
     | 
    
         
            +
                        if (!yaml_parser_update_raw_buffer(parser)) return 0;
         
     | 
| 
      
 187 
     | 
    
         
            +
                    }
         
     | 
| 
      
 188 
     | 
    
         
            +
                    first = 0;
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
                    /* Decode the raw buffer. */
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
                    while (parser->raw_buffer.pointer != parser->raw_buffer.last)
         
     | 
| 
      
 193 
     | 
    
         
            +
                    {
         
     | 
| 
      
 194 
     | 
    
         
            +
                        unsigned int value = 0, value2 = 0;
         
     | 
| 
      
 195 
     | 
    
         
            +
                        int incomplete = 0;
         
     | 
| 
      
 196 
     | 
    
         
            +
                        unsigned char octet;
         
     | 
| 
      
 197 
     | 
    
         
            +
                        unsigned int width = 0;
         
     | 
| 
      
 198 
     | 
    
         
            +
                        int low, high;
         
     | 
| 
      
 199 
     | 
    
         
            +
                        size_t k;
         
     | 
| 
      
 200 
     | 
    
         
            +
                        size_t raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer;
         
     | 
| 
      
 201 
     | 
    
         
            +
             
     | 
| 
      
 202 
     | 
    
         
            +
                        /* Decode the next character. */
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
                        switch (parser->encoding)
         
     | 
| 
      
 205 
     | 
    
         
            +
                        {
         
     | 
| 
      
 206 
     | 
    
         
            +
                            case YAML_UTF8_ENCODING:
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
                                /*
         
     | 
| 
      
 209 
     | 
    
         
            +
                                 * Decode a UTF-8 character.  Check RFC 3629
         
     | 
| 
      
 210 
     | 
    
         
            +
                                 * (http://www.ietf.org/rfc/rfc3629.txt) for more details.
         
     | 
| 
      
 211 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 212 
     | 
    
         
            +
                                 * The following table (taken from the RFC) is used for
         
     | 
| 
      
 213 
     | 
    
         
            +
                                 * decoding.
         
     | 
| 
      
 214 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 215 
     | 
    
         
            +
                                 *    Char. number range |        UTF-8 octet sequence
         
     | 
| 
      
 216 
     | 
    
         
            +
                                 *      (hexadecimal)    |              (binary)
         
     | 
| 
      
 217 
     | 
    
         
            +
                                 *   --------------------+------------------------------------
         
     | 
| 
      
 218 
     | 
    
         
            +
                                 *   0000 0000-0000 007F | 0xxxxxxx
         
     | 
| 
      
 219 
     | 
    
         
            +
                                 *   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
         
     | 
| 
      
 220 
     | 
    
         
            +
                                 *   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
         
     | 
| 
      
 221 
     | 
    
         
            +
                                 *   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
         
     | 
| 
      
 222 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 223 
     | 
    
         
            +
                                 * Additionally, the characters in the range 0xD800-0xDFFF
         
     | 
| 
      
 224 
     | 
    
         
            +
                                 * are prohibited as they are reserved for use with UTF-16
         
     | 
| 
      
 225 
     | 
    
         
            +
                                 * surrogate pairs.
         
     | 
| 
      
 226 
     | 
    
         
            +
                                 */
         
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
      
 228 
     | 
    
         
            +
                                /* Determine the length of the UTF-8 sequence. */
         
     | 
| 
      
 229 
     | 
    
         
            +
             
     | 
| 
      
 230 
     | 
    
         
            +
                                octet = parser->raw_buffer.pointer[0];
         
     | 
| 
      
 231 
     | 
    
         
            +
                                width = (octet & 0x80) == 0x00 ? 1 :
         
     | 
| 
      
 232 
     | 
    
         
            +
                                        (octet & 0xE0) == 0xC0 ? 2 :
         
     | 
| 
      
 233 
     | 
    
         
            +
                                        (octet & 0xF0) == 0xE0 ? 3 :
         
     | 
| 
      
 234 
     | 
    
         
            +
                                        (octet & 0xF8) == 0xF0 ? 4 : 0;
         
     | 
| 
      
 235 
     | 
    
         
            +
             
     | 
| 
      
 236 
     | 
    
         
            +
                                /* Check if the leading octet is valid. */
         
     | 
| 
      
 237 
     | 
    
         
            +
             
     | 
| 
      
 238 
     | 
    
         
            +
                                if (!width)
         
     | 
| 
      
 239 
     | 
    
         
            +
                                    return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 240 
     | 
    
         
            +
                                            "invalid leading UTF-8 octet",
         
     | 
| 
      
 241 
     | 
    
         
            +
                                            parser->offset, octet);
         
     | 
| 
      
 242 
     | 
    
         
            +
             
     | 
| 
      
 243 
     | 
    
         
            +
                                /* Check if the raw buffer contains an incomplete character. */
         
     | 
| 
      
 244 
     | 
    
         
            +
             
     | 
| 
      
 245 
     | 
    
         
            +
                                if (width > raw_unread) {
         
     | 
| 
      
 246 
     | 
    
         
            +
                                    if (parser->eof) {
         
     | 
| 
      
 247 
     | 
    
         
            +
                                        return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 248 
     | 
    
         
            +
                                                "incomplete UTF-8 octet sequence",
         
     | 
| 
      
 249 
     | 
    
         
            +
                                                parser->offset, -1);
         
     | 
| 
      
 250 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 251 
     | 
    
         
            +
                                    incomplete = 1;
         
     | 
| 
      
 252 
     | 
    
         
            +
                                    break;
         
     | 
| 
      
 253 
     | 
    
         
            +
                                }
         
     | 
| 
      
 254 
     | 
    
         
            +
             
     | 
| 
      
 255 
     | 
    
         
            +
                                /* Decode the leading octet. */
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
                                value = (octet & 0x80) == 0x00 ? octet & 0x7F :
         
     | 
| 
      
 258 
     | 
    
         
            +
                                        (octet & 0xE0) == 0xC0 ? octet & 0x1F :
         
     | 
| 
      
 259 
     | 
    
         
            +
                                        (octet & 0xF0) == 0xE0 ? octet & 0x0F :
         
     | 
| 
      
 260 
     | 
    
         
            +
                                        (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0;
         
     | 
| 
      
 261 
     | 
    
         
            +
             
     | 
| 
      
 262 
     | 
    
         
            +
                                /* Check and decode the trailing octets. */
         
     | 
| 
      
 263 
     | 
    
         
            +
             
     | 
| 
      
 264 
     | 
    
         
            +
                                for (k = 1; k < width; k ++)
         
     | 
| 
      
 265 
     | 
    
         
            +
                                {
         
     | 
| 
      
 266 
     | 
    
         
            +
                                    octet = parser->raw_buffer.pointer[k];
         
     | 
| 
      
 267 
     | 
    
         
            +
             
     | 
| 
      
 268 
     | 
    
         
            +
                                    /* Check if the octet is valid. */
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
                                    if ((octet & 0xC0) != 0x80)
         
     | 
| 
      
 271 
     | 
    
         
            +
                                        return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 272 
     | 
    
         
            +
                                                "invalid trailing UTF-8 octet",
         
     | 
| 
      
 273 
     | 
    
         
            +
                                                parser->offset+k, octet);
         
     | 
| 
      
 274 
     | 
    
         
            +
             
     | 
| 
      
 275 
     | 
    
         
            +
                                    /* Decode the octet. */
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
                                    value = (value << 6) + (octet & 0x3F);
         
     | 
| 
      
 278 
     | 
    
         
            +
                                }
         
     | 
| 
      
 279 
     | 
    
         
            +
             
     | 
| 
      
 280 
     | 
    
         
            +
                                /* Check the length of the sequence against the value. */
         
     | 
| 
      
 281 
     | 
    
         
            +
             
     | 
| 
      
 282 
     | 
    
         
            +
                                if (!((width == 1) ||
         
     | 
| 
      
 283 
     | 
    
         
            +
                                        (width == 2 && value >= 0x80) ||
         
     | 
| 
      
 284 
     | 
    
         
            +
                                        (width == 3 && value >= 0x800) ||
         
     | 
| 
      
 285 
     | 
    
         
            +
                                        (width == 4 && value >= 0x10000)))
         
     | 
| 
      
 286 
     | 
    
         
            +
                                    return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 287 
     | 
    
         
            +
                                            "invalid length of a UTF-8 sequence",
         
     | 
| 
      
 288 
     | 
    
         
            +
                                            parser->offset, -1);
         
     | 
| 
      
 289 
     | 
    
         
            +
             
     | 
| 
      
 290 
     | 
    
         
            +
                                /* Check the range of the value. */
         
     | 
| 
      
 291 
     | 
    
         
            +
             
     | 
| 
      
 292 
     | 
    
         
            +
                                if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
         
     | 
| 
      
 293 
     | 
    
         
            +
                                    return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 294 
     | 
    
         
            +
                                            "invalid Unicode character",
         
     | 
| 
      
 295 
     | 
    
         
            +
                                            parser->offset, value);
         
     | 
| 
      
 296 
     | 
    
         
            +
             
     | 
| 
      
 297 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 298 
     | 
    
         
            +
             
     | 
| 
      
 299 
     | 
    
         
            +
                            case YAML_UTF16LE_ENCODING:
         
     | 
| 
      
 300 
     | 
    
         
            +
                            case YAML_UTF16BE_ENCODING:
         
     | 
| 
      
 301 
     | 
    
         
            +
             
     | 
| 
      
 302 
     | 
    
         
            +
                                low = (parser->encoding == YAML_UTF16LE_ENCODING ? 0 : 1);
         
     | 
| 
      
 303 
     | 
    
         
            +
                                high = (parser->encoding == YAML_UTF16LE_ENCODING ? 1 : 0);
         
     | 
| 
      
 304 
     | 
    
         
            +
             
     | 
| 
      
 305 
     | 
    
         
            +
                                /*
         
     | 
| 
      
 306 
     | 
    
         
            +
                                 * The UTF-16 encoding is not as simple as one might
         
     | 
| 
      
 307 
     | 
    
         
            +
                                 * naively think.  Check RFC 2781
         
     | 
| 
      
 308 
     | 
    
         
            +
                                 * (http://www.ietf.org/rfc/rfc2781.txt).
         
     | 
| 
      
 309 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 310 
     | 
    
         
            +
                                 * Normally, two subsequent bytes describe a Unicode
         
     | 
| 
      
 311 
     | 
    
         
            +
                                 * character.  However a special technique (called a
         
     | 
| 
      
 312 
     | 
    
         
            +
                                 * surrogate pair) is used for specifying character
         
     | 
| 
      
 313 
     | 
    
         
            +
                                 * values larger than 0xFFFF.
         
     | 
| 
      
 314 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 315 
     | 
    
         
            +
                                 * A surrogate pair consists of two pseudo-characters:
         
     | 
| 
      
 316 
     | 
    
         
            +
                                 *      high surrogate area (0xD800-0xDBFF)
         
     | 
| 
      
 317 
     | 
    
         
            +
                                 *      low surrogate area (0xDC00-0xDFFF)
         
     | 
| 
      
 318 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 319 
     | 
    
         
            +
                                 * The following formulas are used for decoding
         
     | 
| 
      
 320 
     | 
    
         
            +
                                 * and encoding characters using surrogate pairs:
         
     | 
| 
      
 321 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 322 
     | 
    
         
            +
                                 *  U  = U' + 0x10000   (0x01 00 00 <= U <= 0x10 FF FF)
         
     | 
| 
      
 323 
     | 
    
         
            +
                                 *  U' = yyyyyyyyyyxxxxxxxxxx   (0 <= U' <= 0x0F FF FF)
         
     | 
| 
      
 324 
     | 
    
         
            +
                                 *  W1 = 110110yyyyyyyyyy
         
     | 
| 
      
 325 
     | 
    
         
            +
                                 *  W2 = 110111xxxxxxxxxx
         
     | 
| 
      
 326 
     | 
    
         
            +
                                 *
         
     | 
| 
      
 327 
     | 
    
         
            +
                                 * where U is the character value, W1 is the high surrogate
         
     | 
| 
      
 328 
     | 
    
         
            +
                                 * area, W2 is the low surrogate area.
         
     | 
| 
      
 329 
     | 
    
         
            +
                                 */
         
     | 
| 
      
 330 
     | 
    
         
            +
             
     | 
| 
      
 331 
     | 
    
         
            +
                                /* Check for incomplete UTF-16 character. */
         
     | 
| 
      
 332 
     | 
    
         
            +
             
     | 
| 
      
 333 
     | 
    
         
            +
                                if (raw_unread < 2) {
         
     | 
| 
      
 334 
     | 
    
         
            +
                                    if (parser->eof) {
         
     | 
| 
      
 335 
     | 
    
         
            +
                                        return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 336 
     | 
    
         
            +
                                                "incomplete UTF-16 character",
         
     | 
| 
      
 337 
     | 
    
         
            +
                                                parser->offset, -1);
         
     | 
| 
      
 338 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 339 
     | 
    
         
            +
                                    incomplete = 1;
         
     | 
| 
      
 340 
     | 
    
         
            +
                                    break;
         
     | 
| 
      
 341 
     | 
    
         
            +
                                }
         
     | 
| 
      
 342 
     | 
    
         
            +
             
     | 
| 
      
 343 
     | 
    
         
            +
                                /* Get the character. */
         
     | 
| 
      
 344 
     | 
    
         
            +
             
     | 
| 
      
 345 
     | 
    
         
            +
                                value = parser->raw_buffer.pointer[low]
         
     | 
| 
      
 346 
     | 
    
         
            +
                                    + (parser->raw_buffer.pointer[high] << 8);
         
     | 
| 
      
 347 
     | 
    
         
            +
             
     | 
| 
      
 348 
     | 
    
         
            +
                                /* Check for unexpected low surrogate area. */
         
     | 
| 
      
 349 
     | 
    
         
            +
             
     | 
| 
      
 350 
     | 
    
         
            +
                                if ((value & 0xFC00) == 0xDC00)
         
     | 
| 
      
 351 
     | 
    
         
            +
                                    return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 352 
     | 
    
         
            +
                                            "unexpected low surrogate area",
         
     | 
| 
      
 353 
     | 
    
         
            +
                                            parser->offset, value);
         
     | 
| 
      
 354 
     | 
    
         
            +
             
     | 
| 
      
 355 
     | 
    
         
            +
                                /* Check for a high surrogate area. */
         
     | 
| 
      
 356 
     | 
    
         
            +
             
     | 
| 
      
 357 
     | 
    
         
            +
                                if ((value & 0xFC00) == 0xD800) {
         
     | 
| 
      
 358 
     | 
    
         
            +
             
     | 
| 
      
 359 
     | 
    
         
            +
                                    width = 4;
         
     | 
| 
      
 360 
     | 
    
         
            +
             
     | 
| 
      
 361 
     | 
    
         
            +
                                    /* Check for incomplete surrogate pair. */
         
     | 
| 
      
 362 
     | 
    
         
            +
             
     | 
| 
      
 363 
     | 
    
         
            +
                                    if (raw_unread < 4) {
         
     | 
| 
      
 364 
     | 
    
         
            +
                                        if (parser->eof) {
         
     | 
| 
      
 365 
     | 
    
         
            +
                                            return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 366 
     | 
    
         
            +
                                                    "incomplete UTF-16 surrogate pair",
         
     | 
| 
      
 367 
     | 
    
         
            +
                                                    parser->offset, -1);
         
     | 
| 
      
 368 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 369 
     | 
    
         
            +
                                        incomplete = 1;
         
     | 
| 
      
 370 
     | 
    
         
            +
                                        break;
         
     | 
| 
      
 371 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 372 
     | 
    
         
            +
             
     | 
| 
      
 373 
     | 
    
         
            +
                                    /* Get the next character. */
         
     | 
| 
      
 374 
     | 
    
         
            +
             
     | 
| 
      
 375 
     | 
    
         
            +
                                    value2 = parser->raw_buffer.pointer[low+2]
         
     | 
| 
      
 376 
     | 
    
         
            +
                                        + (parser->raw_buffer.pointer[high+2] << 8);
         
     | 
| 
      
 377 
     | 
    
         
            +
             
     | 
| 
      
 378 
     | 
    
         
            +
                                    /* Check for a low surrogate area. */
         
     | 
| 
      
 379 
     | 
    
         
            +
             
     | 
| 
      
 380 
     | 
    
         
            +
                                    if ((value2 & 0xFC00) != 0xDC00)
         
     | 
| 
      
 381 
     | 
    
         
            +
                                        return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 382 
     | 
    
         
            +
                                                "expected low surrogate area",
         
     | 
| 
      
 383 
     | 
    
         
            +
                                                parser->offset+2, value2);
         
     | 
| 
      
 384 
     | 
    
         
            +
             
     | 
| 
      
 385 
     | 
    
         
            +
                                    /* Generate the value of the surrogate pair. */
         
     | 
| 
      
 386 
     | 
    
         
            +
             
     | 
| 
      
 387 
     | 
    
         
            +
                                    value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF);
         
     | 
| 
      
 388 
     | 
    
         
            +
                                }
         
     | 
| 
      
 389 
     | 
    
         
            +
             
     | 
| 
      
 390 
     | 
    
         
            +
                                else {
         
     | 
| 
      
 391 
     | 
    
         
            +
                                    width = 2;
         
     | 
| 
      
 392 
     | 
    
         
            +
                                }
         
     | 
| 
      
 393 
     | 
    
         
            +
             
     | 
| 
      
 394 
     | 
    
         
            +
                                break;
         
     | 
| 
      
 395 
     | 
    
         
            +
             
     | 
| 
      
 396 
     | 
    
         
            +
                            default:
         
     | 
| 
      
 397 
     | 
    
         
            +
                                assert(1);      /* Impossible. */
         
     | 
| 
      
 398 
     | 
    
         
            +
                        }
         
     | 
| 
      
 399 
     | 
    
         
            +
             
     | 
| 
      
 400 
     | 
    
         
            +
                        /* Check if the raw buffer contains enough bytes to form a character. */
         
     | 
| 
      
 401 
     | 
    
         
            +
             
     | 
| 
      
 402 
     | 
    
         
            +
                        if (incomplete) break;
         
     | 
| 
      
 403 
     | 
    
         
            +
             
     | 
| 
      
 404 
     | 
    
         
            +
                        /*
         
     | 
| 
      
 405 
     | 
    
         
            +
                         * Check if the character is in the allowed range:
         
     | 
| 
      
 406 
     | 
    
         
            +
                         *      #x9 | #xA | #xD | [#x20-#x7E]               (8 bit)
         
     | 
| 
      
 407 
     | 
    
         
            +
                         *      | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD]    (16 bit)
         
     | 
| 
      
 408 
     | 
    
         
            +
                         *      | [#x10000-#x10FFFF]                        (32 bit)
         
     | 
| 
      
 409 
     | 
    
         
            +
                         */
         
     | 
| 
      
 410 
     | 
    
         
            +
             
     | 
| 
      
 411 
     | 
    
         
            +
                        if (! (value == 0x09 || value == 0x0A || value == 0x0D
         
     | 
| 
      
 412 
     | 
    
         
            +
                                    || (value >= 0x20 && value <= 0x7E)
         
     | 
| 
      
 413 
     | 
    
         
            +
                                    || (value == 0x85) || (value >= 0xA0 && value <= 0xD7FF)
         
     | 
| 
      
 414 
     | 
    
         
            +
                                    || (value >= 0xE000 && value <= 0xFFFD)
         
     | 
| 
      
 415 
     | 
    
         
            +
                                    || (value >= 0x10000 && value <= 0x10FFFF)))
         
     | 
| 
      
 416 
     | 
    
         
            +
                            return yaml_parser_set_reader_error(parser,
         
     | 
| 
      
 417 
     | 
    
         
            +
                                    "control characters are not allowed",
         
     | 
| 
      
 418 
     | 
    
         
            +
                                    parser->offset, value);
         
     | 
| 
      
 419 
     | 
    
         
            +
             
     | 
| 
      
 420 
     | 
    
         
            +
                        /* Move the raw pointers. */
         
     | 
| 
      
 421 
     | 
    
         
            +
             
     | 
| 
      
 422 
     | 
    
         
            +
                        parser->raw_buffer.pointer += width;
         
     | 
| 
      
 423 
     | 
    
         
            +
                        parser->offset += width;
         
     | 
| 
      
 424 
     | 
    
         
            +
             
     | 
| 
      
 425 
     | 
    
         
            +
                        /* Finally put the character into the buffer. */
         
     | 
| 
      
 426 
     | 
    
         
            +
             
     | 
| 
      
 427 
     | 
    
         
            +
                        /* 0000 0000-0000 007F -> 0xxxxxxx */
         
     | 
| 
      
 428 
     | 
    
         
            +
                        if (value <= 0x7F) {
         
     | 
| 
      
 429 
     | 
    
         
            +
                            *(parser->buffer.last++) = value;
         
     | 
| 
      
 430 
     | 
    
         
            +
                        }
         
     | 
| 
      
 431 
     | 
    
         
            +
                        /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
         
     | 
| 
      
 432 
     | 
    
         
            +
                        else if (value <= 0x7FF) {
         
     | 
| 
      
 433 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0xC0 + (value >> 6);
         
     | 
| 
      
 434 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + (value & 0x3F);
         
     | 
| 
      
 435 
     | 
    
         
            +
                        }
         
     | 
| 
      
 436 
     | 
    
         
            +
                        /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
         
     | 
| 
      
 437 
     | 
    
         
            +
                        else if (value <= 0xFFFF) {
         
     | 
| 
      
 438 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0xE0 + (value >> 12);
         
     | 
| 
      
 439 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F);
         
     | 
| 
      
 440 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + (value & 0x3F);
         
     | 
| 
      
 441 
     | 
    
         
            +
                        }
         
     | 
| 
      
 442 
     | 
    
         
            +
                        /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
         
     | 
| 
      
 443 
     | 
    
         
            +
                        else {
         
     | 
| 
      
 444 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0xF0 + (value >> 18);
         
     | 
| 
      
 445 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + ((value >> 12) & 0x3F);
         
     | 
| 
      
 446 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F);
         
     | 
| 
      
 447 
     | 
    
         
            +
                            *(parser->buffer.last++) = 0x80 + (value & 0x3F);
         
     | 
| 
      
 448 
     | 
    
         
            +
                        }
         
     | 
| 
      
 449 
     | 
    
         
            +
             
     | 
| 
      
 450 
     | 
    
         
            +
                        parser->unread ++;
         
     | 
| 
      
 451 
     | 
    
         
            +
                    }
         
     | 
| 
      
 452 
     | 
    
         
            +
             
     | 
| 
      
 453 
     | 
    
         
            +
                    /* On EOF, put NUL into the buffer and return. */
         
     | 
| 
      
 454 
     | 
    
         
            +
             
     | 
| 
      
 455 
     | 
    
         
            +
                    if (parser->eof) {
         
     | 
| 
      
 456 
     | 
    
         
            +
                        *(parser->buffer.last++) = '\0';
         
     | 
| 
      
 457 
     | 
    
         
            +
                        parser->unread ++;
         
     | 
| 
      
 458 
     | 
    
         
            +
                        return 1;
         
     | 
| 
      
 459 
     | 
    
         
            +
                    }
         
     | 
| 
      
 460 
     | 
    
         
            +
             
     | 
| 
      
 461 
     | 
    
         
            +
                }
         
     | 
| 
      
 462 
     | 
    
         
            +
             
     | 
| 
      
 463 
     | 
    
         
            +
                if (parser->offset >= PTRDIFF_MAX)
         
     | 
| 
      
 464 
     | 
    
         
            +
                    return yaml_parser_set_reader_error(parser, "input is too long",
         
     | 
| 
      
 465 
     | 
    
         
            +
                            PTRDIFF_MAX, -1);
         
     | 
| 
      
 466 
     | 
    
         
            +
             
     | 
| 
      
 467 
     | 
    
         
            +
                return 1;
         
     | 
| 
      
 468 
     | 
    
         
            +
            }
         
     | 
| 
      
 469 
     | 
    
         
            +
             
     |