RubyGems - crack-without-safe_yaml - Versions diffs - 0.4.3 - Mend

crack-without-safe_yaml 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +7 -0
data/.gitignore +8 -0
data/.travis.yml +14 -0
data/Gemfile +5 -0
data/History +25 -0
data/LICENSE +20 -0
data/README.md +46 -0
data/crack.gemspec +18 -0
data/lib/crack.rb +7 -0
data/lib/crack/json.rb +98 -0
data/lib/crack/util.rb +17 -0
data/lib/crack/version.rb +3 -0
data/lib/crack/xml.rb +238 -0
data/script/bootstrap +21 -0
data/script/release +42 -0
data/script/test +25 -0
data/test/data/large_dataset.json +139988 -0
data/test/data/twittersearch-firefox.json +1 -0
data/test/data/twittersearch-ie.json +1 -0
data/test/hash_test.rb +26 -0
data/test/json_test.rb +91 -0
data/test/parser_test.rb +27 -0
data/test/string_test.rb +31 -0
data/test/test_helper.rb +3 -0
data/test/xml_test.rb +514 -0
metadata +78 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: f0e2f8a3bdaf0cf4d6370d17515f8337602f9dd1
+  data.tar.gz: 0ed3cc855986617a10b12e6e260de348c2c01a9f
+SHA512:
+  metadata.gz: 21196e82748f57c78554dd7d6b2495ab96cb118bc3f68ee08a4f4b01f01d7cc40169a334931c3be40b9dc3bb4ac95fdb9500d928abafaac51a9ca9fe429c412f
+  data.tar.gz: 2846f21043ddb611dd121bb2ba7db73a74bcfa2c4cf63eebb9dbe36f17099898114e1c3202d66afdd409ce36629fcf846b309fab47b9627bacbf46fabc8736fd

data/.gitignore ADDED

@@ -0,0 +1,8 @@
+*.sw?
+.DS_Store
+coverage
+rdoc
+pkg
+*.gem
+/.bundle
+/Gemfile.lock

data/.travis.yml ADDED

@@ -0,0 +1,14 @@
+language: ruby
+rvm:
+  - 1.8
+  - 1.9
+  - 2.0
+  - 2.1
+  - 2.2
+  - jruby
+  - rbx
+matrix:
+  allow_failures:
+    - rvm: 1.8
+script: ./script/test
+sudo: false

data/Gemfile ADDED

@@ -0,0 +1,5 @@
+source 'https://rubygems.org'
+gemspec
+gem "rake"
+gem "minitest"

data/History ADDED

@@ -0,0 +1,25 @@
+== 0.1.7 2010-02-19
+* 1 minor patch
+  * Added patch from @purp for ISO 8601 date/time format
+== 0.1.6 2010-01-31
+* 1 minor patch
+  * Added Crack::VERSION constant - http://weblog.rubyonrails.org/2009/9/1/gem-packaging-best-practices
+== 0.1.5 2010-01-27
+* 1 minor patch
+  * Strings that begin with dates shouldn't be parsed as such (sandro)
+== 0.1.3 2009-06-22
+* 1 minor patch
+  * Parsing a text node with attributes stores them in the attributes method (tamalw)
+== 0.1.2 2009-04-21
+* 2 minor patches
+  * Correct unnormalization of attribute values (der-flo)
+  * Fix error in parsing YAML in the case where a hash value ends with backslashes, and there are subsequent values in the hash (deadprogrammer)
+== 0.1.1 2009-03-31
+* 1 minor patch
+  * Parsing empty or blank xml now returns empty hash instead of raising error.
+== 0.1.0 2009-03-28
+* Initial release.

data/LICENSE ADDED

@@ -0,0 +1,20 @@
+Copyright (c) 2009 John Nunemaker
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,46 @@
+# crack
+[![Build Status](https://travis-ci.org/jnunemaker/crack.svg?branch=master)](https://travis-ci.org/jnunemaker/crack)
+Really simple JSON and XML parsing, ripped from Merb and Rails. The XML parser is ripped from Merb and the JSON parser is ripped from Rails. I take no credit, just packaged them for all to enjoy and easily use.
+## compatibility
+* ruby 1.8.7
+* ruby 1.9+ (3 failures related to time parsing, would love it if someone could figure them out)
+## note on patches/pull requests
+* Fork the project.
+* Make your feature addition or bug fix.
+* Add tests for it. This is important so I don't break it in a future version unintentionally.
+* `script/test` - this will bootstrap and run the tests
+* Commit, do not mess with rakefile, version, or history. (if you want to have your own version, that is fine but bump version in a commit by itself in another branch so I can ignore when I pull)
+* Send me a pull request. Bonus points for topic branches.
+## usage
+```ruby
+gem 'crack' # in Gemfile
+require 'crack' # for xml and json
+require 'crack/json' # for just json
+require 'crack/xml' # for just xml
+```
+## examples
+```ruby
+Crack::XML.parse("<tag>This is the contents</tag>")
+# => {'tag' => 'This is the contents'}
+Crack::JSON.parse('{"tag":"This is the contents"}')
+# => {'tag' => 'This is the contents'}
+```
+## Copyright
+Copyright (c) 2009 John Nunemaker. See LICENSE for details.
+## Docs
+http://rdoc.info/projects/jnunemaker/crack

data/crack.gemspec ADDED

@@ -0,0 +1,18 @@
+# -*- encoding: utf-8 -*-
+require File.expand_path('../lib/crack/version', __FILE__)
+Gem::Specification.new do |gem|
+  gem.authors       = ["John Nunemaker"]
+  gem.email         = ["nunemaker@gmail.com"]
+  gem.description   = %q{Really simple JSON and XML parsing, ripped from Merb and Rails.}
+  gem.summary       = %q{Really simple JSON and XML parsing, ripped from Merb and Rails.}
+  gem.homepage      = "http://github.com/jnunemaker/crack"
+  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  gem.files         = `git ls-files`.split("\n")
+  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  gem.name          = "crack-without-safe_yaml"
+  gem.require_paths = ["lib"]
+  gem.version       = Crack::VERSION
+  gem.license       = "MIT"
+end

data/lib/crack.rb ADDED

@@ -0,0 +1,7 @@
+module Crack
+  class ParseError < StandardError; end
+end
+require 'crack/util'
+require 'crack/json'
+require 'crack/xml'

data/lib/crack/json.rb ADDED

@@ -0,0 +1,98 @@
+# Copyright (c) 2004-2008 David Heinemeier Hansson
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+require 'date'
+require 'strscan'
+require 'yaml'
+module Crack
+  class JSON
+    def self.parser_exceptions
+      @parser_exceptions ||= [ArgumentError, Psych::SyntaxError]
+    end
+    def self.parse(json)
+      yaml = unescape(convert_json_to_yaml(json))
+      YAML.safe_load(yaml, [Regexp, Date, Time])
+    rescue *parser_exceptions
+      raise ParseError, "Invalid JSON string"
+    rescue Psych::DisallowedClass
+      yaml
+    end
+    protected
+      def self.unescape(str)
+        # Force the encoding to be UTF-8 so we can perform regular expressions
+        # on 1.9.2 without blowing up.
+        # see http://stackoverflow.com/questions/1224204/ruby-mechanize-getting-force-encoding-exception for a similar issue
+        str.force_encoding('UTF-8') if defined?(Encoding) && str.respond_to?(:force_encoding)
+        str.gsub(/\\u0000/, "").gsub(/\\[u|U]([0-9a-fA-F]{4})/) { [$1.hex].pack("U") }
+      end
+      # matches YAML-formatted dates
+      DATE_REGEX = /^\d{4}-\d{2}-\d{2}$|^\d{4}-\d{1,2}-\d{1,2}[T \t]+\d{1,2}:\d{2}:\d{2}(\.[0-9]*)?(([ \t]*)Z|[-+]\d{2}?(:\d{2})?)$/
+      # Ensure that ":" and "," are always followed by a space
+      def self.convert_json_to_yaml(json) #:nodoc:
+        json = String.new(json) #can't modify a frozen string
+        scanner, quoting, marks, pos, date_starts, date_ends = StringScanner.new(json), false, [], nil, [], []
+        while scanner.scan_until(/(\\['"]|['":,\/\\]|\\.)/)
+          case char = scanner[1]
+          when '"', "'"
+            if !quoting
+              quoting = char
+              pos = scanner.pos
+            elsif quoting == char
+              if json[pos..scanner.pos-2] =~ DATE_REGEX
+                # found a date, track the exact positions of the quotes so we can remove them later.
+                # oh, and increment them for each current mark, each one is an extra padded space that bumps
+                # the position in the final YAML output
+                total_marks = marks.size
+                date_starts << pos+total_marks
+                date_ends << scanner.pos+total_marks
+              end
+              quoting = false
+            end
+          when "/"
+            if !quoting
+              json[scanner.pos - 1] = "!ruby/regexp /"
+              scanner.pos += 13
+              scanner.scan_until(/\/[mix]*/)
+            end
+          when ":",","
+            marks << scanner.pos - 1 unless quoting
+          when "\\"
+            scanner.skip(/\\/)
+          end
+        end
+        if marks.empty?
+          json.gsub(/\\\//, '/')
+        else
+          left_pos  = marks.clone.unshift(-1)
+          right_pos = marks << json.length
+          output    = []
+          left_pos.each_with_index do |left, i|
+            output << json[left.succ..right_pos[i]]
+          end
+          output = output * " "
+          format_dates(output, date_starts, date_ends)
+          output.gsub!(/\\\//, '/')
+          output
+        end
+      end
+      def self.format_dates(output, date_starts, date_ends)
+        if YAML.constants.include?('Syck')
+          (date_starts + date_ends).each { |i| output[i-1] = ' ' }
+        else
+          extra_chars_to_be_added = 0
+          date_starts.each do |i|
+            output[i-2+extra_chars_to_be_added] = '!!timestamp '
+            extra_chars_to_be_added += 10
+          end
+        end
+      end
+  end
+end

data/lib/crack/util.rb ADDED

@@ -0,0 +1,17 @@
+module Crack
+  module Util
+    def snake_case(str)
+      return str.downcase if str =~ /^[A-Z]+$/
+      str.gsub(/([A-Z]+)(?=[A-Z][a-z]?)|\B[A-Z]/, '_\&') =~ /_*(.*)/
+      return $+.downcase
+    end
+    def to_xml_attributes(hash)
+      hash.map do |k,v|
+        %{#{Crack::Util.snake_case(k.to_s).sub(/^(.{1,1})/) { |m| m.downcase }}="#{v.to_s.gsub('"', '&quot;')}"}
+      end.join(' ')
+    end
+    extend self
+  end
+end

data/lib/crack/version.rb ADDED

@@ -0,0 +1,3 @@
+module Crack
+  VERSION = "0.4.3"
+end

data/lib/crack/xml.rb ADDED

@@ -0,0 +1,238 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+require 'rexml/light/node'
+require 'rexml/text'
+require "rexml/document"
+require 'date'
+require 'time'
+require 'yaml'
+require 'bigdecimal'
+# The reason for defining a String subclass for this specific plugin is to
+# avoid dynamically inserting things onto strings (see the version prior to this commit).
+# Doing that makes it impossible to dump the structure. This way it works.
+class REXMLUtiliyNodeString < String
+  attr_accessor :attributes
+end
+# This is a slightly modified version of the XMLUtilityNode from
+# http://merb.devjavu.com/projects/merb/ticket/95 (has.sox@gmail.com)
+# It's mainly just adding vowels, as I ht cd wth n vwls :)
+# This represents the hard part of the work, all I did was change the
+# underlying parser.
+class REXMLUtilityNode #:nodoc:
+  attr_accessor :name, :attributes, :children, :type
+  def self.typecasts
+    @@typecasts
+  end
+  def self.typecasts=(obj)
+    @@typecasts = obj
+  end
+  def self.available_typecasts
+    @@available_typecasts
+  end
+  def self.available_typecasts=(obj)
+    @@available_typecasts = obj
+  end
+  self.typecasts = {}
+  self.typecasts["integer"]       = lambda{|v| v.nil? ? nil : v.to_i}
+  self.typecasts["boolean"]       = lambda{|v| v.nil? ? nil : (v.strip != "false")}
+  self.typecasts["datetime"]      = lambda{|v| v.nil? ? nil : Time.parse(v).utc}
+  self.typecasts["date"]          = lambda{|v| v.nil? ? nil : Date.parse(v)}
+  self.typecasts["dateTime"]      = lambda{|v| v.nil? ? nil : Time.parse(v).utc}
+  self.typecasts["decimal"]       = lambda{|v| v.nil? ? nil : BigDecimal(v.to_s)}
+  self.typecasts["double"]        = lambda{|v| v.nil? ? nil : v.to_f}
+  self.typecasts["float"]         = lambda{|v| v.nil? ? nil : v.to_f}
+  self.typecasts["string"]        = lambda{|v| v.to_s}
+  self.typecasts["base64Binary"]  = lambda{|v| v.unpack('m').first }
+  self.available_typecasts = self.typecasts.keys
+  def initialize(name, normalized_attributes = {})
+    # unnormalize attribute values
+    attributes = Hash[* normalized_attributes.map { |key, value|
+      [ key, unnormalize_xml_entities(value) ]
+    }.flatten]
+    @name         = name.tr("-", "_")
+    # leave the type alone if we don't know what it is
+    @type         = self.class.available_typecasts.include?(attributes["type"]) ? attributes.delete("type") : attributes["type"]
+    @nil_element  = attributes.delete("nil") == "true"
+    @attributes   = undasherize_keys(attributes)
+    @children     = []
+    @text         = false
+  end
+  def add_node(node)
+    @text = true if node.is_a? String
+    @children << node
+  end
+  def to_hash
+    # ACG: Added a check here to prevent an exception a type == "file" tag has nodes within it
+    if @type == "file" and (@children.first.nil? or @children.first.is_a?(String))
+      f = StringIO.new((@children.first || '').unpack('m').first)
+      class << f
+        attr_accessor :original_filename, :content_type
+      end
+      f.original_filename = attributes['name'] || 'untitled'
+      f.content_type = attributes['content_type'] || 'application/octet-stream'
+      return {name => f}
+    end
+    if @text
+      t = typecast_value( unnormalize_xml_entities( inner_html ) )
+      if t.is_a?(String)
+        t = REXMLUtiliyNodeString.new(t)
+        t.attributes = attributes
+      end
+      return { name => t }
+    else
+      #change repeating groups into an array
+      groups = @children.inject({}) { |s,e| (s[e.name] ||= []) << e; s }
+      out = nil
+      if @type == "array"
+        out = []
+        groups.each do |k, v|
+          if v.size == 1
+            out << v.first.to_hash.entries.first.last
+          else
+            out << v.map{|e| e.to_hash[k]}
+          end
+        end
+        out = out.flatten
+      else # If Hash
+        out = {}
+        groups.each do |k,v|
+          if v.size == 1
+            out.merge!(v.first)
+          else
+            out.merge!( k => v.map{|e| e.to_hash[k]})
+          end
+        end
+        out.merge! attributes unless attributes.empty?
+        out = out.empty? ? nil : out
+      end
+      if @type && out.nil?
+        { name => typecast_value(out) }
+      else
+        { name => out }
+      end
+    end
+  end
+  # Typecasts a value based upon its type. For instance, if
+  # +node+ has #type == "integer",
+  # {{[node.typecast_value("12") #=> 12]}}
+  #
+  # @param value<String> The value that is being typecast.
+  #
+  # @details [:type options]
+  #   "integer"::
+  #     converts +value+ to an integer with #to_i
+  #   "boolean"::
+  #     checks whether +value+, after removing spaces, is the literal
+  #     "true"
+  #   "datetime"::
+  #     Parses +value+ using Time.parse, and returns a UTC Time
+  #   "date"::
+  #     Parses +value+ using Date.parse
+  #
+  # @return <Integer, TrueClass, FalseClass, Time, Date, Object>
+  #   The result of typecasting +value+.
+  #
+  # @note
+  #   If +self+ does not have a "type" key, or if it's not one of the
+  #   options specified above, the raw +value+ will be returned.
+  def typecast_value(value)
+    return value unless @type
+    proc = self.class.typecasts[@type]
+    proc.nil? ? value : proc.call(value)
+  end
+  # Take keys of the form foo-bar and convert them to foo_bar
+  def undasherize_keys(params)
+    params.keys.each do |key, value|
+      params[key.tr("-", "_")] = params.delete(key)
+    end
+    params
+  end
+  # Get the inner_html of the REXML node.
+  def inner_html
+    @children.join
+  end
+  # Converts the node into a readable HTML node.
+  #
+  # @return <String> The HTML node in text form.
+  def to_html
+    attributes.merge!(:type => @type ) if @type
+    "<#{name}#{Crack::Util.to_xml_attributes(attributes)}>#{@nil_element ? '' : inner_html}</#{name}>"
+  end
+  # @alias #to_html #to_s
+  def to_s
+    to_html
+  end
+  private
+  def unnormalize_xml_entities value
+    REXML::Text.unnormalize(value)
+  end
+end
+module Crack
+  class REXMLParser
+    def self.parse(xml)
+      stack = []
+      parser = REXML::Parsers::BaseParser.new(xml)
+      while true
+        event = parser.pull
+        case event[0]
+        when :end_document
+          break
+        when :end_doctype, :start_doctype
+          # do nothing
+        when :start_element
+          stack.push REXMLUtilityNode.new(event[1], event[2])
+        when :end_element
+          if stack.size > 1
+            temp = stack.pop
+            stack.last.add_node(temp)
+          end
+        when :text, :cdata
+          stack.last.add_node(event[1]) unless event[1].strip.length == 0 || stack.empty?
+        end
+      end
+      stack.length > 0 ? stack.pop.to_hash : {}
+    end
+  end
+  class XML
+    def self.parser
+      @@parser ||= REXMLParser
+    end
+    def self.parser=(parser)
+      @@parser = parser
+    end
+    def self.parse(xml)
+      parser.parse(xml)
+    end
+  end
+end