mortar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +36 -0
 - data/bin/mortar +13 -0
 - data/lib/mortar.rb +23 -0
 - data/lib/mortar/auth.rb +312 -0
 - data/lib/mortar/cli.rb +54 -0
 - data/lib/mortar/command.rb +267 -0
 - data/lib/mortar/command/auth.rb +96 -0
 - data/lib/mortar/command/base.rb +319 -0
 - data/lib/mortar/command/clusters.rb +41 -0
 - data/lib/mortar/command/describe.rb +97 -0
 - data/lib/mortar/command/generate.rb +121 -0
 - data/lib/mortar/command/help.rb +166 -0
 - data/lib/mortar/command/illustrate.rb +97 -0
 - data/lib/mortar/command/jobs.rb +174 -0
 - data/lib/mortar/command/pigscripts.rb +45 -0
 - data/lib/mortar/command/projects.rb +128 -0
 - data/lib/mortar/command/validate.rb +94 -0
 - data/lib/mortar/command/version.rb +42 -0
 - data/lib/mortar/errors.rb +24 -0
 - data/lib/mortar/generators/generator_base.rb +107 -0
 - data/lib/mortar/generators/macro_generator.rb +37 -0
 - data/lib/mortar/generators/pigscript_generator.rb +40 -0
 - data/lib/mortar/generators/project_generator.rb +67 -0
 - data/lib/mortar/generators/udf_generator.rb +28 -0
 - data/lib/mortar/git.rb +233 -0
 - data/lib/mortar/helpers.rb +488 -0
 - data/lib/mortar/project.rb +156 -0
 - data/lib/mortar/snapshot.rb +39 -0
 - data/lib/mortar/templates/macro/macro.pig +14 -0
 - data/lib/mortar/templates/pigscript/pigscript.pig +38 -0
 - data/lib/mortar/templates/pigscript/python_udf.py +13 -0
 - data/lib/mortar/templates/project/Gemfile +3 -0
 - data/lib/mortar/templates/project/README.md +8 -0
 - data/lib/mortar/templates/project/gitignore +4 -0
 - data/lib/mortar/templates/project/macros/gitkeep +0 -0
 - data/lib/mortar/templates/project/pigscripts/pigscript.pig +35 -0
 - data/lib/mortar/templates/project/udfs/python/python_udf.py +13 -0
 - data/lib/mortar/templates/udf/python_udf.py +13 -0
 - data/lib/mortar/version.rb +20 -0
 - data/lib/vendor/mortar/okjson.rb +598 -0
 - data/lib/vendor/mortar/uuid.rb +312 -0
 - data/spec/mortar/auth_spec.rb +156 -0
 - data/spec/mortar/command/auth_spec.rb +46 -0
 - data/spec/mortar/command/base_spec.rb +82 -0
 - data/spec/mortar/command/clusters_spec.rb +61 -0
 - data/spec/mortar/command/describe_spec.rb +135 -0
 - data/spec/mortar/command/generate_spec.rb +139 -0
 - data/spec/mortar/command/illustrate_spec.rb +140 -0
 - data/spec/mortar/command/jobs_spec.rb +364 -0
 - data/spec/mortar/command/pigscripts_spec.rb +70 -0
 - data/spec/mortar/command/projects_spec.rb +165 -0
 - data/spec/mortar/command/validate_spec.rb +119 -0
 - data/spec/mortar/command_spec.rb +122 -0
 - data/spec/mortar/git_spec.rb +278 -0
 - data/spec/mortar/helpers_spec.rb +82 -0
 - data/spec/mortar/project_spec.rb +76 -0
 - data/spec/mortar/snapshot_spec.rb +46 -0
 - data/spec/spec.opts +1 -0
 - data/spec/spec_helper.rb +278 -0
 - data/spec/support/display_message_matcher.rb +68 -0
 - metadata +259 -0
 
| 
         @@ -0,0 +1,156 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Copyright 2012 Mortar Data Inc.
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 
      
 5 
     | 
    
         
            +
            # you may not use this file except in compliance with the License.
         
     | 
| 
      
 6 
     | 
    
         
            +
            # You may obtain a copy of the License at
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # Unless required by applicable law or agreed to in writing, software
         
     | 
| 
      
 11 
     | 
    
         
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 
      
 12 
     | 
    
         
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 
      
 13 
     | 
    
         
            +
            # See the License for the specific language governing permissions and
         
     | 
| 
      
 14 
     | 
    
         
            +
            # limitations under the License.
         
     | 
| 
      
 15 
     | 
    
         
            +
            #
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            require 'fileutils'
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            module Mortar
         
     | 
| 
      
 20 
     | 
    
         
            +
              module Project
         
     | 
| 
      
 21 
     | 
    
         
            +
                # Raised for problems with the layout of a Mortar project on disk
                # (for example, a missing project directory).
                class ProjectError < RuntimeError
                end
         
     | 
| 
      
 22 
     | 
    
         
            +
                
         
     | 
| 
      
 23 
     | 
    
         
            +
                # A Mortar project rooted at a directory on the local filesystem.
                #
                # Holds the project's name, root path and remote, and gives access to
                # the well-known sub-directories underneath the root.
                class Project
                  attr_accessor :name, :remote, :root_path

                  # Returns the names of the directories a project is expected to contain.
                  def self.required_directories
                    %w[macros pigscripts udfs]
                  end

                  # name      - project name
                  # root_path - path of the project's root directory
                  # remote    - stored as-is; presumably the git remote name (verify against callers)
                  def initialize(name, root_path, remote)
                    @name, @root_path, @remote = name, root_path, remote
                  end

                  # Path of the directory holding python UDFs.
                  def python_udfs_path
                    File.join(@root_path, "udfs/python")
                  end

                  # Memoized collection of the ".py" files found under python_udfs_path.
                  def python_udfs
                    @python_udfs ||= PythonUDFs.new(python_udfs_path, "python", ".py")
                  end

                  # Path of the directory holding pig scripts.
                  def pigscripts_path
                    File.join(@root_path, "pigscripts")
                  end

                  # Memoized collection of the ".pig" files found under pigscripts_path.
                  def pigscripts
                    @pigscripts ||= PigScripts.new(pigscripts_path, "pigscripts", ".pig")
                  end

                  # Path of the project's "tmp" directory; the directory is created
                  # on first use if it does not exist yet.
                  def tmp_path
                    tmp_dir = File.join(@root_path, "tmp")
                    FileUtils.mkdir_p(tmp_dir) unless File.directory?(tmp_dir)
                    tmp_dir
                  end
                end
         
     | 
| 
      
 69 
     | 
    
         
            +
                
         
     | 
| 
      
 70 
     | 
    
         
            +
                # Base class for a collection of project files that share a filename
                # extension (e.g. all ".pig" files under a directory).
                #
                # The directory is scanned recursively once, at construction time, and
                # the result is kept as a hash of {element name => element}, where the
                # element name is the file's basename without the extension. Elements
                # can be reached with [], by iterating (Enumerable), or by calling a
                # method named after the element.
                #
                # Subclasses must implement element(name, path).
                class ProjectEntity

                  include Enumerable

                  # path               - directory to scan (recursively) for elements
                  # name               - label used in the "Unable to find ..." error message
                  # filename_extension - filename suffix that selects the elements (e.g. ".pig")
                  #
                  # Raises ProjectError when path is not a directory.
                  def initialize(path, name, filename_extension)
                    @path = path
                    @name = name
                    @filename_extension = filename_extension
                    @elements = elements
                  end

                  # Lets an element be fetched by calling a method with the element's
                  # name. Anything that is not a known element falls through to the
                  # default behaviour (NoMethodError).
                  def method_missing(method, *args, &block)
                    # @elements is still nil while initialize is running; guard so a
                    # stray call during construction reports the real missing method.
                    found = @elements && @elements[method.to_s]
                    return found if found
                    super
                  end

                  # Keeps respond_to? in agreement with method_missing.
                  def respond_to_missing?(method, include_private = false)
                    return true if @elements && @elements[method.to_s]
                    super
                  end

                  # Yields a [name, element] pair for every element. Returns an
                  # enumerator when called without a block.
                  def each
                    return to_enum(:each) unless block_given?
                    @elements.each {|key, value| yield [key, value]}
                  end

                  # Returns the element called key, or nil when there is none.
                  def [](key)
                    @elements[key]
                  end

                  # Returns the names of all elements.
                  def keys
                    @elements.keys
                  end

                  protected

                  # Name of an element: the file's basename without the extension.
                  def element_name(element_path)
                    File.basename(element_path, @filename_extension)
                  end

                  # Scans @path recursively and builds {element name => element}.
                  # When two files in different sub-directories share a basename, the
                  # one returned later by the glob wins.
                  #
                  # Raises ProjectError when @path is not a directory.
                  def elements
                    unless File.directory? @path
                      raise ProjectError, "Unable to find #{@name} directory in project"
                    end

                    file_paths = Dir[File.join(@path, "**", "*#{@filename_extension}")]
                    # Build the hash directly: flattening a list of pairs would also
                    # flatten any element that happens to be array-like, and splatting
                    # it into Hash[] passes one argument per file.
                    file_paths.inject({}) do |found, element_path|
                      name = element_name(element_path)
                      found[name] = element(name, element_path)
                      found
                    end
                  end

                  # Builds the object that represents one file. Takes the same
                  # (name, path) arguments that #elements passes and subclasses accept.
                  def element(name, path)
                    raise NotImplementedError, "Implement in subclass"
                  end
                end
         
     | 
| 
      
 120 
     | 
    
         
            +
                
         
     | 
| 
      
 121 
     | 
    
         
            +
                # Collection of the pig script files of a project.
                class PigScripts < ProjectEntity
                  # Wraps a single pig script file in a Script.
                  def element(name, path)
                    Script.new(name, path)
                  end
                end
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
                # Collection of the python UDF files of a project.
                class PythonUDFs < ProjectEntity
                  # Wraps a single python UDF file in a Script.
                  def element(name, path)
                    Script.new(name, path)
                  end
                end
         
     | 
| 
      
 132 
     | 
    
         
            +
                
         
     | 
| 
      
 133 
     | 
    
         
            +
                # A single source file of a project, identified by a name and the
                # path of the file on disk.
                class Script

                  attr_reader :name
                  attr_reader :path

                  # name - name of the script
                  # path - path of the file that holds the script's source
                  def initialize(name, path)
                    @name = name
                    @path = path
                  end

                  # Returns the current contents of the file at path. The file is read
                  # on every call; nothing is cached.
                  #
                  # Raises the usual Errno::* errors when the file cannot be read.
                  def code
                    # File.read opens and closes the file itself, so the handle is
                    # released even when reading fails part-way.
                    File.read(@path)
                  end

                  # String form of a script is its source code.
                  def to_s
                    code
                  end
                end
         
     | 
| 
      
 154 
     | 
    
         
            +
                
         
     | 
| 
      
 155 
     | 
    
         
            +
              end
         
     | 
| 
      
 156 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,39 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Copyright 2012 Mortar Data Inc.
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 
      
 5 
     | 
    
         
            +
            # you may not use this file except in compliance with the License.
         
     | 
| 
      
 6 
     | 
    
         
            +
            # You may obtain a copy of the License at
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # Unless required by applicable law or agreed to in writing, software
         
     | 
| 
      
 11 
     | 
    
         
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 
      
 12 
     | 
    
         
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 
      
 13 
     | 
    
         
            +
            # See the License for the specific language governing permissions and
         
     | 
| 
      
 14 
     | 
    
         
            +
            # limitations under the License.
         
     | 
| 
      
 15 
     | 
    
         
            +
            #
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            module Mortar
         
     | 
| 
      
 18 
     | 
    
         
            +
              # Helpers for snapshotting the local project code and sending it to the
              # project's remote.
              #
              # NOTE(review): relies on an `action(message) { ... }` helper that is not
              # defined in this module; it is presumably supplied by whatever mixes
              # this module in (or by a helpers module) - confirm against callers.
              module Snapshot

                extend self

                # Creates a snapshot branch, pushes it to project.remote and returns
                # the value of the second `action` block, i.e. the result of
                # git.git_ref for the snapshot branch (assuming `action` returns its
                # block's value).
                #
                # git     - object responding to create_snapshot_branch, push,
                #           git_ref and branch_delete
                # project - object responding to remote
                #
                # The local snapshot branch is deleted afterwards, whether or not the
                # push succeeded.
                def create_and_push_snapshot_branch(git, project)
                  # create / push a snapshot branch
                  snapshot_branch = action("Taking code snapshot") do
                    git.create_snapshot_branch()
                  end

                  action("Sending code snapshot to Mortar") do
                    begin
                      # push the code
                      git.push(project.remote, snapshot_branch)

                      # grab the commit hash
                      git.git_ref(snapshot_branch)
                    ensure
                      # clean the branch out of the local branches even when the
                      # push or the ref lookup fails, so failed runs do not pile up
                      # stale snapshot branches
                      git.branch_delete(snapshot_branch)
                    end
                  end
                end
              end
         
     | 
| 
      
 39 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /**
             * <%= macro_name %>: Pig macros for use in pigscripts.
             *
             */

            /**
             * A simple example macro function that returns the entity passed in.
             */
            DEFINE <%= macro_name.capitalize %>_EXAMPLE(input_relation)
            returns output_relation {
                -- just an example
                $output_relation = FOREACH $input_relation GENERATE *;
            };
         
     | 
| 
         @@ -0,0 +1,38 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /**
             * <%= script_name %>
             *
             * Required parameters:
             *
             * -param INPUT_PATH Input path for script data (e.g. s3n://hawk-example-data/tutorial/excite.log.bz2)
             * -param OUTPUT_PATH Output path for script data (e.g. s3n://my-output-bucket/<%= script_name %>)
             */

            <% if not options[:skip_udf] %>
            /**
             * User-Defined Functions (UDFs)
             */

            REGISTER '../udfs/python/<%= script_name %>.py' using streaming_python as <%= script_name %>;
            <% end %>

            -- This is an example of loading up input data
            my_input_data = LOAD '$INPUT_PATH'
                           USING PigStorage('\t')
                              AS (field0:chararray, field1:chararray, field2:chararray);

            -- This is an example pig operation
            filtered = FILTER my_input_data
                           BY field0 IS NOT NULL;

            <%# The UDF call is generated only when the UDF is registered above; otherwise the script would reference an unregistered alias. %>
            <% if not options[:skip_udf] %>
            -- This is an example call to a python user-defined function
            with_udf_output = FOREACH filtered
                             GENERATE field0..field2,
                                      <%= script_name %>.example_udf(field0) AS example_udf_field;
            <% end %>

            -- remove any existing data
            rmf $OUTPUT_PATH;

            -- store the results
            STORE <%= options[:skip_udf] ? 'filtered' : 'with_udf_output' %>
             INTO '$OUTPUT_PATH'
            USING PigStorage('\t');
         
     | 
| 
         @@ -0,0 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            from pig_util import outputSchema
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # 
         
     | 
| 
      
 4 
     | 
    
         
            +
            # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Pig needs to know the schema of the data coming out of the function, 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # which we specify using the @outputSchema decorator.
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            @outputSchema('example_udf:int')
            def example_udf(input_str):
                """
                Return the length of input_str, or None when input_str is empty or None.
                """
                if not input_str:
                    return None
                return len(input_str)
         
     | 
| 
         
            File without changes
         
     | 
| 
         @@ -0,0 +1,35 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /**
             * <%= project_name %>
             *
             * Required parameters:
             *
             * -param INPUT_PATH Input path for script data (e.g. s3n://hawk-example-data/tutorial/excite.log.bz2)
             * -param OUTPUT_PATH Output path for script data (e.g. s3n://my-output-bucket/<%= project_name %>)
             */
             
            /**
             * User-Defined Functions (UDFs)
             */
            -- Registers the project's python UDF file under an alias named after the project;
            -- that alias is the namespace used further down to call example_udf.
            REGISTER '../udfs/python/<%= project_name %>.py' using streaming_python as <%= project_name %>;
             
            -- This is an example of loading up input data
            -- Records are read from $INPUT_PATH with a tab delimiter and exposed as three
            -- chararray columns named field0, field1 and field2.
            my_input_data = LOAD '$INPUT_PATH' 
                           USING PigStorage('\t') 
                              AS (field0:chararray, field1:chararray, field2:chararray);
             
            -- This is an example pig operation
            -- Only rows whose field0 is not null are kept.
            filtered = FILTER my_input_data
                           BY field0 IS NOT NULL;
             
            -- This is an example call to a python user-defined function
            -- Each output row carries the three input fields plus the UDF result for field0.
            with_udf_output = FOREACH filtered 
                             GENERATE field0..field2, 
                                      <%= project_name %>.example_udf(field0) AS example_udf_field;
             
            -- remove any existing data
            -- NOTE(review): presumably needed so the STORE below does not fail on an
            -- already-existing output location; confirm against the Pig documentation.
            rmf $OUTPUT_PATH;
             
            -- store the results
            -- Output is written to $OUTPUT_PATH using the same tab delimiter as the input.
            STORE with_udf_output 
             INTO '$OUTPUT_PATH' 
            USING PigStorage('\t');
         
     | 
| 
         @@ -0,0 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            from pig_util import outputSchema
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # 
         
     | 
| 
      
 4 
     | 
    
         
            +
            # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Pig needs to know the schema of the data coming out of the function, 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # which we specify using the @outputSchema decorator.
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            @outputSchema('example_udf:int')
            def example_udf(input_str):
                """
                Example UDF: return the length of input_str.

                A missing or empty input (None or '') yields None instead of 0.
                """
                if not input_str:
                    return None
                return len(input_str)
         
     | 
| 
         @@ -0,0 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            from pig_util import outputSchema
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # 
         
     | 
| 
      
 4 
     | 
    
         
            +
            # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Pig needs to know the schema of the data coming out of the function, 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # which we specify using the @outputSchema decorator.
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            @outputSchema('example_udf:int')
            def example_udf(input_str):
                """
                Example UDF: return the length of input_str.

                A missing or empty input (None or '') yields None instead of 0.
                """
                if not input_str:
                    return None
                return len(input_str)
         
     | 
| 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Copyright 2012 Mortar Data Inc.
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 
      
 5 
     | 
    
         
            +
            # you may not use this file except in compliance with the License.
         
     | 
| 
      
 6 
     | 
    
         
            +
            # You may obtain a copy of the License at
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # Unless required by applicable law or agreed to in writing, software
         
     | 
| 
      
 11 
     | 
    
         
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 
      
 12 
     | 
    
         
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 
      
 13 
     | 
    
         
            +
            # See the License for the specific language governing permissions and
         
     | 
| 
      
 14 
     | 
    
         
            +
            # limitations under the License.
         
     | 
| 
      
 15 
     | 
    
         
            +
            #
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            module Mortar
              # Version of the mortar gem; see http://semver.org/ for the
              # numbering scheme. Frozen so that the shared string cannot be
              # mutated in place by code that reads it.
              VERSION = "0.1.0".freeze
            end
         
     | 
| 
         @@ -0,0 +1,598 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # Copyright 2011, 2012 Keith Rarick
         
     | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Permission is hereby granted, free of charge, to any person obtaining a copy
         
     | 
| 
      
 6 
     | 
    
         
            +
            # of this software and associated documentation files (the "Software"), to deal
         
     | 
| 
      
 7 
     | 
    
         
            +
            # in the Software without restriction, including without limitation the rights
         
     | 
| 
      
 8 
     | 
    
         
            +
            # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         
     | 
| 
      
 9 
     | 
    
         
            +
            # copies of the Software, and to permit persons to whom the Software is
         
     | 
| 
      
 10 
     | 
    
         
            +
            # furnished to do so, subject to the following conditions:
         
     | 
| 
      
 11 
     | 
    
         
            +
            #
         
     | 
| 
      
 12 
     | 
    
         
            +
            # The above copyright notice and this permission notice shall be included in
         
     | 
| 
      
 13 
     | 
    
         
            +
            # all copies or substantial portions of the Software.
         
     | 
| 
      
 14 
     | 
    
         
            +
            #
         
     | 
| 
      
 15 
     | 
    
         
            +
            # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         
     | 
| 
      
 16 
     | 
    
         
            +
            # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         
     | 
| 
      
 17 
     | 
    
         
            +
            # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         
     | 
| 
      
 18 
     | 
    
         
            +
            # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         
     | 
| 
      
 19 
     | 
    
         
            +
            # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         
     | 
| 
      
 20 
     | 
    
         
            +
            # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
         
     | 
| 
      
 21 
     | 
    
         
            +
            # THE SOFTWARE.
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            # See https://github.com/kr/okjson for updates.
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            require 'stringio'
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            # Some parts adapted from
         
     | 
| 
      
 28 
     | 
    
         
            +
            # http://golang.org/src/pkg/json/decode.go and
         
     | 
| 
      
 29 
     | 
    
         
            +
            # http://golang.org/src/pkg/utf8/utf8.go
         
     | 
| 
      
 30 
     | 
    
         
            +
            module Mortar
         
     | 
| 
      
 31 
     | 
    
         
            +
              module OkJson
         
     | 
| 
      
 32 
     | 
    
         
            +
                extend self
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                # Decodes a json document in string s and
         
     | 
| 
      
 36 
     | 
    
         
            +
                # returns the corresponding ruby value.
         
     | 
| 
      
 37 
     | 
    
         
            +
                # String s must be valid UTF-8. If you have
         
     | 
| 
      
 38 
     | 
    
         
            +
                # a string in some other encoding, convert
         
     | 
| 
      
 39 
     | 
    
         
            +
                # it first.
         
     | 
| 
      
 40 
     | 
    
         
            +
                #
         
     | 
| 
      
 41 
     | 
    
         
            +
                # String values in the resulting structure
         
     | 
| 
      
 42 
     | 
    
         
            +
                # will be UTF-8.
         
     | 
| 
      
 43 
     | 
    
         
            +
                # Lexes +s+, parses the resulting tokens as a json text and
                # returns the parsed ruby value. Raises Error when tokens
                # remain after the top-level value.
                def decode(s)
                  value, leftover = textparse(lex(s))
                  raise Error, 'trailing garbage' unless leftover.empty?
                  value
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                # Parses a "json text" in the sense of RFC 4627.
         
     | 
| 
      
 54 
     | 
    
         
            +
                # Returns the parsed value and any trailing tokens.
         
     | 
| 
      
 55 
     | 
    
         
            +
                # Note: this is almost the same as valparse,
         
     | 
| 
      
 56 
     | 
    
         
            +
                # except that it does not accept atomic values.
         
     | 
| 
      
 57 
     | 
    
         
            +
                # ts is the token list produced by lex. Returns [value, rest]
                # as produced by objparse or arrparse.
                # Raises Error ('empty') when ts holds no tokens, and Error
                # ("unexpected ...") when the first token opens neither an
                # object nor an array.
                def textparse(ts)
                  # An empty list must be rejected up front; otherwise ts[0] is
                  # nil and the failure is misreported as "unexpected nil".
                  raise Error, 'empty' if ts.empty?

                  typ, _, val = ts[0]
                  case typ
                  when '{' then objparse(ts)
                  when '[' then arrparse(ts)
                  else
                    raise Error, "unexpected #{val.inspect}"
                  end
                end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                # Parses a "value" in the sense of RFC 4627.
         
     | 
| 
      
 73 
     | 
    
         
            +
                # Returns the parsed value and any trailing tokens.
         
     | 
| 
      
 74 
     | 
    
         
            +
                # ts is the token list still to be parsed. Returns [value, rest]
                # where rest is the tokens following the parsed value.
                # Raises Error ('empty') when ts holds no tokens, and Error
                # ("unexpected ...") when the first token cannot start a value.
                def valparse(ts)
                  # An empty list must be rejected up front; otherwise ts[0] is
                  # nil and the failure is misreported as "unexpected nil".
                  raise Error, 'empty' if ts.empty?

                  typ, _, val = ts[0]
                  case typ
                  when '{' then objparse(ts)
                  when '[' then arrparse(ts)
                  when :val,:str then [val, ts[1..-1]]
                  else
                    raise Error, "unexpected #{val.inspect}"
                  end
                end
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
                # Parses an "object" in the sense of RFC 4627.
         
     | 
| 
      
 91 
     | 
    
         
            +
                # Returns the parsed value and any trailing tokens.
         
     | 
| 
      
 92 
     | 
    
         
            +
                # ts must start with a '{' token. Returns [hash, rest] where
                # rest is the tokens following the closing '}'.
                # Raises Error when the token list ends before the object is
                # closed, or when a member is malformed.
                def objparse(ts)
                  ts = eat('{', ts)
                  obj = {}

                  # Truncated input (e.g. "{") leaves nothing to inspect; report
                  # it as a parse Error rather than failing on nil.
                  raise Error, 'unexpected end of input' if ts.empty?
                  return obj, ts[1..-1] if ts[0][0] == '}'

                  loop do
                    k, v, ts = pairparse(ts)
                    obj[k] = v

                    raise Error, 'unexpected end of input' if ts.empty?
                    return obj, ts[1..-1] if ts[0][0] == '}'

                    # Anything other than '}' after a member must be a comma.
                    ts = eat(',', ts)
                  end
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
             
     | 
| 
      
 120 
     | 
    
         
            +
                # Parses a "member" in the sense of RFC 4627.
         
     | 
| 
      
 121 
     | 
    
         
            +
                # Returns the parsed values and any trailing tokens.
         
     | 
| 
      
 122 
     | 
    
         
            +
                # Reads one `"key" : value` member from the front of ts and
                # returns [key, value, rest]. Raises Error when the first token
                # is not a :str token.
                def pairparse(ts)
                  head = ts[0]
                  rest = ts[1..-1]
                  typ, _, key = head
                  raise Error, "unexpected #{key.inspect}" unless typ == :str
                  value, rest = valparse(eat(':', rest))
                  [key, value, rest]
                end
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                # Parses an "array" in the sense of RFC 4627.
         
     | 
| 
      
 134 
     | 
    
         
            +
                # Returns the parsed value and any trailing tokens.
         
     | 
| 
      
 135 
     | 
    
         
            +
                # ts must start with a '[' token. Returns [array, rest] where
                # rest is the tokens following the closing ']'.
                # Raises Error when the token list ends before the array is
                # closed, or when an element is malformed.
                def arrparse(ts)
                  ts = eat('[', ts)
                  arr = []

                  # Truncated input (e.g. "[") leaves nothing to inspect; report
                  # it as a parse Error rather than failing on nil.
                  raise Error, 'unexpected end of input' if ts.empty?
                  return arr, ts[1..-1] if ts[0][0] == ']'

                  loop do
                    v, ts = valparse(ts)
                    arr << v

                    raise Error, 'unexpected end of input' if ts.empty?
                    return arr, ts[1..-1] if ts[0][0] == ']'

                    # Anything other than ']' after an element must be a comma.
                    ts = eat(',', ts)
                  end
                end
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
             
     | 
| 
      
 163 
     | 
    
         
            +
                # Consumes the leading token of ts, which must have type typ,
                # and returns the remaining tokens. Raises Error when the
                # leading token has a different type or when no token is left.
                def eat(typ, ts)
                  # Truncated input leaves no token to compare; report it as a
                  # parse Error instead of failing on nil[0].
                  if ts.nil? || ts.empty?
                    raise Error, "expected #{typ} (got end of input)"
                  end
                  if ts[0][0] != typ
                    raise Error, "expected #{typ} (got #{ts[0].inspect})"
                  end
                  ts[1..-1]
                end
         
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
             
     | 
| 
      
 171 
     | 
    
         
            +
                # Scans s and returns a list of json tokens,
         
     | 
| 
      
 172 
     | 
    
         
            +
                # excluding white space (as defined in RFC 4627).
         
     | 
| 
      
 173 
     | 
    
         
            +
                # Tokenizes s by calling tok on the unread remainder until the
                # input is used up. Returns the [type, lexeme, value] triples
                # in order, leaving out :space tokens. Raises Error when tok
                # yields no token type.
                def lex(s)
                  tokens = []
                  until s.empty?
                    typ, lexeme, val = tok(s)
                    raise Error, "invalid character at #{s[0,10].inspect}" if typ.nil?
                    tokens << [typ, lexeme, val] unless typ == :space
                    # Drop the consumed lexeme from the front of the input.
                    s = s[lexeme.length..-1]
                  end
                  tokens
                end
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
                # Scans the first token in s and
         
     | 
| 
      
 190 
     | 
    
         
            +
                # returns a 3-element list, or nil
         
     | 
| 
      
 191 
     | 
    
         
            +
                # if s does not begin with a valid token.
         
     | 
| 
      
 192 
     | 
    
         
            +
                #
         
     | 
| 
      
 193 
     | 
    
         
            +
                # The first list element is one of
         
     | 
| 
      
 194 
     | 
    
         
            +
                # '{', '}', ':', ',', '[', ']',
         
     | 
| 
      
 195 
     | 
    
         
            +
                # :val, :str, and :space.
         
     | 
| 
      
 196 
     | 
    
         
            +
                #
         
     | 
| 
      
 197 
     | 
    
         
            +
                # The second element is the lexeme.
         
     | 
| 
      
 198 
     | 
    
         
            +
                #
         
     | 
| 
      
 199 
     | 
    
         
            +
                # The third element is the value of the
         
     | 
| 
      
 200 
     | 
    
         
            +
                # token for :val and :str, otherwise
         
     | 
| 
      
 201 
     | 
    
         
            +
                # it is the lexeme.
         
     | 
| 
      
 202 
     | 
    
         
            +
                def tok(s)
                  # Dispatch on the first character of the remaining input.
                  case s[0]
                  # Structural characters: the token type is the character
                  # itself; lexeme and value are the one-character slice.
                  when ?{  then ['{', s[0,1], s[0,1]]
                  when ?}  then ['}', s[0,1], s[0,1]]
                  when ?:  then [':', s[0,1], s[0,1]]
                  when ?,  then [',', s[0,1], s[0,1]]
                  when ?[  then ['[', s[0,1], s[0,1]]
                  when ?]  then [']', s[0,1], s[0,1]]
                  # Literal names: each helper checks the whole keyword and
                  # returns [] when the text does not match.
                  when ?n  then nulltok(s)
                  when ?t  then truetok(s)
                  when ?f  then falsetok(s)
                  # A double quote starts a string token (strtok is defined
                  # outside this excerpt).
                  when ?"  then strtok(s)
                  # White space becomes a :space token. Spc is a constant defined
                  # outside this excerpt; presumably the space character, TODO confirm.
                  when Spc then [:space, s[0,1], s[0,1]]
                  when ?\t then [:space, s[0,1], s[0,1]]
                  when ?\n then [:space, s[0,1], s[0,1]]
                  when ?\r then [:space, s[0,1], s[0,1]]
                  # Everything else is handed to numtok.
                  else          numtok(s)
                  end
                end
         
     | 
| 
      
 221 
     | 
    
         
            +
             
     | 
| 
      
 222 
     | 
    
         
            +
             
     | 
| 
      
 223 
     | 
    
         
            +
                # Returns the :val token for the keyword null when s starts
                # with it, otherwise an empty array.
                def nulltok(s)
                  return [] unless s[0,4] == 'null'
                  [:val, 'null', nil]
                end
         
     | 
| 
      
 224 
     | 
    
         
            +
                # Returns the :val token for the keyword true when s starts
                # with it, otherwise an empty array.
                def truetok(s)
                  return [] unless s[0,4] == 'true'
                  [:val, 'true', true]
                end
         
     | 
| 
      
 225 
     | 
    
         
            +
                # Returns the :val token for the keyword false when s starts
                # with it, otherwise an empty array.
                def falsetok(s)
                  return [] unless s[0,5] == 'false'
                  [:val, 'false', false]
                end
         
     | 
| 
      
 226 
     | 
    
         
            +
             
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
      
 228 
     | 
    
         
            +
                # Scans a JSON number from the front of s.
                #
                # Returns [:val, lexeme, value], or [] when s does not start
                # with a number. The value is a Float when the lexeme has a
                # fraction or a negative exponent, and an Integer otherwise.
                def numtok(s)
                  # Group 1 is the signed integer part, group 2 the
                  # fraction, group 3 the exponent. \A anchors the match so
                  # the rest of s is never scanned.
                  m = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
                  return [] unless m

                  lexeme = m[0]
                  if m[2]
                    [:val, lexeme, Float(lexeme)]
                  elsif m[3]
                    # to_i(10) rather than Integer(): exponents such as
                    # "08" would be rejected as malformed octal.
                    exponent = m[3][1..-1].to_i(10)
                    if exponent < 0
                      # 10**negative is not an Integer; use a Float.
                      [:val, lexeme, Float(lexeme)]
                    else
                      # m[1] carries the sign, so "-1e2" stays negative.
                      [:val, lexeme, m[1].to_i(10) * (10**exponent)]
                    end
                  else
                    [:val, lexeme, Integer(lexeme)]
                  end
                end
         
     | 
| 
      
 242 
     | 
    
         
            +
             
     | 
| 
      
 243 
     | 
    
         
            +
             
     | 
| 
      
 244 
     | 
    
         
            +
                # Scans a JSON string literal from the front of s.
                #
                # Returns [:str, lexeme, value] where lexeme is the quoted
                # source text and value is the result of unquote.
                # Raises Error when s does not begin with a well-formed
                # string literal.
                def strtok(s)
                  # \A pins the match to the start of s. Without it, an
                  # invalid literal at position 0 could be skipped and a
                  # later literal in the input returned in its place.
                  m = /\A"(?:[^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
                  raise Error, "invalid string literal at #{abbrev(s)}" unless m

                  [:str, m[0], unquote(m[0])]
                end
         
     | 
| 
      
 251 
     | 
    
         
            +
             
     | 
| 
      
 252 
     | 
    
         
            +
             
     | 
| 
      
 253 
     | 
    
         
            +
                # Builds a short, backtick-quoted excerpt of s for use in
                # error messages: at most the first 10 characters, cut at
                # the first backtick, with '...' appended when the excerpt
                # is shorter than s.
                def abbrev(s)
                  t = s[0,10]
                  # String#index gives the position; t['`'] would return
                  # the matched substring and break the slice below.
                  p = t.index('`')
                  t = t[0,p] if p
                  t = t + '...' if t.length < s.length
                  '`' + t + '`'
                end
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
             
     | 
| 
      
 262 
     | 
    
         
            +
                # Converts a quoted json string literal q into a UTF-8-encoded string.
                # The rules are different than for Ruby, so we cannot use eval.
                # Raises Error if q contains an unescaped control character or
                # double quote, an unknown escape, or a malformed \u escape.
                # A \u escape naming a surrogate that is not part of a valid
                # high/low pair decodes to Ucharerr.
                def unquote(q)
                  q = q[1...-1]
                  a = q.dup # allocate a big enough string
                  rubydoesenc = false
                  # In ruby >= 1.9, a[w] is a codepoint, not a byte.
                  if a.class.method_defined?(:force_encoding)
                    a.force_encoding('UTF-8')
                    rubydoesenc = true
                  end
                  # r is the read position in q, w the write position in a.
                  r, w = 0, 0
                  while r < q.length
                    c = q[r]
                    case true
                    when c == ?\\
                      r += 1
                      if r >= q.length
                        raise Error, "string literal ends with a \"\\\": \"#{q}\""
                      end

                      case q[r]
                      when ?",?\\,?/,?'
                        a[w] = q[r]
                        r += 1
                        w += 1
                      when ?b,?f,?n,?r,?t
                        a[w] = Unesc[q[r]]
                        r += 1
                        w += 1
                      when ?u
                        r += 1
                        uchar = begin
                          hexdec4(q[r,4])
                        rescue Error => e
                          # hexdec4 reports failures as Error.
                          raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
                        end
                        r += 4
                        if surrogate? uchar
                          # Only treat the following text as the second
                          # half of a pair when it really is a \uXXXX escape.
                          low = nil
                          if q[r,2] == '\\u' && q.length >= r+6
                            low = begin
                              hexdec4(q[r+2,4])
                            rescue Error
                              nil
                            end
                          end
                          # An unpaired surrogate is not a character.
                          uchar = low ? subst(uchar, low) : Ucharerr
                          # A valid pair; consume.
                          r += 6 if uchar != Ucharerr
                        end
                        if rubydoesenc
                          # pack('U') yields a UTF-8 string regardless of
                          # the encoding of this source file.
                          a[w] = [uchar].pack('U')
                          w += 1
                        else
                          w += ucharenc(a, w, uchar)
                        end
                      else
                        raise Error, "invalid escape char #{q[r]} in \"#{q}\""
                      end
                    when c == ?", c < Spc
                      raise Error, "invalid character in string literal \"#{q}\""
                    else
                      # Copy anything else byte-for-byte.
                      # Valid UTF-8 will remain valid UTF-8.
                      # Invalid UTF-8 will remain invalid UTF-8.
                      # In ruby >= 1.9, c is a codepoint, not a byte,
                      # in which case this is still what we want.
                      a[w] = c
                      r += 1
                      w += 1
                    end
                  end
                  a[0,w]
                end
         
     | 
| 
      
 335 
     | 
    
         
            +
             
     | 
| 
      
 336 
     | 
    
         
            +
             
     | 
| 
      
 337 
     | 
    
         
            +
                # Writes the UTF-8 encoding of unicode character u into
                # string a, one byte per index starting at position i.
                # Returns the number of bytes written (1 to 4).
                def ucharenc(a, i, u)
                  bytes =
                    if u <= Uchar1max
                      [u & 0xff]
                    elsif u <= Uchar2max
                      [Utag2 | ((u>>6)&0xff),
                       Utagx | (u&Umaskx)]
                    elsif u <= Uchar3max
                      [Utag3 | ((u>>12)&0xff),
                       Utagx | ((u>>6)&Umaskx),
                       Utagx | (u&Umaskx)]
                    else
                      [Utag4 | ((u>>18)&0xff),
                       Utagx | ((u>>12)&Umaskx),
                       Utagx | ((u>>6)&Umaskx),
                       Utagx | (u&Umaskx)]
                    end

                  bytes.each_with_index { |b, k| a[i+k] = b.chr }
                  bytes.length
                end
         
     | 
| 
      
 362 
     | 
    
         
            +
             
     | 
| 
      
 363 
     | 
    
         
            +
             
     | 
| 
      
 364 
     | 
    
         
            +
                # Decodes the four hex digits in s into an integer, most
                # significant digit first. Raises Error when s is not
                # exactly four characters long; digit validation is left
                # to nibble.
                def hexdec4(s)
                  raise Error, 'short' unless s.length == 4

                  (0..3).inject(0) { |acc, idx| (acc << 4) | nibble(s[idx]) }
                end
         
     | 
| 
      
 370 
     | 
    
         
            +
             
     | 
| 
      
 371 
     | 
    
         
            +
             
     | 
| 
      
 372 
     | 
    
         
            +
                # Combines a surrogate pair into the code point it encodes.
                # u1 must lie in [Usurr1, Usurr2) and u2 in [Usurr2, Usurr3);
                # for any other combination Ucharerr is returned.
                def subst(u1, u2)
                  if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
                    # Parenthesized explicitly: in Ruby `+` binds tighter
                    # than `|`, so the offset must be added after the two
                    # halves have been merged.
                    return (((u1-Usurr1)<<10) | (u2-Usurr2)) + Usurrself
                  end
                  return Ucharerr
                end
         
     | 
| 
      
 378 
     | 
    
         
            +
             
     | 
| 
      
 379 
     | 
    
         
            +
             
     | 
| 
      
 380 
     | 
    
         
            +
                # True when u lies in the half-open range [Usurr1, Usurr3).
                def surrogate?(u)
                  return false unless Usurr1 <= u
                  u < Usurr3
                end
         
     | 
| 
      
 383 
     | 
    
         
            +
             
     | 
| 
      
 384 
     | 
    
         
            +
             
     | 
| 
      
 385 
     | 
    
         
            +
                # Returns the numeric value (0..15) of the hex digit c.
                # Raises Error when c is not one of 0-9, a-f or A-F.
                def nibble(c)
                  case true
                  when ?0 <= c && c <= ?9 then c.ord - ?0.ord
                  # Hex letters stop at f/F; later letters are not digits.
                  when ?a <= c && c <= ?f then c.ord - ?a.ord + 10
                  when ?A <= c && c <= ?F then c.ord - ?A.ord + 10
                  else
                    raise Error, "invalid hex code #{c}"
                  end
                end
         
     | 
| 
      
 394 
     | 
    
         
            +
             
     | 
| 
      
 395 
     | 
    
         
            +
             
     | 
| 
      
 396 
     | 
    
         
            +
                # Encodes x into a json text.
                # The root value x must be an Array or a Hash; anything
                # else raises Error. Values nested inside x may be Array,
                # Hash, String, Numeric, true, false or nil (Symbol is not
                # accepted). An error is raised for any other nested value,
                # such as NaN, Infinity, Symbol or Proc, or for a Hash key
                # that is not a String.
                # Strings contained in x must be valid UTF-8.
                def encode(x)
                  return objenc(x) if Hash === x
                  return arrenc(x) if Array === x

                  raise Error, 'root value must be an Array or a Hash'
                end
         
     | 
| 
      
 413 
     | 
    
         
            +
             
     | 
| 
      
 414 
     | 
    
         
            +
             
     | 
| 
      
 415 
     | 
    
         
            +
                # Encodes a single value x as json text by dispatching on
                # its type. Raises Error for any type not listed below.
                def valenc(x)
                  # The three literals map straight to their keywords.
                  case x
                  when nil   then return "null"
                  when true  then return "true"
                  when false then return "false"
                  end

                  case x
                  when String
                    strenc(x)
                  when Numeric
                    numenc(x)
                  when Array
                    arrenc(x)
                  when Hash
                    objenc(x)
                  else
                    raise Error, "cannot encode #{x.class}: #{x.inspect}"
                  end
                end
         
     | 
| 
      
 428 
     | 
    
         
            +
             
     | 
| 
      
 429 
     | 
    
         
            +
             
     | 
| 
      
 430 
     | 
    
         
            +
                # Encodes Hash x as a json object: each entry becomes
                # key:value, entries are comma-separated and wrapped in braces.
                def objenc(x)
                  members = x.map do |key, value|
                    keyenc(key) + ':' + valenc(value)
                  end
                  '{' + members.join(',') + '}'
                end
         
     | 
| 
      
 433 
     | 
    
         
            +
             
     | 
| 
      
 434 
     | 
    
         
            +
             
     | 
| 
      
 435 
     | 
    
         
            +
                # Encodes Array a as a json array: each element is encoded
                # with valenc, comma-separated and wrapped in brackets.
                def arrenc(a)
                  elements = a.map { |item| valenc(item) }
                  '[' + elements.join(',') + ']'
                end
         
     | 
| 
      
 438 
     | 
    
         
            +
             
     | 
| 
      
 439 
     | 
    
         
            +
             
     | 
| 
      
 440 
     | 
    
         
            +
                # Encodes Hash key k, which must be a String; any other
                # key type raises Error.
                def keyenc(k)
                  return strenc(k) if String === k

                  raise Error, "Hash key is not a string: #{k.inspect}"
                end
         
     | 
| 
      
 447 
     | 
    
         
            +
             
     | 
| 
      
 448 
     | 
    
         
            +
             
     | 
| 
      
 449 
     | 
    
         
            +
                # Encodes string s as a quoted json string literal.
                # The double quote, the backslash and the control characters
                # \b \f \n \r \t use their short escapes; every other
                # character below Spc is written as a \u00XX escape, since a
                # raw control character is not allowed inside a json string.
                # Characters that cannot be read as valid text are replaced
                # with Ustrerr.
                def strenc(s)
                  t = StringIO.new
                  t.putc(?")
                  r = 0

                  # In ruby >= 1.9, s[r] is a codepoint, not a byte.
                  rubydoesenc = s.class.method_defined?(:encoding)

                  while r < s.length
                    case s[r]
                    when ?"  then t.print('\\"')
                    when ?\\ then t.print('\\\\')
                    when ?\b then t.print('\\b')
                    when ?\f then t.print('\\f')
                    when ?\n then t.print('\\n')
                    when ?\r then t.print('\\r')
                    when ?\t then t.print('\\t')
                    else
                      c = s[r]
                      case true
                      when rubydoesenc
                        begin
                          # c.ord will raise an error if c is invalid UTF-8
                          if c.ord < Spc.ord
                            c = "\\u%04x" % c.ord
                          end
                          t.write(c)
                        rescue
                          t.write(Ustrerr)
                        end
                      when c < Spc
                        # Remaining control bytes (ruby < 1.9, c is a byte).
                        t.write("\\u%04x" % c)
                      when Spc <= c && c <= ?~
                        t.putc(c)
                      else
                        n = ucharcopy(t, s, r) # ensure valid UTF-8 output
                        r += n - 1 # r is incremented below
                      end
                    end
                    r += 1
                  end
                  t.putc(?")
                  t.string
                end
         
     | 
| 
      
 488 
     | 
    
         
            +
             
     | 
| 
      
 489 
     | 
    
         
            +
             
     | 
| 
      
 490 
     | 
    
         
            +
                # Encodes number x as json text using its string form.
                # Raises Error when x reports itself as NaN or infinite.
                def numenc(x)
                  unrepresentable =
                    begin
                      x.nan? || x.infinite?
                    rescue
                      # Numbers without these predicates are treated as finite.
                      false
                    end
                  if unrepresentable
                    raise Error, "Numeric cannot be represented: #{x}"
                  end

                  "#{x}"
                end
         
     | 
| 
      
 496 
     | 
    
         
            +
             
     | 
| 
      
 497 
     | 
    
         
            +
             
     | 
| 
      
 498 
     | 
    
         
            +
                # Copies the valid UTF-8 bytes of a single character
         
     | 
| 
      
 499 
     | 
    
         
            +
                # from string s at position i to I/O object t, and
         
     | 
| 
      
 500 
     | 
    
         
            +
                # returns the number of bytes copied.
         
     | 
| 
      
 501 
     | 
    
         
            +
                # If no valid UTF-8 char exists at position i,
         
     | 
| 
      
 502 
     | 
    
         
            +
                # ucharcopy writes Ustrerr and returns 1.
         
     | 
| 
      
 503 
     | 
    
         
            +
                # @param t [#putc, #write] destination I/O object; receives either
                #   the bytes of one well-formed character or Ustrerr.
                # @param s [String] source string, indexed byte-wise via s[i].ord
                #   (assumes s[i] yields a single byte -- TODO confirm with callers).
                # @param i [Integer] offset in s of the first byte to examine.
                # @return [Integer] count of source bytes consumed: 1..4 for a
                #   well-formed sequence, 1 when the replacement was written.
                def ucharcopy(t, s, i)
                  n = s.length - i
                  raise Utf8Error if n < 1

                  c0 = s[i].ord

                  # 1-byte, 7-bit sequence?
                  if c0 < Utagx
                    t.putc(c0)
                    return 1
                  end

                  raise Utf8Error if c0 < Utag2 # unexpected continuation byte?

                  raise Utf8Error if n < 2 # need continuation byte
                  c1 = s[i+1].ord
                  raise Utf8Error if c1 < Utagx || Utag2 <= c1

                  # 2-byte, 11-bit sequence?
                  if c0 < Utag3
                    # reject overlong encodings of code points that fit in 1 byte
                    raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
                    t.putc(c0)
                    t.putc(c1)
                    return 2
                  end

                  # need second continuation byte
                  raise Utf8Error if n < 3

                  c2 = s[i+2].ord
                  raise Utf8Error if c2 < Utagx || Utag2 <= c2

                  # 3-byte, 16-bit sequence?
                  if c0 < Utag4
                    u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
                    # reject overlong encodings of code points that fit in 2 bytes
                    raise Utf8Error if u <= Uchar2max
                    # UTF-16 surrogate halves (U+D800..U+DFFF) are not valid in UTF-8
                    raise Utf8Error if Usurr1 <= u && u < Usurr3
                    t.putc(c0)
                    t.putc(c1)
                    t.putc(c2)
                    return 3
                  end

                  # need third continuation byte
                  raise Utf8Error if n < 4
                  c3 = s[i+3].ord
                  raise Utf8Error if c3 < Utagx || Utag2 <= c3

                  # 4-byte, 21-bit sequence?
                  if c0 < Utag5
                    u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
                    # reject overlong encodings of code points that fit in 3 bytes
                    raise Utf8Error if u <= Uchar3max
                    # nothing above U+10FFFF is a Unicode code point
                    raise Utf8Error if u > 0x10ffff
                    t.putc(c0)
                    t.putc(c1)
                    t.putc(c2)
                    t.putc(c3)
                    return 4
                  end

                  # lead byte 0xF8 or above never starts a valid sequence
                  raise Utf8Error
                rescue Utf8Error
                  # every raise above happens before any putc, so t holds nothing
                  # partial; emit the replacement and skip one source byte
                  t.write(Ustrerr)
                  return 1
                end
         
     | 
| 
      
 566 
     | 
    
         
            +
             
     | 
| 
      
 567 
     | 
    
         
            +
             
     | 
| 
      
 568 
     | 
    
         
            +
                # Signals that the bytes under inspection are not well-formed UTF-8.
                # ucharcopy raises and rescues it internally to switch to writing
                # the replacement character.
                class Utf8Error < ::StandardError; end
         
     | 
| 
      
 570 
     | 
    
         
            +
             
     | 
| 
      
 571 
     | 
    
         
            +
             
     | 
| 
      
 572 
     | 
    
         
            +
                # General-purpose exception type for this namespace; adds nothing to
                # StandardError. NOTE(review): presumably raised by code elsewhere in
                # this file -- no raise site is visible here; confirm.
                class Error < ::StandardError; end
         
     | 
| 
      
 574 
     | 
    
         
            +
             
     | 
| 
      
 575 
     | 
    
         
            +
             
     | 
| 
      
 576 
     | 
    
         
            +
                # Byte-value thresholds used to classify UTF-8 bytes: values below
                # Utagx are 7-bit single-byte characters, values in [Utagx, Utag2)
                # are continuation bytes, and Utag3/Utag4/Utag5 bound the lead
                # bytes of 2-, 3- and 4-byte sequences respectively.
                Utagx = 0x80 # 1000 0000
                Utag2 = 0xc0 # 1100 0000
                Utag3 = 0xe0 # 1110 0000
                Utag4 = 0xf0 # 1111 0000
                Utag5 = 0xF8 # 1111 1000

                # Masks selecting the payload bits of a continuation byte (Umaskx)
                # and of the lead byte of a 2-, 3- or 4-byte sequence.
                Umaskx = 0x3f # 0011 1111
                Umask2 = 0x1f # 0001 1111
                Umask3 = 0x0f # 0000 1111
                Umask4 = 0x07 # 0000 0111

                # Largest values that fit in 7, 11 and 16 bits; a longer sequence
                # decoding to a value at or below the matching limit is overlong.
                Uchar1max = (1<<7) - 1
                Uchar2max = (1<<11) - 1
                Uchar3max = (1<<16) - 1

                Ucharerr = 0xFFFD # unicode "replacement char"
                # Frozen so the shared replacement string cannot be mutated in place.
                Ustrerr = "\xef\xbf\xbd".freeze # unicode "replacement char"

                # NOTE(review): these look like the UTF-16 surrogate boundaries
                # (0xd800 first high, 0xdc00 first low, 0xe000 first value past the
                # range) and the first code point that needs a surrogate pair --
                # confirm against the code that uses them.
                Usurrself = 0x10000
                Usurr1 = 0xd800
                Usurr2 = 0xdc00
                Usurr3 = 0xe000

                # First element of a one-space string (an Integer on Ruby 1.8,
                # a one-character String on 1.9 and later).
                Spc = ' '[0]
                # Maps an escape letter to the control character it denotes.
                # Frozen so the shared lookup table cannot be altered at runtime.
                Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}.freeze
         
     | 
| 
      
 597 
     | 
    
         
            +
              end
         
     | 
| 
      
 598 
     | 
    
         
            +
            end
         
     |