rsyntaxtree 0.9.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.tags +171 -0
- data/Gemfile +2 -0
- data/README.md +0 -1
- data/Rakefile +7 -0
- data/bin/rsyntaxtree +38 -31
- data/fonts/OpenMoji-Black.ttf +0 -0
- data/fonts/OpenMoji-Color.ttf +0 -0
- data/lib/rsyntaxtree/base_graph.rb +262 -0
- data/lib/rsyntaxtree/element.rb +156 -25
- data/lib/rsyntaxtree/elementlist.rb +16 -13
- data/lib/rsyntaxtree/markup_parser.rb +208 -0
- data/lib/rsyntaxtree/string_parser.rb +187 -204
- data/lib/rsyntaxtree/svg_graph.rb +446 -299
- data/lib/rsyntaxtree/utils.rb +49 -6
- data/lib/rsyntaxtree/version.rb +1 -1
- data/lib/rsyntaxtree.rb +143 -161
- data/rsyntaxtree.gemspec +2 -0
- data/syntree.png +0 -0
- data/syntree.svg +24 -0
- data/test/markup_parser_test.rb +207 -0
- metadata +39 -11
- data/fonts/latinmodern-math.otf +0 -0
- data/fonts/lmroman10-bold.otf +0 -0
- data/fonts/lmroman10-bolditalic.otf +0 -0
- data/fonts/lmroman10-italic.otf +0 -0
- data/fonts/lmroman10-regular.otf +0 -0
- data/lib/rsyntaxtree/error_message.rb +0 -68
- data/lib/rsyntaxtree/graph.rb +0 -312
- data/lib/rsyntaxtree/tree_graph.rb +0 -327
| @@ -7,248 +7,231 @@ | |
| 7 7 | 
             
            #
         | 
| 8 8 | 
             
            # Parses a phrase into leafs and nodes and store the result in an element list
         | 
| 9 9 | 
             
            # (see element_list.rb)
         | 
| 10 | 
            -
            #
         | 
| 11 | 
            -
            # This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's
         | 
| 12 | 
            -
            # excellent program phpSyntaxTree.
         | 
| 13 | 
            -
            #
         | 
| 14 10 | 
             
            # Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
         | 
| 15 | 
            -
            # Copyright (c) 2003-2004 Andre Eisenbach <andre@ironcreek.net>
         | 
| 16 11 |  | 
| 17 12 | 
             
            require 'elementlist'
         | 
| 18 13 | 
             
            require 'element'
         | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
            # | 
| 27 | 
            -
            # | 
| 28 | 
            -
             | 
| 29 | 
            -
            # | 
| 30 | 
            -
             | 
| 31 | 
            -
            #  | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
                @elist = ElementList.new # Initialize internal element list 
         | 
| 45 | 
            -
                @pos = 0 # Position in the sentence
         | 
| 46 | 
            -
                @id = 1 # ID for the next element
         | 
| 47 | 
            -
                @level = 0 # Level in the diagram
         | 
| 48 | 
            -
                @tncnt = Hash.new # Node type counts
         | 
| 49 | 
            -
              end
         | 
| 50 | 
            -
             | 
| 51 | 
            -
              # caution: quick and dirty solution      
         | 
| 52 | 
            -
              def valid?
         | 
| 53 | 
            -
                if(@data.length < 1)
         | 
| 54 | 
            -
                  return false
         | 
| 55 | 
            -
                end
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                if /\[\s*\]/m =~ @data
         | 
| 58 | 
            -
                  return false  
         | 
| 59 | 
            -
                end
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                if /\[\_/ =~ @data
         | 
| 62 | 
            -
                  return false
         | 
| 63 | 
            -
                end
         | 
| 64 | 
            -
             | 
| 65 | 
            -
                text = @data.strip
         | 
| 66 | 
            -
                text_r = text.split(//)
         | 
| 67 | 
            -
                open_br, close_br = [], []
         | 
| 68 | 
            -
                escape = false
         | 
| 69 | 
            -
                text_r.each do |chr|
         | 
| 70 | 
            -
                  if chr == "\\"
         | 
| 71 | 
            -
                    escape = true
         | 
| 72 | 
            -
                  elsif chr == '[' && !escape
         | 
| 73 | 
            -
                    open_br.push(chr)
         | 
| 74 | 
            -
                  elsif chr == ']' && !escape
         | 
| 75 | 
            -
                    close_br.push(chr)
         | 
| 76 | 
            -
                    if open_br.length < close_br.length
         | 
| 77 | 
            -
                      break
         | 
| 14 | 
            +
            require 'utils'
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            module RSyntaxTree
         | 
| 17 | 
            +
              class StringParser
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                attr_accessor :data, :elist, :pos, :id, :level
         | 
| 20 | 
            +
                def initialize(str, fontset, fontsize)
         | 
| 21 | 
            +
                  # Clean up the data a little to make processing easier
         | 
| 22 | 
            +
                  # repeated newlines => a newline
         | 
| 23 | 
            +
                  string = str.gsub(/[\n\r]+/m, "\n")
         | 
| 24 | 
            +
                  # a backslash followed by a newline => a backslash followed by an 'n'
         | 
| 25 | 
            +
                  string.gsub!(/\\\n\s*/m, "\\n")
         | 
| 26 | 
            +
                  # repeated whitespace characters => " "
         | 
| 27 | 
            +
                  string.gsub!(/\s+/, " ")
         | 
| 28 | 
            +
                  string.gsub!(/\]\s+\[/, "][")
         | 
| 29 | 
            +
                  string.gsub!(/\s+\[/, "[")
         | 
| 30 | 
            +
                  string.gsub!(/\[\s+/, "[")
         | 
| 31 | 
            +
                  string.gsub!(/\s+\]/, "]")
         | 
| 32 | 
            +
                  string.gsub!(/\]\s+/, "]")
         | 
| 33 | 
            +
                  string.gsub!(/<(\d*)>/) do
         | 
| 34 | 
            +
                    num_padding = $1.to_i
         | 
| 35 | 
            +
                    if num_padding > 0
         | 
| 36 | 
            +
                      result = WHITESPACE_BLOCK * num_padding
         | 
| 37 | 
            +
                    else
         | 
| 38 | 
            +
                      result = WHITESPACE_BLOCK
         | 
| 78 39 | 
             
                    end
         | 
| 79 | 
            -
             | 
| 80 | 
            -
                    escape = false
         | 
| 40 | 
            +
                    result
         | 
| 81 41 | 
             
                  end
         | 
| 82 | 
            -
                end
         | 
| 83 | 
            -
             | 
| 84 | 
            -
                return false unless open_br.length == close_br.length
         | 
| 85 | 
            -
                # make_tree(0)
         | 
| 86 | 
            -
                # return false if @tncnt.empty?
         | 
| 87 | 
            -
                # @tncnt.each do |key, value|
         | 
| 88 | 
            -
                #   return false if key == ""
         | 
| 89 | 
            -
                # end
         | 
| 90 | 
            -
                return true
         | 
| 91 | 
            -
              end 
         | 
| 92 | 
            -
             | 
| 93 | 
            -
              def parse
         | 
| 94 | 
            -
                make_tree(0);
         | 
| 95 | 
            -
              end
         | 
| 96 42 |  | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 43 | 
            +
                  @data = string # Store it for later...
         | 
| 44 | 
            +
                  if @data.contains_cjk?
         | 
| 45 | 
            +
                    fontset[:normal] = fontset[:cjk]
         | 
| 46 | 
            +
                  end
         | 
| 47 | 
            +
                  @elist = ElementList.new # Initialize internal element list
         | 
| 48 | 
            +
                  @pos = 0 # Position in the sentence
         | 
| 49 | 
            +
                  @id = 1 # ID for the next element
         | 
| 50 | 
            +
                  @level = 0 # Level in the diagram
         | 
| 51 | 
            +
                  @fontset = fontset
         | 
| 52 | 
            +
                  @fontsize = fontsize
         | 
| 53 | 
            +
                end
         | 
| 100 54 |  | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
                  if(element.type == ETYPE_NODE)
         | 
| 106 | 
            -
                    count = 1
         | 
| 107 | 
            -
                    content = element.content
         | 
| 55 | 
            +
                def self.valid?(data)
         | 
| 56 | 
            +
                  if(data.length < 1)
         | 
| 57 | 
            +
                    raise RSTError, "Error: input text is empty"
         | 
| 58 | 
            +
                  end
         | 
| 108 59 |  | 
| 109 | 
            -
             | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 60 | 
            +
                  if /\[\s*\]/m =~ data
         | 
| 61 | 
            +
                    raise RSTError, "Error: inside the brackets is empty"
         | 
| 62 | 
            +
                  end
         | 
| 112 63 |  | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 64 | 
            +
                  text = data.strip
         | 
| 65 | 
            +
                  text_r = text.split(//)
         | 
| 66 | 
            +
                  open_br, close_br = [], []
         | 
| 67 | 
            +
                  escape = false
         | 
| 68 | 
            +
                  text_r.each do |chr|
         | 
| 69 | 
            +
                    if chr == "\\"
         | 
| 70 | 
            +
                      if escape
         | 
| 71 | 
            +
                        escape = false
         | 
| 116 72 | 
             
                      else
         | 
| 117 | 
            -
                         | 
| 73 | 
            +
                        escape = true
         | 
| 118 74 | 
             
                      end
         | 
| 75 | 
            +
                      next
         | 
| 76 | 
            +
                    end
         | 
| 119 77 |  | 
| 120 | 
            -
             | 
| 78 | 
            +
                    if escape && /[\[\]]/ =~ chr
         | 
| 79 | 
            +
                      escape = false
         | 
| 80 | 
            +
                      next
         | 
| 81 | 
            +
                    elsif chr == '['
         | 
| 82 | 
            +
                      open_br.push(chr)
         | 
| 83 | 
            +
                    elsif chr == ']'
         | 
| 84 | 
            +
                      close_br.push(chr)
         | 
| 85 | 
            +
                      if open_br.length < close_br.length
         | 
| 86 | 
            +
                        break
         | 
| 87 | 
            +
                      end
         | 
| 121 88 | 
             
                    end
         | 
| 89 | 
            +
                    escape = false
         | 
| 90 | 
            +
                  end
         | 
| 122 91 |  | 
| 92 | 
            +
                  if open_br.empty? && close_br.empty?
         | 
| 93 | 
            +
                    raise RSTError, "Error: input text does not contain paired brackets"
         | 
| 94 | 
            +
                  elsif open_br.length == close_br.length
         | 
| 95 | 
            +
                    return true
         | 
| 96 | 
            +
                  else
         | 
| 97 | 
            +
                    raise RSTError, "Error: open and close brackets do not match"
         | 
| 123 98 | 
             
                  end
         | 
| 124 | 
            -
                end  
         | 
| 125 | 
            -
                @tncnt
         | 
| 126 | 
            -
              end
         | 
| 127 | 
            -
             
         | 
| 128 | 
            -
              def count_node(name)
         | 
| 129 | 
            -
                name = name.strip
         | 
| 130 | 
            -
                if @tncnt[name]
         | 
| 131 | 
            -
                  @tncnt[name] += 1
         | 
| 132 | 
            -
                else
         | 
| 133 | 
            -
                  @tncnt[name] = 1
         | 
| 134 99 | 
             
                end
         | 
| 135 | 
            -
              end
         | 
| 136 100 |  | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
             | 
| 140 | 
            -
                 | 
| 141 | 
            -
                i = 0
         | 
| 101 | 
            +
                def parse
         | 
| 102 | 
            +
                  make_tree(0);
         | 
| 103 | 
            +
                  @elist.set_hierarchy
         | 
| 104 | 
            +
                end
         | 
| 142 105 |  | 
| 143 | 
            -
                 | 
| 144 | 
            -
                   | 
| 106 | 
            +
                def get_elementlist
         | 
| 107 | 
            +
                  @elist;
         | 
| 145 108 | 
             
                end
         | 
| 146 109 |  | 
| 147 | 
            -
                 | 
| 148 | 
            -
             | 
| 149 | 
            -
                   | 
| 150 | 
            -
                   | 
| 151 | 
            -
                   | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 110 | 
            +
                def get_next_token
         | 
| 111 | 
            +
                  data = @data.split(//)
         | 
| 112 | 
            +
                  gottoken = false
         | 
| 113 | 
            +
                  token = ""
         | 
| 114 | 
            +
                  i = 0
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                  if((@pos + 1) >= data.length)
         | 
| 117 | 
            +
                    return ""
         | 
| 118 | 
            +
                  end
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                  escape = false
         | 
| 121 | 
            +
                  while(((@pos + i) < data.length) && !gottoken)
         | 
| 122 | 
            +
                    ch = data[@pos + i];
         | 
| 123 | 
            +
                    case ch
         | 
| 124 | 
            +
                    when "["
         | 
| 125 | 
            +
                      if escape
         | 
| 126 | 
            +
                        token += ch
         | 
| 127 | 
            +
                        escape = false
         | 
| 128 | 
            +
                      else
         | 
| 129 | 
            +
                        if(i > 0)
         | 
| 130 | 
            +
                          gottoken = true
         | 
| 131 | 
            +
                        else
         | 
| 132 | 
            +
                          token += ch
         | 
| 133 | 
            +
                        end
         | 
| 134 | 
            +
                      end
         | 
| 135 | 
            +
                    when "]"
         | 
| 136 | 
            +
                      if escape
         | 
| 137 | 
            +
                        token += ch
         | 
| 138 | 
            +
                        escape = false
         | 
| 139 | 
            +
                      else
         | 
| 140 | 
            +
                        if(i == 0 )
         | 
| 141 | 
            +
                          token += ch
         | 
| 142 | 
            +
                        end
         | 
| 157 143 | 
             
                        gottoken = true
         | 
| 144 | 
            +
                      end
         | 
| 145 | 
            +
                    when "\\"
         | 
| 146 | 
            +
                      if escape
         | 
| 147 | 
            +
                        token += '\\\\'
         | 
| 148 | 
            +
                        escape = false
         | 
| 149 | 
            +
                      else
         | 
| 150 | 
            +
                        escape = true
         | 
| 151 | 
            +
                      end
         | 
| 152 | 
            +
                    when " "
         | 
| 153 | 
            +
                      if escape
         | 
| 154 | 
            +
                        token += '\\n'
         | 
| 155 | 
            +
                        escape = false
         | 
| 158 156 | 
             
                      else
         | 
| 159 157 | 
             
                        token += ch
         | 
| 160 158 | 
             
                      end
         | 
| 161 | 
            -
                     | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
                       | 
| 166 | 
            -
                    else
         | 
| 167 | 
            -
                      if(i == 0 )
         | 
| 159 | 
            +
                    when /[n{}<>^+*_=~\|\-]/
         | 
| 160 | 
            +
                      if escape
         | 
| 161 | 
            +
                        token += '\\' + ch
         | 
| 162 | 
            +
                        escape = false
         | 
| 163 | 
            +
                      else
         | 
| 168 164 | 
             
                        token += ch
         | 
| 169 165 | 
             
                      end
         | 
| 170 | 
            -
                      gottoken = true
         | 
| 171 | 
            -
                    end
         | 
| 172 | 
            -
                  when "\\"
         | 
| 173 | 
            -
                    escape = true
         | 
| 174 | 
            -
                  when "n", " ", "+", "-", "=", "~", "#", "*"
         | 
| 175 | 
            -
                    if escape
         | 
| 176 | 
            -
                      token += "\\#{ch}"
         | 
| 177 | 
            -
                      escape = false
         | 
| 178 166 | 
             
                    else
         | 
| 179 | 
            -
                       | 
| 167 | 
            +
                      if escape
         | 
| 168 | 
            +
                        token += ch
         | 
| 169 | 
            +
                        escape = false
         | 
| 170 | 
            +
                      else
         | 
| 171 | 
            +
                        token += ch
         | 
| 172 | 
            +
                      end
         | 
| 180 173 | 
             
                    end
         | 
| 181 | 
            -
             | 
| 182 | 
            -
                  #   gottoken = false # same as do nothing
         | 
| 183 | 
            -
                  else
         | 
| 184 | 
            -
                    token += ch
         | 
| 185 | 
            -
                    escape = false if escape
         | 
| 174 | 
            +
                    i += 1
         | 
| 186 175 | 
             
                  end
         | 
| 187 | 
            -
                  i += 1
         | 
| 188 | 
            -
                end
         | 
| 189 176 |  | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 192 | 
            -
             | 
| 193 | 
            -
             | 
| 177 | 
            +
                  if(i > 1)
         | 
| 178 | 
            +
                    @pos += (i - 1)
         | 
| 179 | 
            +
                  else
         | 
| 180 | 
            +
                    @pos += 1
         | 
| 181 | 
            +
                  end
         | 
| 182 | 
            +
                  return token
         | 
| 194 183 | 
             
                end
         | 
| 195 | 
            -
                return token
         | 
| 196 | 
            -
              end
         | 
| 197 184 |  | 
| 198 | 
            -
             | 
| 199 | 
            -
             | 
| 200 | 
            -
             | 
| 201 | 
            -
             | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
             | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
             | 
| 211 | 
            -
             | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 219 | 
            -
             | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 223 | 
            -
             | 
| 224 | 
            -
             | 
| 225 | 
            -
                       | 
| 226 | 
            -
             | 
| 227 | 
            -
             | 
| 228 | 
            -
             | 
| 229 | 
            -
             | 
| 230 | 
            -
             | 
| 231 | 
            -
                       | 
| 232 | 
            -
                      newparent = element.id
         | 
| 233 | 
            -
                      @elist.add(element)
         | 
| 234 | 
            -
                      count_node(joined)
         | 
| 235 | 
            -
                    end 
         | 
| 185 | 
            +
                def make_tree(parent)
         | 
| 186 | 
            +
                  token = get_next_token.strip
         | 
| 187 | 
            +
                  parts = Array.new
         | 
| 188 | 
            +
             | 
| 189 | 
            +
                  while(token != "" && token != "]" )
         | 
| 190 | 
            +
                    token_r = token.split(//)
         | 
| 191 | 
            +
                    case token_r[0]
         | 
| 192 | 
            +
                    when "["
         | 
| 193 | 
            +
                      tl = token_r.length
         | 
| 194 | 
            +
                      token_r = token_r[1, tl - 1]
         | 
| 195 | 
            +
                      spaceat = token_r.index(" ")
         | 
| 196 | 
            +
                      newparent  = -1
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                      if spaceat
         | 
| 199 | 
            +
                        parts[0] = token_r[0, spaceat].join
         | 
| 200 | 
            +
             | 
| 201 | 
            +
                        tl =token_r.length
         | 
| 202 | 
            +
                        parts[1] = token_r[spaceat, tl - spaceat].join
         | 
| 203 | 
            +
             | 
| 204 | 
            +
                        element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
         | 
| 205 | 
            +
                        @id += 1
         | 
| 206 | 
            +
                        @elist.add(element)
         | 
| 207 | 
            +
                        newparent = element.id
         | 
| 208 | 
            +
             | 
| 209 | 
            +
                        element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
         | 
| 210 | 
            +
                        @id += 1
         | 
| 211 | 
            +
                        @elist.add(element)
         | 
| 212 | 
            +
                      else
         | 
| 213 | 
            +
                        joined = token_r.join
         | 
| 214 | 
            +
                        element = Element.new(@id, parent, joined, @level, @fontset,  @fontsize)
         | 
| 215 | 
            +
                        @id += 1
         | 
| 216 | 
            +
                        newparent = element.id
         | 
| 217 | 
            +
                        @elist.add(element)
         | 
| 218 | 
            +
                      end
         | 
| 236 219 |  | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 220 | 
            +
                      @level += 1
         | 
| 221 | 
            +
                      make_tree(newparent)
         | 
| 239 222 |  | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 242 | 
            -
             | 
| 243 | 
            -
             | 
| 244 | 
            -
             | 
| 245 | 
            -
                       | 
| 223 | 
            +
                    else
         | 
| 224 | 
            +
                      if token.strip != ""
         | 
| 225 | 
            +
                        element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
         | 
| 226 | 
            +
                        @id += 1
         | 
| 227 | 
            +
                        @elist.add(element)
         | 
| 228 | 
            +
                      end
         | 
| 246 229 | 
             
                    end
         | 
| 247 | 
            -
                  end
         | 
| 248 230 |  | 
| 249 | 
            -
             | 
| 231 | 
            +
                    token = get_next_token
         | 
| 232 | 
            +
                  end
         | 
| 233 | 
            +
                  @level -= 1
         | 
| 250 234 | 
             
                end
         | 
| 251 | 
            -
                @level -= 1
         | 
| 252 235 | 
             
              end
         | 
| 253 236 | 
             
            end
         | 
| 254 237 |  |