rLexer 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/lib/rLexer/tags.rb +12 -0
 - data/lib/rLexer/tokenizer.rb +150 -0
 - data/lib/rLexer.rb +1 -0
 - metadata +46 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA256:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 9e539be93197746639346f23cbe8f1ef3ebf919be3936970071c0514cacbf311
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: abeab5b45dafbca62bacc24b02b6a4b262d84d02cbd4965c8f8bc93b0ce4b281
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 0152746babc3149ebc2f6efee3ec20f9753c0567a04e695b63d2326ab0318f7d32e0cb3a77f0bb341a7540210fcb57c5537434e96f7f054a7bd504eac393dc03
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: f5912634d0b97eafe9924bb5f50a0c1f72587f0e159999ceb0def40ef5a5488acf71373b8f866bb3febf68422e89ced67eab9e4daaeca2e85986fb673a75eddb
         
     | 
    
        data/lib/rLexer/tokenizer.rb
    ADDED
    
    
| 
         @@ -0,0 +1,150 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative 'tags'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            	class Tokenizer
         
     | 
| 
      
 4 
     | 
    
         
            +
            		attr_accessor :html, :type, :Tags
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            		def initialize(html)
         
     | 
| 
      
 7 
     | 
    
         
            +
            			@html = html.gsub('"', '\'')
         
     | 
| 
      
 8 
     | 
    
         
            +
            			@type = :EOF
         
     | 
| 
      
 9 
     | 
    
         
            +
            			@tokens = []
         
     | 
| 
      
 10 
     | 
    
         
            +
            		end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            		def tokenize
         
     | 
| 
      
 13 
     | 
    
         
            +
            			@html.each_char.with_index do |ch, idx|
         
     | 
| 
      
 14 
     | 
    
         
            +
            				comment_end(idx)
         
     | 
| 
      
 15 
     | 
    
         
            +
            				next if @type == :COMMENT
         
     | 
| 
      
 16 
     | 
    
         
            +
            				if open_tag?(ch) or close_tag?(ch)
         
     | 
| 
      
 17 
     | 
    
         
            +
            					process(idx)
         
     | 
| 
      
 18 
     | 
    
         
            +
            				end
         
     | 
| 
      
 19 
     | 
    
         
            +
            			end
         
     | 
| 
      
 20 
     | 
    
         
            +
            			consume_attributes
         
     | 
| 
      
 21 
     | 
    
         
            +
            		end
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            		def process(idx)
         
     | 
| 
      
 24 
     | 
    
         
            +
            			set_type(idx); consume(idx)
         
     | 
| 
      
 25 
     | 
    
         
            +
            		end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            		def consume(idx)
         
     | 
| 
      
 28 
     | 
    
         
            +
            			if @type == :COMMENT
         
     | 
| 
      
 29 
     | 
    
         
            +
            				consume_comment(idx)
         
     | 
| 
      
 30 
     | 
    
         
            +
            			elsif @type == :OPEN or @type == :CLOSE
         
     | 
| 
      
 31 
     | 
    
         
            +
            				consume_tag(idx)
         
     | 
| 
      
 32 
     | 
    
         
            +
            			elsif @type == :DOCTYPE
         
     | 
| 
      
 33 
     | 
    
         
            +
            				#consume_doctype(idx)
         
     | 
| 
      
 34 
     | 
    
         
            +
            			elsif @type == :DATA
         
     | 
| 
      
 35 
     | 
    
         
            +
            				consume_data(idx)
         
     | 
| 
      
 36 
     | 
    
         
            +
            			end
         
     | 
| 
      
 37 
     | 
    
         
            +
            		end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            		def set_type(idx)
         
     | 
| 
      
 40 
     | 
    
         
            +
            			if comment_start?(idx)
         
     | 
| 
      
 41 
     | 
    
         
            +
            				@type = :COMMENT
         
     | 
| 
      
 42 
     | 
    
         
            +
            			elsif end_tag?(idx)
         
     | 
| 
      
 43 
     | 
    
         
            +
            				@type = :CLOSE
         
     | 
| 
      
 44 
     | 
    
         
            +
            			elsif doctype?(idx)
         
     | 
| 
      
 45 
     | 
    
         
            +
            				@type = :DOCTYPE
         
     | 
| 
      
 46 
     | 
    
         
            +
            			elsif close_tag?(current_char(idx)) or comment_end?(idx)
         
     | 
| 
      
 47 
     | 
    
         
            +
            				@type = :DATA
         
     | 
| 
      
 48 
     | 
    
         
            +
            			elsif open_tag?(current_char(idx))
         
     | 
| 
      
 49 
     | 
    
         
            +
            				@type = :OPEN
         
     | 
| 
      
 50 
     | 
    
         
            +
            			end
         
     | 
| 
      
 51 
     | 
    
         
            +
            		end
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            		def set_token(slice)
         
     | 
| 
      
 54 
     | 
    
         
            +
            			@tokens.push([@type, slice])
         
     | 
| 
      
 55 
     | 
    
         
            +
            		end
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            		def consume_comment(idx)
         
     | 
| 
      
 58 
     | 
    
         
            +
            			slice = @html[idx..-1]
         
     | 
| 
      
 59 
     | 
    
         
            +
            			slice = slice[Tags::START_COMMENT.length..end_comment_index(slice)]
         
     | 
| 
      
 60 
     | 
    
         
            +
            			set_token(slice)
         
     | 
| 
      
 61 
     | 
    
         
            +
            		end
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            		def consume_tag(idx)
         
     | 
| 
      
 64 
     | 
    
         
            +
            			slice = @html[idx..-1]
         
     | 
| 
      
 65 
     | 
    
         
            +
            			slice = slice[tag_index(slice)..slice.index(Tags::CLOSE_TAG) -1]
         
     | 
| 
      
 66 
     | 
    
         
            +
            			set_token(slice)
         
     | 
| 
      
 67 
     | 
    
         
            +
            		end
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            		def consume_attributes
         
     | 
| 
      
 70 
     | 
    
         
            +
            			atts_new = []
         
     | 
| 
      
 71 
     | 
    
         
            +
            			@tokens.each.with_index do |token, i|
         
     | 
| 
      
 72 
     | 
    
         
            +
            				atts = token[1].split(' ')[1..-1]
         
     | 
| 
      
 73 
     | 
    
         
            +
            				if token[0] == :OPEN and !atts[0].nil?
         
     | 
| 
      
 74 
     | 
    
         
            +
            					atts_new.push([i, atts.join(' ').split("' ")])
         
     | 
| 
      
 75 
     | 
    
         
            +
            				end
         
     | 
| 
      
 76 
     | 
    
         
            +
            				@tokens[i][1] = @tokens[i][1].split(' ')[0] unless @tokens[i][0] == :COMMENT or @tokens[i][0] == :DATA
         
     | 
| 
      
 77 
     | 
    
         
            +
            			end
         
     | 
| 
      
 78 
     | 
    
         
            +
            			c = 1
         
     | 
| 
      
 79 
     | 
    
         
            +
            			atts_new.each.with_index do |x|
         
     | 
| 
      
 80 
     | 
    
         
            +
            				@tokens.insert(x[0] + c, [:ATTRIBUTES, x[1]])
         
     | 
| 
      
 81 
     | 
    
         
            +
            				c += 1
         
     | 
| 
      
 82 
     | 
    
         
            +
            			end
         
     | 
| 
      
 83 
     | 
    
         
            +
            		end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
            		def consume_data(idx)
         
     | 
| 
      
 86 
     | 
    
         
            +
            			return if next_char?(idx)
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
            			slice = @html[idx..-1]
         
     | 
| 
      
 89 
     | 
    
         
            +
            			slice = slice[Tags::CLOSE_TAG.length..slice.index(Tags::OPEN_TAG) -1]
         
     | 
| 
      
 90 
     | 
    
         
            +
            			slice.strip!
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
            			set_token(slice) unless slice == ''
         
     | 
| 
      
 93 
     | 
    
         
            +
            		end
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
            		def current_char(idx)
         
     | 
| 
      
 96 
     | 
    
         
            +
            			@html[idx]
         
     | 
| 
      
 97 
     | 
    
         
            +
            		end
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
            		def end_comment_index(html)
         
     | 
| 
      
 100 
     | 
    
         
            +
            			idx = html.index(Tags::END_COMMENT)
         
     | 
| 
      
 101 
     | 
    
         
            +
            			(not idx.nil?) ? (idx + 2) - Tags::END_COMMENT.length : -1
         
     | 
| 
      
 102 
     | 
    
         
            +
            		end
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
            		def tag_index(html)
         
     | 
| 
      
 105 
     | 
    
         
            +
            			(@type == :OPEN) ? Tags::OPEN_TAG.length : Tags::CLOSING_TAG.length
         
     | 
| 
      
 106 
     | 
    
         
            +
            		end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
            		def comment_end(idx)
         
     | 
| 
      
 109 
     | 
    
         
            +
            			return if not @type == :COMMENT
         
     | 
| 
      
 110 
     | 
    
         
            +
            			if comment_end?(idx)
         
     | 
| 
      
 111 
     | 
    
         
            +
            				set_type(idx)
         
     | 
| 
      
 112 
     | 
    
         
            +
            			end
         
     | 
| 
      
 113 
     | 
    
         
            +
            		end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
            		def comment_end?(idx)
         
     | 
| 
      
 116 
     | 
    
         
            +
            			suitable?(idx, Tags::END_COMMENT)
         
     | 
| 
      
 117 
     | 
    
         
            +
            		end
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
            		def next_char?(idx)
         
     | 
| 
      
 120 
     | 
    
         
            +
            			@html[idx +1] == Tags::OPEN_TAG or @html[idx +1].nil?
         
     | 
| 
      
 121 
     | 
    
         
            +
            		end
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
            		def end_tag?(idx)
         
     | 
| 
      
 124 
     | 
    
         
            +
            			suitable?(idx, Tags::CLOSING_TAG)
         
     | 
| 
      
 125 
     | 
    
         
            +
            		end
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
            		def doctype?(idx)
         
     | 
| 
      
 128 
     | 
    
         
            +
            			false
         
     | 
| 
      
 129 
     | 
    
         
            +
            		end
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
            		def comment_start?(idx)
         
     | 
| 
      
 132 
     | 
    
         
            +
            			suitable?(idx, Tags::START_COMMENT)
         
     | 
| 
      
 133 
     | 
    
         
            +
            		end
         
     | 
| 
      
 134 
     | 
    
         
            +
             
     | 
| 
      
 135 
     | 
    
         
            +
            		def suitable?(idx, tag)
         
     | 
| 
      
 136 
     | 
    
         
            +
            			tag == @html.byteslice(idx, tag.length)
         
     | 
| 
      
 137 
     | 
    
         
            +
            		end
         
     | 
| 
      
 138 
     | 
    
         
            +
             
     | 
| 
      
 139 
     | 
    
         
            +
            		def open_tag?(char)
         
     | 
| 
      
 140 
     | 
    
         
            +
            			char == Tags::OPEN_TAG
         
     | 
| 
      
 141 
     | 
    
         
            +
            		end
         
     | 
| 
      
 142 
     | 
    
         
            +
             
     | 
| 
      
 143 
     | 
    
         
            +
            		def close_tag?(char)
         
     | 
| 
      
 144 
     | 
    
         
            +
            			char == Tags::CLOSE_TAG
         
     | 
| 
      
 145 
     | 
    
         
            +
            		end
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
            		def tokens
         
     | 
| 
      
 148 
     | 
    
         
            +
            			@tokens
         
     | 
| 
      
 149 
     | 
    
         
            +
            		end
         
     | 
| 
      
 150 
     | 
    
         
            +
            	end
         
     | 
    
        data/lib/rLexer.rb
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative 'rLexer/tokenizer'
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: rLexer
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.12
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - Robert Holland
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2020-09-07 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies: []
         
     | 
| 
      
 13 
     | 
    
         
            +
            description: 
         
     | 
| 
      
 14 
     | 
    
         
            +
            email:
         
     | 
| 
      
 15 
     | 
    
         
            +
            - rlexerdevelopment@gmail.com
         
     | 
| 
      
 16 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 17 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 18 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 19 
     | 
    
         
            +
            files:
         
     | 
| 
      
 20 
     | 
    
         
            +
            - lib/rLexer.rb
         
     | 
| 
      
 21 
     | 
    
         
            +
            - lib/rLexer/tags.rb
         
     | 
| 
      
 22 
     | 
    
         
            +
            - lib/rLexer/tokenizer.rb
         
     | 
| 
      
 23 
     | 
    
         
            +
            homepage: https://github.com/whollandr94/rLexer
         
     | 
| 
      
 24 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 25 
     | 
    
         
            +
            - MIT
         
     | 
| 
      
 26 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 27 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 28 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 29 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 30 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 31 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 32 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 33 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 34 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 35 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 36 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 39 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 41 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 42 
     | 
    
         
            +
            rubygems_version: 3.1.2
         
     | 
| 
      
 43 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 44 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 45 
     | 
    
         
            +
            summary: A simple HTML lexer/tokenizer written in Ruby.
         
     | 
| 
      
 46 
     | 
    
         
            +
            test_files: []
         
     |