pertinent_parser 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/pertinent_parser.rb +78 -0
- data/lib/pertinent_parser/rule.rb +79 -0
- data/lib/pertinent_parser/text.rb +27 -0
- data/lib/pertinent_parser/transform.rb +25 -0
- metadata +61 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 82c1ea5ccb406cf088548e021a771378c318e3d1
         | 
| 4 | 
            +
              data.tar.gz: 9b89619ed72dda0008192856ea343f624b0573b5
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: e3d3d6f55364d41ab21ebce1e48a9d2e2b84aaf737afb7b8390a599bbd206dff1ea72874772f9e5dadb340efd794b0b3c62877540915529ff142a406e839ae37
         | 
| 7 | 
            +
              data.tar.gz: 8238784a074f077775b6d8df61c792a77b5ff0f2e8f8b23c8418f3d4eecd5f7ad15edd4059374744240b8dac1445ac85090a5503e79a603fa1f0da5f9cd54073
         | 
| @@ -0,0 +1,78 @@ | |
| 1 | 
            +
            require "hpricot"
         | 
| 2 | 
            +
            require "pertinent_parser/transform"
         | 
| 3 | 
            +
            require "pertinent_parser/rule"
         | 
| 4 | 
            +
            require "pertinent_parser/text"
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Reopens Hpricot's element class to provide a start-tag renderer.
            class Hpricot::Elem
              # Builds the opening tag of this element as a String: "<", the
              # element name, whatever +attributes_as_html+ returns, an optional
              # " /" marker, and ">". The " /" marker is emitted only when the
              # element reports itself as empty and has no end tag.
              def stag
                opening = "<#{name}#{attributes_as_html}"
                marker = (empty? && !etag) ? " /" : ""
                "#{opening}#{marker}>"
              end
            end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # Entry points for building Text objects and the Rules that decorate them.
            module PertinentParser
              class << self
                # Parses +html+ with Hpricot and returns a Text holding the document's
                # inner text, with one wrap rule added per non-text node visited.
                #
                # A running offset +d+ counts the characters of the text nodes seen so
                # far, so each wrap covers d...(d + length of the node's inner text).
                # NOTE(review): assumes every non-text node yielded by
                # traverse_all_element responds to +stag+ -- confirm against Hpricot.
                def html(html)
                  doc = Hpricot(html)
                  d = 0
                  t = text(doc.inner_text)
                  doc.traverse_all_element do |elem|
                    length = elem.inner_text.size
                    if elem.text?
                      d += length
                    else
                      # Text#+ registers the rule on t; its return value is not needed.
                      t + wrap_(d...d + length, elem.stag)
                    end
                  end
                  t
                end

                # Wraps the string +s+ in a Text whose root rule is an identity
                # transform spanning every character index of +s+.
                def text(s)
                  r = Rule.new((0..s.size-1), Transform.new(:identity, ["id"]))
                  t = Text.new(s)
                  t.rule = r
                  t
                end

                # Converts a [start, end) offset pair (as returned by MatchData#offset)
                # into the inclusive Range start..end-1.
                def offset_to_r(o)
                  (o[0]..o[1]-1)
                end

                # Locates the +number+-th occurrence (1-based, overlapping occurrences
                # counted) of the literal string +target+ inside +context+.
                #
                # Returns the inclusive Range of character indexes it covers, or nil
                # when there are fewer than +number+ occurrences.
                def range_from_specification(context, target, number)
                  pattern = Regexp.new(Regexp.escape(target))
                  position = 0
                  count = 0
                  while (found = context.match(pattern, position))
                    count += 1
                    return offset_to_r(found.offset(0)) if count == number
                    # Resume one character past this match's start so overlapping
                    # occurrences are still found, without rescanning the same match.
                    position = found.begin(0) + 1
                  end
                  nil
                end

                # Builds a wrap Rule around the +number+-th occurrence of +target+ in
                # +context+, using +tag+ as the opening tag.
                #
                # Raises ArgumentError when the occurrence does not exist.
                def new_wrap(context, target, number, tag)
                  wrap_(locate_range(context, target, number), tag)
                end

                # Thin constructor wrapper: returns Rule.new(range, transform).
                def rule(range, transform)
                  Rule.new(range, transform)
                end

                # Builds a wrap Rule over +range+. The closing tag is derived from the
                # element name found directly after "<" in +tag+ (up to the first
                # whitespace or ">").
                #
                # Raises ArgumentError when +tag+ contains no such opening tag.
                def wrap_(range, tag)
                  element = tag[/<([^\s>]*)[\s>]/, 1]
                  if element.nil?
                    raise ArgumentError, "cannot derive a closing tag from #{tag.inspect}"
                  end
                  Rule.new(range, Transform.new(:wrap, [tag, "</#{element}>"]))
                end

                # Builds a replacement Rule that substitutes +replacement+ for the
                # +number+-th occurrence of +target+ in +context+.
                #
                # Raises ArgumentError when the occurrence does not exist.
                def new_replace(context, target, number, replacement)
                  range = locate_range(context, target, number)
                  Rule.new(range, Transform.new(:replacement, replacement))
                end

                private

                # Like range_from_specification, but raises ArgumentError instead of
                # returning nil, so a missing target fails here rather than later
                # while the rule tree is being applied.
                def locate_range(context, target, number)
                  range = range_from_specification(context, target, number)
                  return range unless range.nil?

                  raise ArgumentError, "occurrence #{number} of #{target.inspect} not found"
                end
              end
            end
         | 
| @@ -0,0 +1,79 @@ | |
| 1 | 
            +
            # A node in a tree of text transformations. Each Rule covers a set of
            # character indexes (+range+, stored as an Array of integers) of the
            # original string, carries a +transform+ for that span, and owns child
            # rules that are applied before its own transform.
            class Rule
              attr_accessor :name, :children, :parent
              attr_accessor :transform
              attr_accessor :range

              # range     - anything responding to +to_a+ (a Range, an Array, or nil,
              #             which yields an empty rule).
              # transform - object responding to +apply(string)+ and, for partially
              #             overlapping rules, +split(n)+.
              # children  - rules nested inside this one; each gets its parent set.
              # parent    - enclosing rule, or nil for a root.
              def initialize(range, transform=nil, children=[], parent=nil)
                @range = range.to_a
                @children = children
                @parent = parent
                @transform = transform
                @children.each { |child| child.parent = self }
              end

              # Orders rules by the first index they cover.
              def <=>(r)
                range.first <=> r.range.first
              end

              # Applies the children and then this rule's transform to +s+ in place.
              #
              # +offset+ is how much the text before this rule has already grown.
              # Returns how many characters this rule (children included) added.
              def apply_recur(s, offset=0)
                # A rule covering no characters has nothing to transform.
                return 0 if @range.empty?

                pre = offset
                @children.each do |child|
                  offset += child.apply_recur(s, offset)
                end
                # The span starts where earlier siblings pushed it (pre) and ends
                # where the children's own growth pushed it (offset).
                first = @range.first + pre
                last = @range.last + offset
                replacement = @transform.apply(s[first..last])
                s[first..last] = replacement
                replacement.size - @range.size
              end

              # Returns a transformed copy of +str+; +str+ itself is not modified.
              def apply(str)
                s = str.dup
                apply_recur(s)
                return s
              end

              # Adds the root rule of +text+ to this rule and returns +text+.
              def +(text)
                add(text.rule)
                return text
              end

              # Merges +new_rule+ into the tree rooted at this rule.
              #
              # Returns:
              #   :inside  - new_rule lies within this rule and was inserted below it
              #   :outside - the two rules share no index (or new_rule is empty)
              #   :contain - new_rule encloses this rule, which has a parent; the
              #              caller is expected to move this rule under new_rule
              #   a Rule   - either new_rule itself (it enclosed this parentless rule
              #              and adopted it), or the leftover piece of a partially
              #              overlapping rule that still has to be placed
              def add(new_rule)
                return :outside if new_rule.range.empty?

                intersection = range & new_rule.range
                if intersection == new_rule.range
                  insert_inside(new_rule)
                elsif intersection.empty?
                  :outside
                elsif intersection == range
                  return :contain unless @parent.nil?

                  nest_under(new_rule)
                else
                  split_across(new_rule, intersection)
                end
              end

              private

              # Places +new_rule+ somewhere below this rule, letting each existing
              # child claim it, be enclosed by it, or take the part it overlaps.
              def insert_inside(new_rule)
                enclosed = []
                pending = new_rule
                @children.each do |child|
                  result = child.add(pending)
                  case result
                  when Rule
                    pending = result
                  when :inside
                    return :inside
                  when :contain
                    enclosed << child
                  end
                end
                @children -= enclosed
                enclosed.each { |child| pending.add(child) }
                # Record the parent link so later additions enclosing this child are
                # reported as :contain instead of re-adopting it a second time.
                pending.parent = self
                @children << pending
                @children.sort!
                :inside
              end

              # Makes this parentless rule the first child of the enclosing +new_rule+
              # and re-inserts new_rule's previous children around it.
              def nest_under(new_rule)
                previous = new_rule.children
                new_rule.children = [self]
                self.parent = new_rule
                previous.each { |child| new_rule.add(child) }
                new_rule
              end

              # Handles a partial overlap: the overlapping part is added below this
              # rule, and the remainder is returned for the caller to place.
              def split_across(new_rule, intersection)
                difference = new_rule.range - intersection
                transforms = new_rule.transform.split(difference.size)
                if intersection.first < difference.first
                  inter_tran, diff_tran = transforms
                else
                  diff_tran, inter_tran = transforms
                end
                add(Rule.new(intersection, inter_tran))
                Rule.new(difference, diff_tran)
              end
            end
         | 
| @@ -0,0 +1,27 @@ | |
| 1 | 
            +
            # A String that carries a tree of rules describing how it should be
            # decorated. The string content itself is never changed by the rules;
            # +apply+ produces the transformed result.
            class Text < String
              attr_writer :rule

              # The root rule. When none has been assigned (e.g. the object was
              # created with Text.new rather than PertinentParser.text), an identity
              # rule spanning the whole string is created on first use so that the
              # other methods do not fail on a nil rule.
              def rule
                @rule ||= Rule.new(0..size - 1, Transform.new(:identity, ["id"]))
              end

              # Return the HTML after all rules are applied
              def apply
                rule.apply(self)
              end

              # Adds +new_rule+ to the rule tree and returns whatever the root rule's
              # +add+ returns. This replaces String#+ (concatenation) for Text.
              def +(new_rule)
                rule.add(new_rule)
              end

              # Wrap text, falling inside of existing boundaries
              def wrap_in(tag, target, number=1)
                self + PertinentParser.new_wrap(self, target, number, tag)
              end

              # Registers a rule replacing the +number+-th occurrence of +target+ with
              # +replacement+. NOTE(review): this shadows String#replace with a
              # different signature -- confirm no caller relies on the String form.
              def replace(replacement, target, number=1)
                self + PertinentParser.new_replace(self, target, number, replacement)
              end

              # Wrap text, falling outside of existing boundaries
              def wrap_out(tag, target, number=1)
                PertinentParser.new_wrap(self, target, number, tag) + self
              end
            end
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            # Describes what a Rule does to the text it covers.
            #
            # type     - :identity (leave text unchanged), :replacement (substitute
            #            +property+ for the text) or :wrap (surround the text with
            #            property[0] and property[1]).
            # property - the payload for the type, as described above.
            class Transform
              attr_accessor :type, :property

              def initialize(type, property)
                @type, @property = type, property
              end

              # Splits this transform in two so it can be spread over two adjacent
              # rules. Returns a two-element Array.
              #
              # For :replacement the replacement string is cut after its first +n+
              # characters; either part may be the empty string. For :wrap and
              # :identity both parts behave like the original. Returns nil for any
              # other type.
              def split(n)
                case @type
                when :replacement
                  # Slice by (start, length): the 0..n-1 range form turns into 0..-1
                  # for n == 0 and would hand the whole string to both halves.
                  head = @property[0, n] || ""
                  tail = @property[n..-1] || ""
                  [Transform.new(:replacement, head), Transform.new(:replacement, tail)]
                when :wrap, :identity
                  [self, dup]
                end
              end

              # Returns the text that should stand in for +s+. Returns nil for a type
              # other than :identity, :replacement or :wrap.
              def apply(s)
                case @type
                when :identity
                  s
                when :replacement
                  @property
                when :wrap
                  @property[0] + s + @property[1]
                end
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,61 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: pertinent_parser
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Matthew Bunday
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2014-04-28 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: hpricot
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - '='
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: 0.8.6
         | 
| 20 | 
            +
              type: :runtime
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - '='
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: 0.8.6
         | 
| 27 | 
            +
            description: PertinentParser helps you compose HTML tags across existing tag boundaries.
         | 
| 28 | 
            +
            email: mkbunday@gmail.com
         | 
| 29 | 
            +
            executables: []
         | 
| 30 | 
            +
            extensions: []
         | 
| 31 | 
            +
            extra_rdoc_files: []
         | 
| 32 | 
            +
            files:
         | 
| 33 | 
            +
            - lib/pertinent_parser.rb
         | 
| 34 | 
            +
            - lib/pertinent_parser/rule.rb
         | 
| 35 | 
            +
            - lib/pertinent_parser/text.rb
         | 
| 36 | 
            +
            - lib/pertinent_parser/transform.rb
         | 
| 37 | 
            +
            homepage: https://github.com/zencephalon/Pertinent_Parser
         | 
| 38 | 
            +
            licenses:
         | 
| 39 | 
            +
            - MIT
         | 
| 40 | 
            +
            metadata: {}
         | 
| 41 | 
            +
            post_install_message: 
         | 
| 42 | 
            +
            rdoc_options: []
         | 
| 43 | 
            +
            require_paths:
         | 
| 44 | 
            +
            - lib
         | 
| 45 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 46 | 
            +
              requirements:
         | 
| 47 | 
            +
              - - ">="
         | 
| 48 | 
            +
                - !ruby/object:Gem::Version
         | 
| 49 | 
            +
                  version: '0'
         | 
| 50 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
              requirements:
         | 
| 52 | 
            +
              - - ">="
         | 
| 53 | 
            +
                - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                  version: '0'
         | 
| 55 | 
            +
            requirements: []
         | 
| 56 | 
            +
            rubyforge_project: 
         | 
| 57 | 
            +
            rubygems_version: 2.2.2
         | 
| 58 | 
            +
            signing_key: 
         | 
| 59 | 
            +
            specification_version: 4
         | 
| 60 | 
            +
            summary: PertinentParser helps you compose HTML tags across existing tag boundaries.
         | 
| 61 | 
            +
            test_files: []
         |