pertinent_parser 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 82c1ea5ccb406cf088548e021a771378c318e3d1
4
+ data.tar.gz: 9b89619ed72dda0008192856ea343f624b0573b5
5
+ SHA512:
6
+ metadata.gz: e3d3d6f55364d41ab21ebce1e48a9d2e2b84aaf737afb7b8390a599bbd206dff1ea72874772f9e5dadb340efd794b0b3c62877540915529ff142a406e839ae37
7
+ data.tar.gz: 8238784a074f077775b6d8df61c792a77b5ff0f2e8f8b23c8418f3d4eecd5f7ad15edd4059374744240b8dac1445ac85090a5503e79a603fa1f0da5f9cd54073
@@ -0,0 +1,78 @@
1
+ require "hpricot"
2
+ require "pertinent_parser/transform"
3
+ require "pertinent_parser/rule"
4
+ require "pertinent_parser/text"
5
+
6
# Reopens Hpricot::Elem to define how an element's start tag is rendered.
class Hpricot::Elem
  # Returns the start tag as a String: "<", the element name, the output of
  # attributes_as_html, and a closing ">". When the element reports empty?
  # and etag is falsy, " /" is inserted before the ">".
  # NOTE(review): etag is presumably the element's end tag -- confirm against Hpricot.
  def stag
    opening = "<#{name}#{attributes_as_html}"
    closer = empty? && !etag ? " /" : ""
    opening + closer + ">"
  end
end
13
+
14
# Entry points for building Text objects and the Rule/Transform pairs that are
# layered onto them.
module PertinentParser
  class << self
    # Parses an HTML string with Hpricot and returns a Text holding the
    # document's inner text, with one wrap rule added per non-text node.
    #
    # The traversal is hand-rolled so the HTML representation (the start tag
    # produced by Elem#stag) can be controlled here.
    def html(html)
      doc = Hpricot(html)
      d = 0 # running character offset into the document's inner text
      t = text(doc.inner_text)
      doc.traverse_all_element do |elem|
        if elem.text?
          d += elem.inner_text.size
        else
          # Text#+ adds the rule to the text's rule tree.
          t + wrap_(d...d + elem.inner_text.size, elem.stag)
        end
      end
      t
    end

    # Wraps the string s in a Text whose root rule is an identity transform
    # spanning every character index of s.
    def text(s)
      r = Rule.new((0..s.size - 1), Transform.new(:identity, ["id"]))
      t = Text.new(s)
      t.rule = r
      t
    end

    # Converts a [start, stop] offset pair (stop exclusive, as returned by
    # MatchData#offset) into an inclusive Range.
    def offset_to_r(o)
      (o[0]..o[1] - 1)
    end

    # Finds the number-th occurrence (1-based, overlapping occurrences
    # counted) of the literal string target inside context.
    #
    # Returns the inclusive Range of character indexes it occupies, or nil
    # when there are fewer than number occurrences.
    def range_from_specification(context, target, number)
      pattern = Regexp.new(Regexp.escape(target))
      position = 0
      count = 0
      while (match = context.match(pattern, position))
        count += 1
        return offset_to_r(match.offset(0)) if count == number

        # Resume one character after this match's start so overlapping
        # occurrences are still found, without re-matching the same one.
        position = match.begin(0) + 1
      end
      nil
    end

    # Builds a wrap Rule around the number-th occurrence of target in context.
    #
    # Raises ArgumentError when that occurrence does not exist.
    def new_wrap(context, target, number, tag)
      wrap_(locate(context, target, number), tag)
    end

    # Convenience constructor for a Rule.
    def rule(range, transform)
      Rule.new(range, transform)
    end

    # Builds a Rule over range whose transform wraps the text in tag and the
    # matching closing tag. The closing tag's name is taken from the
    # characters following "<" up to the first whitespace or ">".
    #
    # Raises ArgumentError when tag does not look like an opening tag.
    def wrap_(range, tag)
      name = tag[/<([^\s>]*)[\s>]/, 1]
      raise ArgumentError, "not an opening tag: #{tag.inspect}" if name.nil?

      Rule.new(range, Transform.new(:wrap, [tag, "</" + name + ">"]))
    end

    # Builds a Rule that replaces the number-th occurrence of target in
    # context with replacement.
    #
    # Raises ArgumentError when that occurrence does not exist.
    def new_replace(context, target, number, replacement)
      range = locate(context, target, number)
      Rule.new(range, Transform.new(:replacement, replacement))
    end

    private

    # Like range_from_specification, but raises instead of returning nil so a
    # missing target is reported where it happens rather than as a failure
    # on nil deep inside Rule.
    def locate(context, target, number)
      range_from_specification(context, target, number) ||
        raise(ArgumentError, "occurrence #{number} of #{target.inspect} not found")
    end
  end
end
@@ -0,0 +1,79 @@
1
# A node in a tree of text transformations. Each rule covers a set of
# character indexes (stored as an Array in #range) of the original string,
# carries a transform, and holds child rules that fall inside its range.
class Rule
  attr_accessor :name, :children, :parent
  attr_accessor :transform
  attr_accessor :range

  # range     - anything responding to to_a (e.g. a Range of indexes).
  # transform - object responding to apply(string); split(n) is also needed
  #             when the rule may partially overlap another rule.
  # children  - rules nested inside this one.
  # parent    - the rule this one is nested in, or nil for a detached/root rule.
  def initialize(range, transform = nil, children = [], parent = nil)
    @range = range.to_a
    @children = children
    @parent = parent
    @transform = transform
  end

  # Orders rules by the first index they cover.
  def <=>(r)
    range.first <=> r.range.first
  end

  # Applies the children and then this rule's transform to s in place.
  #
  # offset is how much the text before this rule has grown so far. Returns
  # how much the text covered by this rule grew (children included).
  def apply_recur(s, offset = 0)
    pre = offset
    @children.each do |child|
      offset += child.apply_recur(s, offset)
    end
    # The start is shifted by growth that happened before this rule; the end
    # is additionally shifted by the growth of this rule's own children.
    span = (@range.first + pre)..(@range.last + offset)
    replacement = @transform.apply(s[span])
    s[span] = replacement
    replacement.size - range.size
  end

  # Returns a copy of str with this rule tree applied; str is not modified.
  def apply(str)
    s = str.dup
    apply_recur(s)
    s
  end

  # Adds text's rule to this rule (see #add) and returns text.
  def +(text)
    add(text.rule)
    text
  end

  # Merges new_rule into the tree rooted at this rule.
  #
  # Returns:
  #   :inside  - new_rule lies within this rule and was attached below it.
  #   :outside - the two rules share no index; nothing was changed.
  #   :contain - new_rule covers this (attached) rule; the caller is expected
  #              to move this rule underneath new_rule.
  #   a Rule   - either new_rule itself, after this detached rule was nested
  #              inside it, or the leftover part of new_rule that lies
  #              outside this rule after a partial overlap.
  def add(new_rule)
    intersection = range & new_rule.range
    if intersection == new_rule.range
      contained = []
      input = new_rule
      @children.each do |child|
        result = child.add(input)
        case result
        when Rule
          input = result
        when :inside
          return :inside
        when :contain
          contained << child
        end
      end
      @children -= contained
      contained.each do |child|
        input.add child
      end
      # Record the parent so that a later rule covering this child gets
      # :contain back and the child is moved rather than listed twice.
      input.parent = self
      @children << input
      @children.sort!
      :inside
    elsif intersection.empty?
      :outside
    elsif intersection == range
      return :contain unless @parent.nil?

      # This rule is detached: nest it inside new_rule and re-add new_rule's
      # former children so each lands inside or beside this rule.
      adopted = new_rule.children
      new_rule.children = [self]
      self.parent = new_rule
      adopted.each do |child|
        new_rule.add child
      end
      new_rule
    else
      # Partial overlap: split new_rule into the part inside this rule and
      # the part outside it.
      difference = new_rule.range - intersection
      transforms = new_rule.transform.split(difference.size)
      if intersection.first < difference.first
        inter_tran, diff_tran = transforms
      else
        diff_tran, inter_tran = transforms
      end
      add(Rule.new(intersection, inter_tran))
      Rule.new(difference, diff_tran)
    end
  end
end
@@ -0,0 +1,27 @@
1
# A String that carries the rule tree to be applied to it.
class Text < String
  attr_accessor :rule

  # Returns the result of applying the attached rule to this text
  # (the HTML once every rule has been applied).
  def apply
    rule.apply(self)
  end

  # String concatenation is dropped in favour of adding a rule: the given
  # rule is added to this text's rule and the result of that add is returned.
  undef +
  def +(new_rule)
    rule.add(new_rule)
  end

  # Wraps the number-th occurrence of target in tag, with the new wrap
  # falling inside of existing boundaries.
  def wrap_in(tag, target, number = 1)
    wrapper = PertinentParser.new_wrap(self, target, number, tag)
    self + wrapper
  end

  # Replaces the number-th occurrence of target with replacement.
  def replace(replacement, target, number = 1)
    substitution = PertinentParser.new_replace(self, target, number, replacement)
    self + substitution
  end

  # Wraps the number-th occurrence of target in tag, with the new wrap
  # falling outside of existing boundaries.
  def wrap_out(tag, target, number = 1)
    wrapper = PertinentParser.new_wrap(self, target, number, tag)
    wrapper + self
  end
end
@@ -0,0 +1,25 @@
1
# Describes how a piece of text is rewritten. The type is one of :identity,
# :replacement or :wrap; property holds the data for that type (the
# replacement string, or an [opening, closing] pair for a wrap).
class Transform
  attr_accessor :type, :property

  def initialize(type, property)
    @type = type
    @property = property
  end

  # Splits this transform into two transforms for the two halves of a range
  # that is being cut in two.
  #
  # For a :replacement the first n characters of the replacement go to the
  # first transform and the remainder to the second; either part is "" when
  # there is nothing left for it. A :wrap or :identity applies unchanged to
  # both halves. Returns a two-element Array, or nil for an unknown type.
  def split(n)
    case @type
    when :replacement
      # [0, n] (start, length) is used rather than [0..n-1], which would
      # return the whole string for n == 0.
      head = @property[0, n] || ""
      tail = @property[n..-1] || ""
      [Transform.new(:replacement, head), Transform.new(:replacement, tail)]
    when :wrap, :identity
      [self, dup]
    end
  end

  # Returns the transformed version of s: s itself for :identity, the
  # replacement string for :replacement, and s surrounded by the
  # opening/closing pair for :wrap. Returns nil for an unknown type.
  def apply(s)
    case @type
    when :identity
      s
    when :replacement
      @property
    when :wrap
      @property[0] + s + @property[1]
    end
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pertinent_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Bunday
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hpricot
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.6
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.6
27
+ description: PertinentParser helps you compose HTML tags across existing tag boundaries.
28
+ email: mkbunday@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/pertinent_parser.rb
34
+ - lib/pertinent_parser/rule.rb
35
+ - lib/pertinent_parser/text.rb
36
+ - lib/pertinent_parser/transform.rb
37
+ homepage: https://github.com/zencephalon/Pertinent_Parser
38
+ licenses:
39
+ - MIT
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.2.2
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: PertinentParser helps you compose HTML tags across existing tag boundaries.
61
+ test_files: []