pertinent_parser 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 82c1ea5ccb406cf088548e021a771378c318e3d1
4
+ data.tar.gz: 9b89619ed72dda0008192856ea343f624b0573b5
5
+ SHA512:
6
+ metadata.gz: e3d3d6f55364d41ab21ebce1e48a9d2e2b84aaf737afb7b8390a599bbd206dff1ea72874772f9e5dadb340efd794b0b3c62877540915529ff142a406e839ae37
7
+ data.tar.gz: 8238784a074f077775b6d8df61c792a77b5ff0f2e8f8b23c8418f3d4eecd5f7ad15edd4059374744240b8dac1445ac85090a5503e79a603fa1f0da5f9cd54073
@@ -0,0 +1,78 @@
1
+ require "hpricot"
2
+ require "pertinent_parser/transform"
3
+ require "pertinent_parser/rule"
4
+ require "pertinent_parser/text"
5
+
6
# Reopens Hpricot::Elem to provide our own rendering of an element's opening tag.
class Hpricot::Elem
  # Builds the start tag as "<name attrs>", or "<name attrs />" when the
  # element reports empty? and has no etag.
  #
  # NOTE(review): relies on Hpricot's name, attributes_as_html, empty? and
  # etag; their exact semantics are not visible here — confirm against Hpricot.
  #
  # @return [String] the opening tag text
  def stag
    self_closing = empty? && !etag
    closer = self_closing ? " /" : ""
    "<#{name}#{attributes_as_html}#{closer}>"
  end
end
13
+
14
# Factory helpers for building rule-annotated Text objects and the wrap /
# replacement rules that are added to them.
module PertinentParser
  class << self
    # Parses an HTML string into a Text holding the document's inner text,
    # then adds one wrap rule per non-text node so the markup can be
    # re-applied later.
    #
    # We write our own traversal so that we control the HTML representation.
    #
    # NOTE(review): assumes traverse_all_element yields nodes in document
    # order and that every non-text node responds to stag — confirm against
    # Hpricot.
    #
    # @param html [String] HTML source
    # @return [Text] the inner text with its rule tree populated
    def html(html)
      doc = Hpricot(html)
      cursor = 0 # number of text characters seen so far
      t = text(doc.inner_text)
      doc.traverse_all_element do |elem|
        if elem.text?
          cursor += elem.inner_text.size
        else
          # Text#+ registers the rule on t; the element spans the next
          # inner_text.size characters starting at the cursor.
          t + wrap_(cursor...cursor + elem.inner_text.size, elem.stag)
        end
      end
      t
    end

    # Wraps a plain string in a Text whose root rule is an identity
    # transform covering every character index.
    #
    # @param s [String]
    # @return [Text]
    def text(s)
      root = Rule.new((0..s.size - 1), Transform.new(:identity, ["id"]))
      t = Text.new(s)
      t.rule = root
      t
    end

    # Converts a [start, end) offset pair (as returned by MatchData#offset)
    # into an inclusive Range.
    #
    # @param o [Array(Integer, Integer)]
    # @return [Range]
    def offset_to_r(o)
      (o[0]..o[1] - 1)
    end

    # Finds the number-th occurrence (1-based, overlapping occurrences are
    # counted) of the literal string target inside context.
    #
    # @param context [String] text to search
    # @param target [String] literal text to find (regexp-escaped)
    # @param number [Integer] which occurrence to return
    # @return [Range, nil] inclusive character range, or nil when there are
    #   fewer than number occurrences
    def range_from_specification(context, target, number)
      count = 0
      position = 0
      re = Regexp.new(Regexp.escape(target))
      while (match = context.match(re, position))
        bounds = match.offset(0)
        count += 1
        return offset_to_r(bounds) if count == number

        # Resume one character past this match's start so overlapping
        # occurrences are still found, without re-matching the same one.
        position = bounds[0] + 1
      end
      nil
    end

    # Builds a wrap rule around the number-th occurrence of target.
    #
    # @raise [ArgumentError] when the occurrence does not exist
    # @return [Rule]
    def new_wrap(context, target, number, tag)
      wrap_(locate!(context, target, number), tag)
    end

    # Thin constructor for a Rule.
    #
    # @return [Rule]
    def rule(range, transform)
      Rule.new(range, transform)
    end

    # Builds a rule that wraps range in tag and a closing tag derived from
    # the tag's name (the run of non-space characters after "<").
    #
    # @param range [Range, Array<Integer>]
    # @param tag [String] opening tag, e.g. "<b>" or "<a href='x'>"
    # @raise [ArgumentError] when no tag name can be extracted from tag
    # @return [Rule]
    def wrap_(range, tag)
      tag_name = tag[/<(\S*)(\s|>)/, 1]
      raise ArgumentError, "not an opening tag: #{tag.inspect}" if tag_name.nil?

      Rule.new(range, Transform.new(:wrap, [tag, "</#{tag_name}>"]))
    end

    # Builds a rule that replaces the number-th occurrence of target with
    # replacement.
    #
    # @raise [ArgumentError] when the occurrence does not exist
    # @return [Rule]
    def new_replace(context, target, number, replacement)
      range = locate!(context, target, number)
      Rule.new(range, Transform.new(:replacement, replacement))
    end

    private

    # Like range_from_specification, but fails fast instead of returning
    # nil: a nil range would become an empty Rule that cannot be applied.
    def locate!(context, target, number)
      range = range_from_specification(context, target, number)
      return range unless range.nil?

      raise ArgumentError, "occurrence #{number} of #{target.inspect} not found"
    end
  end
end
@@ -0,0 +1,79 @@
1
# A node in a tree of text transformations. Each rule covers a set of
# character indices (stored as an Array) and owns child rules whose ranges
# fall inside its own. Applying a rule applies its children first, then its
# own transform to the (possibly grown) slice.
class Rule
  attr_accessor :name, :children, :parent
  attr_accessor :transform
  attr_accessor :range

  # @param range [Range, #to_a] character indices covered; stored as an Array
  # @param transform [#apply, #split, nil] transformation for the covered slice
  # @param children [Array<Rule>] rules nested inside this one
  # @param parent [Rule, nil] enclosing rule, nil for a root
  def initialize(range, transform = nil, children = [], parent = nil)
    @range = range.to_a
    @children = children
    @parent = parent
    @transform = transform
  end

  # Orders rules by the first index they cover (used to sort children).
  def <=>(other)
    range.first <=> other.range.first
  end

  # Applies children and then this rule's transform to s, in place.
  #
  # @param s [String] buffer being rewritten (mutated)
  # @param offset [Integer] how far earlier rules have already shifted the text
  # @return [Integer] how many characters this subtree added to s
  def apply_recur(s, offset = 0)
    pre = offset
    @children.each { |child| offset += child.apply_recur(s, offset) }
    # The slice starts where earlier siblings pushed it (pre) and ends after
    # everything our own children added (offset).
    span = (@range.first + pre)..(@range.last + offset)
    replaced = @transform.apply(s[span])
    s[span] = replaced
    replaced.size - range.size
  end

  # Returns a transformed copy of str; str itself is not modified.
  #
  # @param str [String]
  # @return [String]
  def apply(str)
    s = str.dup
    apply_recur(s)
    s
  end

  # Adds text's rule to this rule and returns text. The result of add is
  # discarded.
  def +(text)
    add(text.rule)
    text
  end

  # Places new_rule in the tree relative to this rule.
  #
  # @param new_rule [Rule]
  # @return [Symbol, Rule]
  #   :inside  - new_rule fit within this rule and was stored beneath it
  #   :outside - the ranges do not overlap
  #   :contain - new_rule encloses this (non-root) rule; the caller re-homes it
  #   Rule     - either new_rule itself after it took over a root, or the
  #              left-over part of a partially overlapping rule that the
  #              caller still has to place
  def add(new_rule)
    intersection = range & new_rule.range
    if intersection == new_rule.range
      contain = []
      input = new_rule
      @children.each do |child|
        result = child.add(input)
        case result
        when Rule
          input = result
        when :inside
          return :inside
        when :contain
          contain << child
        end
      end
      # Children enclosed by the incoming rule move beneath it.
      @children -= contain
      contain.each { |child| input.add(child) }
      # Record the parent link; without it every rule looks like a root and
      # enclosed children would stay in both lists and be applied twice.
      input.parent = self
      @children << input
      @children.sort!
      :inside
    elsif intersection.empty?
      :outside
    elsif intersection == range
      return :contain unless @parent.nil?

      # This rule is a root and new_rule encloses it: new_rule becomes the
      # root, and its previous children are re-added so they nest correctly.
      orphans = new_rule.children
      new_rule.children = [self]
      @parent = new_rule
      orphans.each { |child| new_rule.add(child) }
      new_rule
    else
      # Partial overlap: split the transform, keep the overlapping part
      # beneath this rule and hand the remainder back to the caller.
      difference = new_rule.range - intersection
      transforms = new_rule.transform.split(difference.size)
      if intersection.first < difference.first
        inter_tran, diff_tran = transforms
      else
        diff_tran, inter_tran = transforms
      end
      add(Rule.new(intersection, inter_tran))
      Rule.new(difference, diff_tran)
    end
  end
end
@@ -0,0 +1,27 @@
1
# A String that carries a rule tree describing how to turn it into HTML.
class Text < String
  attr_accessor :rule

  # Runs the rule tree over this text and returns the result.
  def apply
    rule.apply(self)
  end

  # String concatenation is replaced: "+" adds a rule to this text's rule
  # tree and returns whatever the rule's add returns.
  undef +
  def +(new_rule)
    rule.add(new_rule)
  end

  # Wraps the number-th occurrence of target in tag, nesting the new tag
  # inside existing boundaries.
  def wrap_in(tag, target, number=1)
    wrapper = PertinentParser.new_wrap(self, target, number, tag)
    self + wrapper
  end

  # Replaces the number-th occurrence of target with replacement.
  # Note: this overrides String#replace with a different signature.
  def replace(replacement, target, number=1)
    substitution = PertinentParser.new_replace(self, target, number, replacement)
    self + substitution
  end

  # Wraps the number-th occurrence of target in tag, placing the new tag
  # outside existing boundaries (delegates to the new rule's own "+").
  def wrap_out(tag, target, number=1)
    wrapper = PertinentParser.new_wrap(self, target, number, tag)
    wrapper + self
  end
end
@@ -0,0 +1,25 @@
1
# A transformation applied to a slice of text. The type selects the
# behaviour and property carries its data:
#   :identity    - returns the slice unchanged (property unused by apply)
#   :replacement - property is the replacement string
#   :wrap        - property is [opening, closing]
class Transform
  attr_accessor :type, :property

  # @param type [Symbol] :identity, :replacement or :wrap
  # @param property [Object] data for the type (see class comment)
  def initialize(type, property)
    @type = type
    @property = property
  end

  # Splits this transform into two that together cover the same text.
  #
  # A replacement is cut after its first n characters; wrap and identity
  # transforms are position-independent, so both halves behave like the
  # original.
  #
  # @param n [Integer] length of the first replacement piece
  # @return [Array(Transform, Transform), nil] nil for an unknown type
  def split(n)
    case @type
    when :replacement
      # Exclusive range so n == 0 gives an empty head ([0..n-1] would be
      # [0..-1], i.e. the whole string); the tail falls back to "" when n
      # is past the end.
      head = @property[0...n]
      tail = @property[n..-1] || ""
      [Transform.new(:replacement, head), Transform.new(:replacement, tail)]
    when :wrap, :identity
      [self, dup]
    end
  end

  # Applies the transform to s.
  #
  # @param s [String] the slice being transformed
  # @return [String, nil] nil for an unknown type
  def apply(s)
    case @type
    when :identity then s
    when :replacement then @property
    when :wrap then @property[0] + s + @property[1]
    end
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pertinent_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Bunday
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hpricot
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.6
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.6
27
+ description: PertinentParser helps you compose HTML tags across existing tag boundaries.
28
+ email: mkbunday@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/pertinent_parser.rb
34
+ - lib/pertinent_parser/rule.rb
35
+ - lib/pertinent_parser/text.rb
36
+ - lib/pertinent_parser/transform.rb
37
+ homepage: https://github.com/zencephalon/Pertinent_Parser
38
+ licenses:
39
+ - MIT
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.2.2
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: PertinentParser helps you compose HTML tags across existing tag boundaries.
61
+ test_files: []