trenni 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,9 +32,16 @@
32
32
  action pcdata_begin {
33
33
  pcdata = ""
34
34
  }
35
-
35
+
36
36
  action pcdata_end {
37
37
  }
38
+
39
+ action text_begin {
40
+ }
41
+
42
+ action text_end {
43
+ delegate.text(pcdata)
44
+ }
38
45
 
39
46
  action characters_begin {
40
47
  characters_begin = p
@@ -185,13 +192,6 @@
185
192
  raise ParseError.new("could not parse cdata", buffer, p)
186
193
  }
187
194
 
188
- action text_begin {
189
- }
190
-
191
- action text_end {
192
- delegate.text(pcdata)
193
- }
194
-
195
195
  # This magic ensures that we process bytes.
196
196
  getkey bytes[p];
197
197
 
@@ -51,6 +51,8 @@ module Trenni
51
51
  def initialize(string = nil, escape = true)
52
52
  if string
53
53
  super(string)
54
+
55
+ # self.replace CGI.escapeHTML(self)
54
56
  ESCAPE.gsub!(self) if escape
55
57
  else
56
58
  super()
@@ -19,5 +19,5 @@
19
19
  # THE SOFTWARE.
20
20
 
21
21
  module Trenni
22
- VERSION = "2.0.1"
22
+ VERSION = "2.0.2"
23
23
  end
@@ -12,11 +12,11 @@
12
12
 
13
13
  include entities "entities.rl";
14
14
 
15
- pcdata_character = (any - [<&]);
16
- pcdata_characters = (pcdata_character+) >characters_begin %characters_end;
17
- pcdata = (pcdata_characters | entity)+ >pcdata_begin %pcdata_end;
15
+ pcdata_character = any - [<&];
16
+ pcdata_characters = pcdata_character+ >characters_begin %characters_end;
17
+ pcdata = ((pcdata_characters | entity) $(pcdata,2) %(pcdata,1))+ %(pcdata,0) >pcdata_begin %pcdata_end;
18
18
 
19
- text = pcdata $(greedy_text,1) >text_begin %text_end;
19
+ text = pcdata >text_begin %text_end;
20
20
 
21
21
  doctype_text = (any* -- '>');
22
22
  doctype = '<!DOCTYPE' >doctype_begin (doctype_text '>') %doctype_end @err(doctype_error);
@@ -39,5 +39,5 @@
39
39
 
40
40
  tag_closing = '</' >tag_closing_begin (identifier '>') %tag_closing_end @err(tag_error);
41
41
 
42
- main := (text >(greedy_text,0) | tag_opening | tag_closing | instruction | comment | doctype | cdata)*;
42
+ main := (text | tag_opening | tag_closing | instruction | comment | doctype | cdata)**;
43
43
  }%%
@@ -3,6 +3,8 @@ require 'benchmark/ips'
3
3
  require 'trenni/parsers'
4
4
  require 'trenni/entities'
5
5
 
6
+ require 'nokogiri'
7
+
6
8
  require 'ruby-prof'
7
9
 
8
10
  RSpec.shared_context "profile" do
@@ -28,7 +30,7 @@ RSpec.describe Trenni::Parsers do
28
30
 
29
31
  it "should be fast to parse large documents" do
30
32
  Benchmark.ips do |x|
31
- x.report("Large Document") do |times|
33
+ x.report("Large (Trenni)") do |times|
32
34
  delegate = Trenni::ParseDelegate.new
33
35
 
34
36
  while (times -= 1) >= 0
@@ -38,6 +40,17 @@ RSpec.describe Trenni::Parsers do
38
40
  end
39
41
  end
40
42
 
43
+ x.report("Large (Nokogiri)") do |times|
44
+ delegate = Trenni::ParseDelegate.new
45
+ parser = Nokogiri::HTML::SAX::Parser.new(delegate)
46
+
47
+ while (times -= 1) >= 0
48
+ parser.parse(xhtml_buffer.read)
49
+
50
+ delegate.events.clear
51
+ end
52
+ end
53
+
41
54
  x.compare!
42
55
  end
43
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trenni
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-28 00:00:00.000000000 Z
11
+ date: 2016-11-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler