trenni 2.0.1 → 2.0.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/README.md +30 -0
- data/Rakefile +1 -1
- data/ext/trenni/markup.c +186 -240
- data/ext/trenni/markup.rl +8 -10
- data/ext/trenni/trenni.c +3 -27
- data/ext/trenni/trenni.h +28 -4
- data/lib/trenni/fallback/markup.rb +244 -257
- data/lib/trenni/fallback/markup.rl +8 -8
- data/lib/trenni/markup.rb +2 -0
- data/lib/trenni/version.rb +1 -1
- data/parsers/trenni/markup.rl +5 -5
- data/spec/trenni/parsers_performance_spec.rb +14 -1
- metadata +2 -2
@@ -32,9 +32,16 @@
|
|
32
32
|
action pcdata_begin {
|
33
33
|
pcdata = ""
|
34
34
|
}
|
35
|
-
|
35
|
+
|
36
36
|
action pcdata_end {
|
37
37
|
}
|
38
|
+
|
39
|
+
action text_begin {
|
40
|
+
}
|
41
|
+
|
42
|
+
action text_end {
|
43
|
+
delegate.text(pcdata)
|
44
|
+
}
|
38
45
|
|
39
46
|
action characters_begin {
|
40
47
|
characters_begin = p
|
@@ -185,13 +192,6 @@
|
|
185
192
|
raise ParseError.new("could not parse cdata", buffer, p)
|
186
193
|
}
|
187
194
|
|
188
|
-
action text_begin {
|
189
|
-
}
|
190
|
-
|
191
|
-
action text_end {
|
192
|
-
delegate.text(pcdata)
|
193
|
-
}
|
194
|
-
|
195
195
|
# This magic ensures that we process bytes.
|
196
196
|
getkey bytes[p];
|
197
197
|
|
data/lib/trenni/markup.rb
CHANGED
data/lib/trenni/version.rb
CHANGED
data/parsers/trenni/markup.rl
CHANGED
@@ -12,11 +12,11 @@
|
|
12
12
|
|
13
13
|
include entities "entities.rl";
|
14
14
|
|
15
|
-
pcdata_character =
|
16
|
-
pcdata_characters =
|
17
|
-
pcdata = (pcdata_characters | entity)+ >pcdata_begin %pcdata_end;
|
15
|
+
pcdata_character = any - [<&];
|
16
|
+
pcdata_characters = pcdata_character+ >characters_begin %characters_end;
|
17
|
+
pcdata = ((pcdata_characters | entity) $(pcdata,2) %(pcdata,1))+ %(pcdata,0) >pcdata_begin %pcdata_end;
|
18
18
|
|
19
|
-
text = pcdata
|
19
|
+
text = pcdata >text_begin %text_end;
|
20
20
|
|
21
21
|
doctype_text = (any* -- '>');
|
22
22
|
doctype = '<!DOCTYPE' >doctype_begin (doctype_text '>') %doctype_end @err(doctype_error);
|
@@ -39,5 +39,5 @@
|
|
39
39
|
|
40
40
|
tag_closing = '</' >tag_closing_begin (identifier '>') %tag_closing_end @err(tag_error);
|
41
41
|
|
42
|
-
main := (text
|
42
|
+
main := (text | tag_opening | tag_closing | instruction | comment | doctype | cdata)**;
|
43
43
|
}%%
|
@@ -3,6 +3,8 @@ require 'benchmark/ips'
|
|
3
3
|
require 'trenni/parsers'
|
4
4
|
require 'trenni/entities'
|
5
5
|
|
6
|
+
require 'nokogiri'
|
7
|
+
|
6
8
|
require 'ruby-prof'
|
7
9
|
|
8
10
|
RSpec.shared_context "profile" do
|
@@ -28,7 +30,7 @@ RSpec.describe Trenni::Parsers do
|
|
28
30
|
|
29
31
|
it "should be fast to parse large documents" do
|
30
32
|
Benchmark.ips do |x|
|
31
|
-
x.report("Large
|
33
|
+
x.report("Large (Trenni)") do |times|
|
32
34
|
delegate = Trenni::ParseDelegate.new
|
33
35
|
|
34
36
|
while (times -= 1) >= 0
|
@@ -38,6 +40,17 @@ RSpec.describe Trenni::Parsers do
|
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
43
|
+
x.report("Large (Nokogiri)") do |times|
|
44
|
+
delegate = Trenni::ParseDelegate.new
|
45
|
+
parser = Nokogiri::HTML::SAX::Parser.new(delegate)
|
46
|
+
|
47
|
+
while (times -= 1) >= 0
|
48
|
+
parser.parse(xhtml_buffer.read)
|
49
|
+
|
50
|
+
delegate.events.clear
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
41
54
|
x.compare!
|
42
55
|
end
|
43
56
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trenni
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samuel Williams
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|