ruby-gumbo 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.mkd +28 -31
- data/Rakefile +60 -59
- data/ext/extconf.rb +17 -9
- data/ext/{gumbo.c → ruby_gumbo_ext.c} +29 -28
- data/lib/gumbo.rb +19 -0
- data/lib/gumbo/element.rb +52 -0
- data/lib/gumbo/{extra.rb → node.rb} +19 -22
- data/lib/gumbo/text.rb +29 -0
- data/vendor/gumbo-parser/src/attribute.c +44 -0
- data/vendor/gumbo-parser/src/attribute.h +37 -0
- data/vendor/gumbo-parser/src/char_ref.c +2561 -0
- data/vendor/gumbo-parser/src/char_ref.h +61 -0
- data/vendor/gumbo-parser/src/error.c +258 -0
- data/vendor/gumbo-parser/src/error.h +227 -0
- data/vendor/gumbo-parser/src/gumbo.h +807 -0
- data/vendor/gumbo-parser/src/insertion_mode.h +57 -0
- data/vendor/gumbo-parser/src/parser.c +3917 -0
- data/vendor/gumbo-parser/src/parser.h +57 -0
- data/vendor/gumbo-parser/src/string_buffer.c +106 -0
- data/vendor/gumbo-parser/src/string_buffer.h +81 -0
- data/vendor/gumbo-parser/src/string_piece.c +49 -0
- data/vendor/gumbo-parser/src/string_piece.h +39 -0
- data/vendor/gumbo-parser/src/tag.c +225 -0
- data/vendor/gumbo-parser/src/token_type.h +40 -0
- data/vendor/gumbo-parser/src/tokenizer.c +2980 -0
- data/vendor/gumbo-parser/src/tokenizer.h +123 -0
- data/vendor/gumbo-parser/src/tokenizer_states.h +103 -0
- data/vendor/gumbo-parser/src/utf8.c +275 -0
- data/vendor/gumbo-parser/src/utf8.h +127 -0
- data/vendor/gumbo-parser/src/util.c +58 -0
- data/vendor/gumbo-parser/src/util.h +62 -0
- data/vendor/gumbo-parser/src/vector.c +123 -0
- data/vendor/gumbo-parser/src/vector.h +69 -0
- metadata +40 -10
- data/ext/extconf.h +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ab78fd0033219498d2f13464465c342930cdf5a
|
4
|
+
data.tar.gz: 949b7b24664379d5494889e81d44a4361d1dd839
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20fd4335420189d4bd4b7aa2247c43f681234ac6aca9f8d2e2292c72e398dc459a893b2c8bfbafca0a73f86c5ea73447b8dd48e3b4da6f79e36e9beaaeedc71d
|
7
|
+
data.tar.gz: a358f29bd205baf10a6437db066c92c3035d402b054c720b040cb8876b41809295c2e25a26b6b6e1a06f4c6094b768802f681a6701b8d90c8419704f342d62b4
|
data/LICENSE
CHANGED
data/README.mkd
CHANGED
@@ -1,48 +1,45 @@
|
|
1
1
|
# ruby-gumbo
|
2
2
|
|
3
|
-
|
3
|
+
Ruby bindings for Google's [Gumbo][gumbo] HTML5
|
4
|
+
parser.
|
4
5
|
|
5
|
-
`ruby-gumbo` is a ruby binding for the Gumbo HTML5 parser.
|
6
6
|
|
7
|
-
##
|
7
|
+
## Getting Started
|
8
8
|
|
9
|
-
|
9
|
+
Stick it in your `Gemfile`!
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
install` (the gem file is in the `pkg` directory).
|
11
|
+
```ruby
|
12
|
+
gem 'ruby-gumbo', '~> 1.1'
|
13
|
+
```
|
15
14
|
|
16
|
-
|
15
|
+
And then parse away:
|
17
16
|
|
18
17
|
```ruby
|
19
|
-
require '
|
20
|
-
|
21
|
-
html = Net::HTTP.get URI.parse('http://example.org')
|
22
|
-
Gumbo::parse(html) {|doc| doc.dump_tree}
|
18
|
+
require 'gumbo'
|
19
|
+
doc = Gumbo::parse(html)
|
23
20
|
```
|
24
21
|
|
25
|
-
|
22
|
+
You'll probably want to peruse the [documentation][docs] to see how to navigate
|
23
|
+
a document and its nodes.
|
24
|
+
|
25
|
+
|
26
|
+
## Developing
|
27
|
+
|
28
|
+
Grab the repository, and make sure to include submodules:
|
26
29
|
|
27
30
|
```
|
28
|
-
|
29
|
-
<HEAD>
|
30
|
-
<TITLE>
|
31
|
-
<META charset>
|
32
|
-
<META http-equiv content>
|
33
|
-
<META name content>
|
34
|
-
<STYLE type>
|
35
|
-
<BODY>
|
36
|
-
<DIV>
|
37
|
-
<H1>
|
38
|
-
<P>
|
39
|
-
<P>
|
40
|
-
<A href>
|
31
|
+
git clone --recursive https://github.com/nevir/ruby-gumbo
|
41
32
|
```
|
42
33
|
|
43
|
-
|
34
|
+
And from there you should be able to `rake build` and `gem install pkg/*.gem`.
|
35
|
+
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
ruby-gumbo is licensed under the [ISC license](LICENSE), and packages the
|
40
|
+
[gumbo-parser library][gumbo] (APACHE v2.0).
|
44
41
|
|
45
|
-
|
42
|
+
This is not a Google product and is not supported by Google in any way.
|
46
43
|
|
47
|
-
|
48
|
-
|
44
|
+
[gumbo]: https://github.com/google/gumbo-parser
|
45
|
+
[docs]: http://rubydoc.info/github/nevir/ruby-gumbo/master/frames
|
data/Rakefile
CHANGED
@@ -1,79 +1,80 @@
|
|
1
|
-
|
2
1
|
require 'rake/clean'
|
3
|
-
|
4
|
-
require 'rdoc/task'
|
5
|
-
|
6
2
|
require 'rubygems/package_task'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
3
|
+
require 'yard'
|
4
|
+
|
5
|
+
VERSION = '1.1.0'
|
6
|
+
|
7
|
+
BUILT_EXTENSION = "ext/gumbo_ext.#{RbConfig::CONFIG['DLEXT']}"
|
8
|
+
BUILT_FILES = FileList[
|
9
|
+
BUILT_EXTENSION,
|
10
|
+
]
|
11
|
+
EXTENSION_SOURCE_FILES = FileList[
|
12
|
+
'ext/extconf.rb',
|
13
|
+
'ext/ruby_gumbo*.{h,c}',
|
14
|
+
]
|
15
|
+
SOURCE_FILES = FileList[
|
16
|
+
'Rakefile',
|
17
|
+
'LICENSE',
|
18
|
+
'README.mkd',
|
19
|
+
'lib/**/*.rb',
|
20
|
+
*EXTENSION_SOURCE_FILES,
|
21
|
+
]
|
22
|
+
VENDOR_FILES = FileList[
|
23
|
+
'vendor/gumbo-parser/src/*',
|
24
|
+
]
|
25
|
+
PACKAGED_FILES = FileList[
|
26
|
+
*BUILT_EXTENSION,
|
27
|
+
*SOURCE_FILES,
|
28
|
+
*VENDOR_FILES
|
29
|
+
]
|
30
|
+
|
31
|
+
# Building
|
32
|
+
|
33
|
+
task :build => BUILT_EXTENSION
|
34
|
+
|
35
|
+
# Note that this will fail to pick up new files; you'll want to rake clean
|
36
|
+
# after adding/removing files. (The trade-off is that versus rebuilding the
|
37
|
+
# Makefile each time an extension source file is touched).
|
38
|
+
file 'ext/Makefile' => ['ext/extconf.rb'] + VENDOR_FILES do
|
39
|
+
Dir.chdir 'ext' do
|
40
|
+
ruby 'extconf.rb'
|
27
41
|
end
|
28
42
|
end
|
29
43
|
|
30
|
-
file
|
31
|
-
Dir
|
32
|
-
|
33
|
-
$stderr.puts "make failed"
|
34
|
-
break
|
35
|
-
end
|
44
|
+
file BUILT_EXTENSION => ['ext/Makefile'] + EXTENSION_SOURCE_FILES do
|
45
|
+
Dir.chdir 'ext' do
|
46
|
+
sh 'make'
|
36
47
|
end
|
37
48
|
end
|
38
49
|
|
39
|
-
desc "Build the native library"
|
40
|
-
task :build => MODULE
|
41
|
-
|
42
50
|
# Documentation
|
43
|
-
RDOC_FILES = FileList["ext/gumbo.c", "lib/gumbo/extra.rb"]
|
44
|
-
|
45
|
-
Rake::RDocTask.new do |task|
|
46
|
-
#task.main = "README.rdoc"
|
47
|
-
task.rdoc_dir = "doc/api"
|
48
|
-
task.rdoc_files.include(RDOC_FILES)
|
49
|
-
end
|
50
51
|
|
51
|
-
Rake::
|
52
|
-
#task.main = "README.rdoc"
|
53
|
-
task.rdoc_dir = "doc/ri"
|
54
|
-
task.options << "--ri-system"
|
55
|
-
task.rdoc_files.include(RDOC_FILES)
|
56
|
-
end
|
52
|
+
YARD::Rake::YardocTask.new(:doc)
|
57
53
|
|
58
54
|
# Packaging
|
59
|
-
PKG_FILES = FileList["Rakefile", "LICENSE", "README.mkd",
|
60
|
-
"lib/gumbo/*.rb",
|
61
|
-
"ext/extconf.rb", "ext/*.[hc]"]
|
62
55
|
|
63
56
|
SPEC = Gem::Specification.new do |spec|
|
64
|
-
spec.name
|
65
|
-
spec.version =
|
66
|
-
spec.summary =
|
67
|
-
spec.
|
68
|
-
spec.email
|
69
|
-
spec.license =
|
57
|
+
spec.name = 'ruby-gumbo'
|
58
|
+
spec.version = VERSION
|
59
|
+
spec.summary = 'Ruby bindings for the gumbo html5 parser'
|
60
|
+
spec.authors = ['Nicolas Martyanoff', 'Ian MacLeod']
|
61
|
+
spec.email = ['khaelin@gmail.com', 'ian@nevir.net']
|
62
|
+
spec.license = 'ISC'
|
70
63
|
|
71
|
-
spec.files
|
72
|
-
spec.extensions =
|
64
|
+
spec.files = SOURCE_FILES + VENDOR_FILES
|
65
|
+
spec.extensions = 'ext/extconf.rb'
|
73
66
|
|
74
|
-
spec.required_ruby_version =
|
67
|
+
spec.required_ruby_version = '>= 1.9.3'
|
75
68
|
end
|
76
69
|
|
77
70
|
Gem::PackageTask.new(SPEC) do |pkg|
|
78
|
-
|
71
|
+
pkg.need_tar = true
|
72
|
+
pkg.need_zip = true
|
79
73
|
end
|
74
|
+
|
75
|
+
# Cleaning
|
76
|
+
|
77
|
+
CLEAN.include('ext/**/*', '.yardoc')
|
78
|
+
CLEAN.exclude(*SOURCE_FILES, *BUILT_FILES)
|
79
|
+
|
80
|
+
CLOBBER.include('doc', *BUILT_FILES)
|
data/ext/extconf.rb
CHANGED
@@ -1,15 +1,23 @@
|
|
1
|
+
require 'mkmf'
|
1
2
|
|
2
|
-
|
3
|
+
$CFLAGS << ' -std=c99'
|
3
4
|
|
4
|
-
|
5
|
+
unless enable_config('packaged-library')
|
6
|
+
pkg_config('libgumbo')
|
7
|
+
end
|
5
8
|
|
6
|
-
|
9
|
+
if enable_config('packaged-library') || !have_library('gumbo', 'gumbo_parse')
|
10
|
+
gumbo_lib_src = File.expand_path('../../vendor/gumbo-parser/src', __FILE__)
|
11
|
+
unless File.directory? gumbo_lib_src
|
12
|
+
abort "Couldn't find the packaged gumbo-parser library. " +
|
13
|
+
"Did you forget to git clone --recursive?"
|
14
|
+
end
|
15
|
+
require 'fileutils'
|
7
16
|
|
8
|
-
|
9
|
-
|
17
|
+
# mkmf doesn't appear to deal well with sources/objects in multiple
|
18
|
+
# directories, so we bring the gumbo source to it.
|
19
|
+
gumbo_sources = Dir[File.join(gumbo_lib_src, '*')]
|
20
|
+
FileUtils.cp(gumbo_sources, File.dirname(__FILE__))
|
10
21
|
end
|
11
22
|
|
12
|
-
|
13
|
-
|
14
|
-
create_header
|
15
|
-
create_makefile(extension_name)
|
23
|
+
create_makefile('gumbo_ext')
|
@@ -43,6 +43,7 @@ static VALUE r_gumbo_quirks_mode_to_symbol(GumboQuirksModeEnum mode);
|
|
43
43
|
static VALUE r_gumbo_namespace_to_symbol(GumboNamespaceEnum ns);
|
44
44
|
static VALUE r_gumbo_tag_to_symbol(GumboTag tag);
|
45
45
|
static VALUE r_gumbo_node_to_value(GumboNode *node);
|
46
|
+
static VALUE r_gumbo_stringpiece_to_str(const GumboStringPiece* string);
|
46
47
|
|
47
48
|
static VALUE r_gumbo_attribute_namespace_to_symbol(GumboAttributeNamespaceEnum ns);
|
48
49
|
static VALUE r_gumbo_attribute_to_value(GumboAttribute *attribute);
|
@@ -55,7 +56,7 @@ static VALUE c_source_position;
|
|
55
56
|
|
56
57
|
|
57
58
|
void
|
58
|
-
|
59
|
+
Init_gumbo_ext(void) {
|
59
60
|
m_gumbo = rb_define_module("Gumbo");
|
60
61
|
rb_define_module_function(m_gumbo, "parse", r_gumbo_parse, 1);
|
61
62
|
|
@@ -76,6 +77,8 @@ Init_gumbo(void) {
|
|
76
77
|
rb_define_attr(c_element, "tag", 1, 0);
|
77
78
|
rb_define_attr(c_element, "original_tag", 1, 0);
|
78
79
|
rb_define_attr(c_element, "original_tag_name", 1, 0);
|
80
|
+
rb_define_attr(c_element, "original_end_tag", 1, 0);
|
81
|
+
rb_define_attr(c_element, "original_end_tag_name", 1, 0);
|
79
82
|
rb_define_attr(c_element, "tag_namespace", 1, 0);
|
80
83
|
rb_define_attr(c_element, "attributes", 1, 0);
|
81
84
|
rb_define_attr(c_element, "children", 1, 0);
|
@@ -89,20 +92,9 @@ Init_gumbo(void) {
|
|
89
92
|
rb_define_attr(c_text, "original_text", 1, 0);
|
90
93
|
rb_define_attr(c_text, "start_pos", 1, 0);
|
91
94
|
|
92
|
-
c_cdata = rb_define_class_under(m_gumbo, "CData",
|
93
|
-
|
94
|
-
|
95
|
-
rb_define_attr(c_cdata, "start_pos", 1, 0);
|
96
|
-
|
97
|
-
c_comment = rb_define_class_under(m_gumbo, "Comment", c_node);
|
98
|
-
rb_define_attr(c_comment, "text", 1, 0);
|
99
|
-
rb_define_attr(c_comment, "original_text", 1, 0);
|
100
|
-
rb_define_attr(c_comment, "start_pos", 1, 0);
|
101
|
-
|
102
|
-
c_whitespace = rb_define_class_under(m_gumbo, "Whitespace", c_node);
|
103
|
-
rb_define_attr(c_whitespace, "text", 1, 0);
|
104
|
-
rb_define_attr(c_whitespace, "original_text", 1, 0);
|
105
|
-
rb_define_attr(c_whitespace, "start_pos", 1, 0);
|
95
|
+
c_cdata = rb_define_class_under(m_gumbo, "CData", c_text);
|
96
|
+
c_comment = rb_define_class_under(m_gumbo, "Comment", c_text);
|
97
|
+
c_whitespace = rb_define_class_under(m_gumbo, "Whitespace", c_text);
|
106
98
|
|
107
99
|
c_attribute = rb_define_class_under(m_gumbo, "Attribute", rb_cObject);
|
108
100
|
rb_define_attr(c_attribute, "namespace", 1, 0);
|
@@ -240,7 +232,7 @@ r_tainted_str_new(const char *str, long len) {
|
|
240
232
|
|
241
233
|
if (str) {
|
242
234
|
val = rb_enc_str_new(str, len, rb_utf8_encoding());
|
243
|
-
OBJ_TAINT(
|
235
|
+
OBJ_TAINT(val);
|
244
236
|
} else {
|
245
237
|
val = Qnil;
|
246
238
|
}
|
@@ -258,6 +250,11 @@ r_tainted_cstr_new(const char *str) {
|
|
258
250
|
return r_tainted_str_new(str, strlen(str));
|
259
251
|
}
|
260
252
|
|
253
|
+
static VALUE
|
254
|
+
r_gumbo_stringpiece_to_str(const GumboStringPiece* string) {
|
255
|
+
return r_tainted_str_new(string->data, string->length);
|
256
|
+
}
|
257
|
+
|
261
258
|
static VALUE
|
262
259
|
r_gumbo_destroy_output(VALUE value) {
|
263
260
|
GumboOutput *output;
|
@@ -431,12 +428,9 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
431
428
|
rb_iv_set(r_node, "@tag",
|
432
429
|
r_gumbo_tag_to_symbol(element->tag));
|
433
430
|
rb_iv_set(r_node, "@original_tag",
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
rb_iv_set(r_node, "@original_tag_name",
|
438
|
-
r_tainted_str_new(element->original_tag.data,
|
439
|
-
element->original_tag.length));
|
431
|
+
r_gumbo_stringpiece_to_str(&element->original_tag));
|
432
|
+
rb_iv_set(r_node, "@original_end_tag",
|
433
|
+
r_gumbo_stringpiece_to_str(&element->original_end_tag));
|
440
434
|
rb_iv_set(r_node, "@tag_namespace",
|
441
435
|
r_gumbo_namespace_to_symbol(element->tag_namespace));
|
442
436
|
rb_iv_set(r_node, "@start_pos",
|
@@ -444,6 +438,16 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
444
438
|
rb_iv_set(r_node, "@end_pos",
|
445
439
|
r_gumbo_source_position_to_value(element->end_pos));
|
446
440
|
|
441
|
+
GumboStringPiece original_tag_name = element->original_tag;
|
442
|
+
gumbo_tag_from_original_text(&original_tag_name);
|
443
|
+
rb_iv_set(r_node, "@original_tag_name",
|
444
|
+
r_gumbo_stringpiece_to_str(&original_tag_name));
|
445
|
+
|
446
|
+
GumboStringPiece original_end_tag_name = element->original_end_tag;
|
447
|
+
gumbo_tag_from_original_text(&original_end_tag_name);
|
448
|
+
rb_iv_set(r_node, "@original_end_tag_name",
|
449
|
+
r_gumbo_stringpiece_to_str(&original_end_tag_name));
|
450
|
+
|
447
451
|
r_attributes = rb_ary_new2(element->attributes.length);
|
448
452
|
rb_iv_set(r_node, "@attributes", r_attributes);
|
449
453
|
|
@@ -466,8 +470,7 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
466
470
|
|
467
471
|
rb_iv_set(r_node, "@text", r_tainted_cstr_new(text->text));
|
468
472
|
rb_iv_set(r_node, "@original_text",
|
469
|
-
|
470
|
-
text->original_text.length));
|
473
|
+
r_gumbo_stringpiece_to_str(&text->original_text));
|
471
474
|
rb_iv_set(r_node, "@start_pos",
|
472
475
|
r_gumbo_source_position_to_value(text->start_pos));
|
473
476
|
}
|
@@ -520,12 +523,10 @@ r_gumbo_attribute_to_value(GumboAttribute *attribute) {
|
|
520
523
|
r_gumbo_attribute_namespace_to_symbol(attribute->attr_namespace));
|
521
524
|
rb_iv_set(r_attribute, "@name", r_tainted_cstr_new(attribute->name));
|
522
525
|
rb_iv_set(r_attribute, "@original_name",
|
523
|
-
|
524
|
-
attribute->original_name.length));
|
526
|
+
r_gumbo_stringpiece_to_str(&attribute->original_name));
|
525
527
|
rb_iv_set(r_attribute, "@value", r_tainted_cstr_new(attribute->value));
|
526
528
|
rb_iv_set(r_attribute, "@original_value",
|
527
|
-
|
528
|
-
attribute->original_value.length));
|
529
|
+
r_gumbo_stringpiece_to_str(&attribute->original_value));
|
529
530
|
rb_iv_set(r_attribute, "@name_start",
|
530
531
|
r_gumbo_source_position_to_value(attribute->name_start));
|
531
532
|
rb_iv_set(r_attribute, "@name_end",
|
data/lib/gumbo.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
# Copyright (c) 2013 Nicolas Martyanoff
|
3
|
+
#
|
4
|
+
# Permission to use, copy, modify, and distribute this software for any
|
5
|
+
# purpose with or without fee is hereby granted, provided that the above
|
6
|
+
# copyright notice and this permission notice appear in all copies.
|
7
|
+
#
|
8
|
+
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
|
16
|
+
require 'gumbo_ext'
|
17
|
+
require 'gumbo/element'
|
18
|
+
require 'gumbo/node'
|
19
|
+
require 'gumbo/text'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright (c) 2013 Nicolas Martyanoff
|
2
|
+
#
|
3
|
+
# Permission to use, copy, modify, and distribute this software for any
|
4
|
+
# purpose with or without fee is hereby granted, provided that the above
|
5
|
+
# copyright notice and this permission notice appear in all copies.
|
6
|
+
#
|
7
|
+
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
10
|
+
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
12
|
+
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
13
|
+
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
14
|
+
|
15
|
+
require 'gumbo'
|
16
|
+
|
17
|
+
class Gumbo::Element
|
18
|
+
def to_s
|
19
|
+
if original_tag
|
20
|
+
open_tag = original_tag
|
21
|
+
end_tag = original_end_tag || ''
|
22
|
+
else
|
23
|
+
tag_name = original_tag_name || tag
|
24
|
+
open_tag = "<#{tag_name}>"
|
25
|
+
end_tag = "</#{tag_name}>"
|
26
|
+
end
|
27
|
+
|
28
|
+
open_tag + (children || []).map(&:to_s).join + end_tag
|
29
|
+
end
|
30
|
+
alias_method :inspect, :to_s
|
31
|
+
|
32
|
+
# The *byte* offset range where this element was extracted from, or nil if it
|
33
|
+
# was inserted algorithmically.
|
34
|
+
def offset_range
|
35
|
+
return nil unless original_tag
|
36
|
+
if original_end_tag
|
37
|
+
end_offset = end_pos.offset + original_end_tag.bytesize
|
38
|
+
else
|
39
|
+
end_offset = start_pos.offset + original_tag.bytesize
|
40
|
+
end
|
41
|
+
|
42
|
+
start_pos.offset...end_offset
|
43
|
+
end
|
44
|
+
|
45
|
+
# The *byte* offset range where the content inside this node exists, or nil if
|
46
|
+
# the node was inserted algorithmically, or has no content.
|
47
|
+
def content_range
|
48
|
+
return nil unless original_tag && original_end_tag
|
49
|
+
|
50
|
+
(start_pos.offset + original_tag.bytesize)...end_pos.offset
|
51
|
+
end
|
52
|
+
end
|