ruby-gumbo 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.mkd +28 -31
- data/Rakefile +60 -59
- data/ext/extconf.rb +17 -9
- data/ext/{gumbo.c → ruby_gumbo_ext.c} +29 -28
- data/lib/gumbo.rb +19 -0
- data/lib/gumbo/element.rb +52 -0
- data/lib/gumbo/{extra.rb → node.rb} +19 -22
- data/lib/gumbo/text.rb +29 -0
- data/vendor/gumbo-parser/src/attribute.c +44 -0
- data/vendor/gumbo-parser/src/attribute.h +37 -0
- data/vendor/gumbo-parser/src/char_ref.c +2561 -0
- data/vendor/gumbo-parser/src/char_ref.h +61 -0
- data/vendor/gumbo-parser/src/error.c +258 -0
- data/vendor/gumbo-parser/src/error.h +227 -0
- data/vendor/gumbo-parser/src/gumbo.h +807 -0
- data/vendor/gumbo-parser/src/insertion_mode.h +57 -0
- data/vendor/gumbo-parser/src/parser.c +3917 -0
- data/vendor/gumbo-parser/src/parser.h +57 -0
- data/vendor/gumbo-parser/src/string_buffer.c +106 -0
- data/vendor/gumbo-parser/src/string_buffer.h +81 -0
- data/vendor/gumbo-parser/src/string_piece.c +49 -0
- data/vendor/gumbo-parser/src/string_piece.h +39 -0
- data/vendor/gumbo-parser/src/tag.c +225 -0
- data/vendor/gumbo-parser/src/token_type.h +40 -0
- data/vendor/gumbo-parser/src/tokenizer.c +2980 -0
- data/vendor/gumbo-parser/src/tokenizer.h +123 -0
- data/vendor/gumbo-parser/src/tokenizer_states.h +103 -0
- data/vendor/gumbo-parser/src/utf8.c +275 -0
- data/vendor/gumbo-parser/src/utf8.h +127 -0
- data/vendor/gumbo-parser/src/util.c +58 -0
- data/vendor/gumbo-parser/src/util.h +62 -0
- data/vendor/gumbo-parser/src/vector.c +123 -0
- data/vendor/gumbo-parser/src/vector.h +69 -0
- metadata +40 -10
- data/ext/extconf.h +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ab78fd0033219498d2f13464465c342930cdf5a
|
4
|
+
data.tar.gz: 949b7b24664379d5494889e81d44a4361d1dd839
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20fd4335420189d4bd4b7aa2247c43f681234ac6aca9f8d2e2292c72e398dc459a893b2c8bfbafca0a73f86c5ea73447b8dd48e3b4da6f79e36e9beaaeedc71d
|
7
|
+
data.tar.gz: a358f29bd205baf10a6437db066c92c3035d402b054c720b040cb8876b41809295c2e25a26b6b6e1a06f4c6094b768802f681a6701b8d90c8419704f342d62b4
|
data/LICENSE
CHANGED
data/README.mkd
CHANGED
@@ -1,48 +1,45 @@
|
|
1
1
|
# ruby-gumbo
|
2
2
|
|
3
|
-
|
3
|
+
Ruby bindings for Google's [Gumbo][gumbo] HTML5
|
4
|
+
parser.
|
4
5
|
|
5
|
-
`ruby-gumbo` is a ruby binding for the Gumbo HTML5 parser.
|
6
6
|
|
7
|
-
##
|
7
|
+
## Getting Started
|
8
8
|
|
9
|
-
|
9
|
+
Stick it in your `Gemfile`!
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
install` (the gem file is in the `pkg` directory).
|
11
|
+
```ruby
|
12
|
+
gem 'ruby-gumbo', '~> 1.1'
|
13
|
+
```
|
15
14
|
|
16
|
-
|
15
|
+
And then parse away:
|
17
16
|
|
18
17
|
```ruby
|
19
|
-
require '
|
20
|
-
|
21
|
-
html = Net::HTTP.get URI.parse('http://example.org')
|
22
|
-
Gumbo::parse(html) {|doc| doc.dump_tree}
|
18
|
+
require 'gumbo'
|
19
|
+
doc = Gumbo::parse(html)
|
23
20
|
```
|
24
21
|
|
25
|
-
|
22
|
+
You'll probably want to peruse the [documentation][docs] to see how to navigate
|
23
|
+
a document and its nodes.
|
24
|
+
|
25
|
+
|
26
|
+
## Developing
|
27
|
+
|
28
|
+
Grab the repository, and make sure to include submodules:
|
26
29
|
|
27
30
|
```
|
28
|
-
|
29
|
-
<HEAD>
|
30
|
-
<TITLE>
|
31
|
-
<META charset>
|
32
|
-
<META http-equiv content>
|
33
|
-
<META name content>
|
34
|
-
<STYLE type>
|
35
|
-
<BODY>
|
36
|
-
<DIV>
|
37
|
-
<H1>
|
38
|
-
<P>
|
39
|
-
<P>
|
40
|
-
<A href>
|
31
|
+
git clone --recursive https://github.com/nevir/ruby-gumbo
|
41
32
|
```
|
42
33
|
|
43
|
-
|
34
|
+
And from there you should be able to `rake build` and `gem install pkg/*.gem`.
|
35
|
+
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
ruby-gumbo is licensed under the [ISC license](LICENSE), and packages the
|
40
|
+
[gumbo-parser library][gumbo] (APACHE v2.0).
|
44
41
|
|
45
|
-
|
42
|
+
This is not a Google product and is not supported by Google in any way.
|
46
43
|
|
47
|
-
|
48
|
-
|
44
|
+
[gumbo]: https://github.com/google/gumbo-parser
|
45
|
+
[docs]: http://rubydoc.info/github/nevir/ruby-gumbo/master/frames
|
data/Rakefile
CHANGED
@@ -1,79 +1,80 @@
|
|
1
|
-
|
2
1
|
require 'rake/clean'
|
3
|
-
|
4
|
-
require 'rdoc/task'
|
5
|
-
|
6
2
|
require 'rubygems/package_task'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
3
|
+
require 'yard'
|
4
|
+
|
5
|
+
VERSION = '1.1.0'
|
6
|
+
|
7
|
+
BUILT_EXTENSION = "ext/gumbo_ext.#{RbConfig::CONFIG['DLEXT']}"
|
8
|
+
BUILT_FILES = FileList[
|
9
|
+
BUILT_EXTENSION,
|
10
|
+
]
|
11
|
+
EXTENSION_SOURCE_FILES = FileList[
|
12
|
+
'ext/extconf.rb',
|
13
|
+
'ext/ruby_gumbo*.{h,c}',
|
14
|
+
]
|
15
|
+
SOURCE_FILES = FileList[
|
16
|
+
'Rakefile',
|
17
|
+
'LICENSE',
|
18
|
+
'README.mkd',
|
19
|
+
'lib/**/*.rb',
|
20
|
+
*EXTENSION_SOURCE_FILES,
|
21
|
+
]
|
22
|
+
VENDOR_FILES = FileList[
|
23
|
+
'vendor/gumbo-parser/src/*',
|
24
|
+
]
|
25
|
+
PACKAGED_FILES = FileList[
|
26
|
+
*BUILT_EXTENSION,
|
27
|
+
*SOURCE_FILES,
|
28
|
+
*VENDOR_FILES
|
29
|
+
]
|
30
|
+
|
31
|
+
# Building
|
32
|
+
|
33
|
+
task :build => BUILT_EXTENSION
|
34
|
+
|
35
|
+
# Note that this will fail to pick up new files; you'll want to rake clean
|
36
|
+
# after adding/removing files. (The trade-off is that versus rebuilding the
|
37
|
+
# Makefile each time an extension source file is touched).
|
38
|
+
file 'ext/Makefile' => ['ext/extconf.rb'] + VENDOR_FILES do
|
39
|
+
Dir.chdir 'ext' do
|
40
|
+
ruby 'extconf.rb'
|
27
41
|
end
|
28
42
|
end
|
29
43
|
|
30
|
-
file
|
31
|
-
Dir
|
32
|
-
|
33
|
-
$stderr.puts "make failed"
|
34
|
-
break
|
35
|
-
end
|
44
|
+
file BUILT_EXTENSION => ['ext/Makefile'] + EXTENSION_SOURCE_FILES do
|
45
|
+
Dir.chdir 'ext' do
|
46
|
+
sh 'make'
|
36
47
|
end
|
37
48
|
end
|
38
49
|
|
39
|
-
desc "Build the native library"
|
40
|
-
task :build => MODULE
|
41
|
-
|
42
50
|
# Documentation
|
43
|
-
RDOC_FILES = FileList["ext/gumbo.c", "lib/gumbo/extra.rb"]
|
44
|
-
|
45
|
-
Rake::RDocTask.new do |task|
|
46
|
-
#task.main = "README.rdoc"
|
47
|
-
task.rdoc_dir = "doc/api"
|
48
|
-
task.rdoc_files.include(RDOC_FILES)
|
49
|
-
end
|
50
51
|
|
51
|
-
Rake::
|
52
|
-
#task.main = "README.rdoc"
|
53
|
-
task.rdoc_dir = "doc/ri"
|
54
|
-
task.options << "--ri-system"
|
55
|
-
task.rdoc_files.include(RDOC_FILES)
|
56
|
-
end
|
52
|
+
YARD::Rake::YardocTask.new(:doc)
|
57
53
|
|
58
54
|
# Packaging
|
59
|
-
PKG_FILES = FileList["Rakefile", "LICENSE", "README.mkd",
|
60
|
-
"lib/gumbo/*.rb",
|
61
|
-
"ext/extconf.rb", "ext/*.[hc]"]
|
62
55
|
|
63
56
|
SPEC = Gem::Specification.new do |spec|
|
64
|
-
spec.name
|
65
|
-
spec.version =
|
66
|
-
spec.summary =
|
67
|
-
spec.
|
68
|
-
spec.email
|
69
|
-
spec.license =
|
57
|
+
spec.name = 'ruby-gumbo'
|
58
|
+
spec.version = VERSION
|
59
|
+
spec.summary = 'Ruby bindings for the gumbo html5 parser'
|
60
|
+
spec.authors = ['Nicolas Martyanoff', 'Ian MacLeod']
|
61
|
+
spec.email = ['khaelin@gmail.com', 'ian@nevir.net']
|
62
|
+
spec.license = 'ISC'
|
70
63
|
|
71
|
-
spec.files
|
72
|
-
spec.extensions =
|
64
|
+
spec.files = SOURCE_FILES + VENDOR_FILES
|
65
|
+
spec.extensions = 'ext/extconf.rb'
|
73
66
|
|
74
|
-
spec.required_ruby_version =
|
67
|
+
spec.required_ruby_version = '>= 1.9.3'
|
75
68
|
end
|
76
69
|
|
77
70
|
Gem::PackageTask.new(SPEC) do |pkg|
|
78
|
-
|
71
|
+
pkg.need_tar = true
|
72
|
+
pkg.need_zip = true
|
79
73
|
end
|
74
|
+
|
75
|
+
# Cleaning
|
76
|
+
|
77
|
+
CLEAN.include('ext/**/*', '.yardoc')
|
78
|
+
CLEAN.exclude(*SOURCE_FILES, *BUILT_FILES)
|
79
|
+
|
80
|
+
CLOBBER.include('doc', *BUILT_FILES)
|
data/ext/extconf.rb
CHANGED
@@ -1,15 +1,23 @@
|
|
1
|
+
require 'mkmf'
|
1
2
|
|
2
|
-
|
3
|
+
$CFLAGS << ' -std=c99'
|
3
4
|
|
4
|
-
|
5
|
+
unless enable_config('packaged-library')
|
6
|
+
pkg_config('libgumbo')
|
7
|
+
end
|
5
8
|
|
6
|
-
|
9
|
+
if enable_config('packaged-library') || !have_library('gumbo', 'gumbo_parse')
|
10
|
+
gumbo_lib_src = File.expand_path('../../vendor/gumbo-parser/src', __FILE__)
|
11
|
+
unless File.directory? gumbo_lib_src
|
12
|
+
abort "Couldn't find the packaged gumbo-parser library. " +
|
13
|
+
"Did you forget to git clone --recursive?"
|
14
|
+
end
|
15
|
+
require 'fileutils'
|
7
16
|
|
8
|
-
|
9
|
-
|
17
|
+
# mkmf doesn't appear to deal well with sources/objects in multiple
|
18
|
+
# directories, so we bring the gumbo source to it.
|
19
|
+
gumbo_sources = Dir[File.join(gumbo_lib_src, '*')]
|
20
|
+
FileUtils.cp(gumbo_sources, File.dirname(__FILE__))
|
10
21
|
end
|
11
22
|
|
12
|
-
|
13
|
-
|
14
|
-
create_header
|
15
|
-
create_makefile(extension_name)
|
23
|
+
create_makefile('gumbo_ext')
|
@@ -43,6 +43,7 @@ static VALUE r_gumbo_quirks_mode_to_symbol(GumboQuirksModeEnum mode);
|
|
43
43
|
static VALUE r_gumbo_namespace_to_symbol(GumboNamespaceEnum ns);
|
44
44
|
static VALUE r_gumbo_tag_to_symbol(GumboTag tag);
|
45
45
|
static VALUE r_gumbo_node_to_value(GumboNode *node);
|
46
|
+
static VALUE r_gumbo_stringpiece_to_str(const GumboStringPiece* string);
|
46
47
|
|
47
48
|
static VALUE r_gumbo_attribute_namespace_to_symbol(GumboAttributeNamespaceEnum ns);
|
48
49
|
static VALUE r_gumbo_attribute_to_value(GumboAttribute *attribute);
|
@@ -55,7 +56,7 @@ static VALUE c_source_position;
|
|
55
56
|
|
56
57
|
|
57
58
|
void
|
58
|
-
|
59
|
+
Init_gumbo_ext(void) {
|
59
60
|
m_gumbo = rb_define_module("Gumbo");
|
60
61
|
rb_define_module_function(m_gumbo, "parse", r_gumbo_parse, 1);
|
61
62
|
|
@@ -76,6 +77,8 @@ Init_gumbo(void) {
|
|
76
77
|
rb_define_attr(c_element, "tag", 1, 0);
|
77
78
|
rb_define_attr(c_element, "original_tag", 1, 0);
|
78
79
|
rb_define_attr(c_element, "original_tag_name", 1, 0);
|
80
|
+
rb_define_attr(c_element, "original_end_tag", 1, 0);
|
81
|
+
rb_define_attr(c_element, "original_end_tag_name", 1, 0);
|
79
82
|
rb_define_attr(c_element, "tag_namespace", 1, 0);
|
80
83
|
rb_define_attr(c_element, "attributes", 1, 0);
|
81
84
|
rb_define_attr(c_element, "children", 1, 0);
|
@@ -89,20 +92,9 @@ Init_gumbo(void) {
|
|
89
92
|
rb_define_attr(c_text, "original_text", 1, 0);
|
90
93
|
rb_define_attr(c_text, "start_pos", 1, 0);
|
91
94
|
|
92
|
-
c_cdata = rb_define_class_under(m_gumbo, "CData",
|
93
|
-
|
94
|
-
|
95
|
-
rb_define_attr(c_cdata, "start_pos", 1, 0);
|
96
|
-
|
97
|
-
c_comment = rb_define_class_under(m_gumbo, "Comment", c_node);
|
98
|
-
rb_define_attr(c_comment, "text", 1, 0);
|
99
|
-
rb_define_attr(c_comment, "original_text", 1, 0);
|
100
|
-
rb_define_attr(c_comment, "start_pos", 1, 0);
|
101
|
-
|
102
|
-
c_whitespace = rb_define_class_under(m_gumbo, "Whitespace", c_node);
|
103
|
-
rb_define_attr(c_whitespace, "text", 1, 0);
|
104
|
-
rb_define_attr(c_whitespace, "original_text", 1, 0);
|
105
|
-
rb_define_attr(c_whitespace, "start_pos", 1, 0);
|
95
|
+
c_cdata = rb_define_class_under(m_gumbo, "CData", c_text);
|
96
|
+
c_comment = rb_define_class_under(m_gumbo, "Comment", c_text);
|
97
|
+
c_whitespace = rb_define_class_under(m_gumbo, "Whitespace", c_text);
|
106
98
|
|
107
99
|
c_attribute = rb_define_class_under(m_gumbo, "Attribute", rb_cObject);
|
108
100
|
rb_define_attr(c_attribute, "namespace", 1, 0);
|
@@ -240,7 +232,7 @@ r_tainted_str_new(const char *str, long len) {
|
|
240
232
|
|
241
233
|
if (str) {
|
242
234
|
val = rb_enc_str_new(str, len, rb_utf8_encoding());
|
243
|
-
OBJ_TAINT(
|
235
|
+
OBJ_TAINT(val);
|
244
236
|
} else {
|
245
237
|
val = Qnil;
|
246
238
|
}
|
@@ -258,6 +250,11 @@ r_tainted_cstr_new(const char *str) {
|
|
258
250
|
return r_tainted_str_new(str, strlen(str));
|
259
251
|
}
|
260
252
|
|
253
|
+
static VALUE
|
254
|
+
r_gumbo_stringpiece_to_str(const GumboStringPiece* string) {
|
255
|
+
return r_tainted_str_new(string->data, string->length);
|
256
|
+
}
|
257
|
+
|
261
258
|
static VALUE
|
262
259
|
r_gumbo_destroy_output(VALUE value) {
|
263
260
|
GumboOutput *output;
|
@@ -431,12 +428,9 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
431
428
|
rb_iv_set(r_node, "@tag",
|
432
429
|
r_gumbo_tag_to_symbol(element->tag));
|
433
430
|
rb_iv_set(r_node, "@original_tag",
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
rb_iv_set(r_node, "@original_tag_name",
|
438
|
-
r_tainted_str_new(element->original_tag.data,
|
439
|
-
element->original_tag.length));
|
431
|
+
r_gumbo_stringpiece_to_str(&element->original_tag));
|
432
|
+
rb_iv_set(r_node, "@original_end_tag",
|
433
|
+
r_gumbo_stringpiece_to_str(&element->original_end_tag));
|
440
434
|
rb_iv_set(r_node, "@tag_namespace",
|
441
435
|
r_gumbo_namespace_to_symbol(element->tag_namespace));
|
442
436
|
rb_iv_set(r_node, "@start_pos",
|
@@ -444,6 +438,16 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
444
438
|
rb_iv_set(r_node, "@end_pos",
|
445
439
|
r_gumbo_source_position_to_value(element->end_pos));
|
446
440
|
|
441
|
+
GumboStringPiece original_tag_name = element->original_tag;
|
442
|
+
gumbo_tag_from_original_text(&original_tag_name);
|
443
|
+
rb_iv_set(r_node, "@original_tag_name",
|
444
|
+
r_gumbo_stringpiece_to_str(&original_tag_name));
|
445
|
+
|
446
|
+
GumboStringPiece original_end_tag_name = element->original_end_tag;
|
447
|
+
gumbo_tag_from_original_text(&original_end_tag_name);
|
448
|
+
rb_iv_set(r_node, "@original_end_tag_name",
|
449
|
+
r_gumbo_stringpiece_to_str(&original_end_tag_name));
|
450
|
+
|
447
451
|
r_attributes = rb_ary_new2(element->attributes.length);
|
448
452
|
rb_iv_set(r_node, "@attributes", r_attributes);
|
449
453
|
|
@@ -466,8 +470,7 @@ r_gumbo_node_to_value(GumboNode *node) {
|
|
466
470
|
|
467
471
|
rb_iv_set(r_node, "@text", r_tainted_cstr_new(text->text));
|
468
472
|
rb_iv_set(r_node, "@original_text",
|
469
|
-
|
470
|
-
text->original_text.length));
|
473
|
+
r_gumbo_stringpiece_to_str(&text->original_text));
|
471
474
|
rb_iv_set(r_node, "@start_pos",
|
472
475
|
r_gumbo_source_position_to_value(text->start_pos));
|
473
476
|
}
|
@@ -520,12 +523,10 @@ r_gumbo_attribute_to_value(GumboAttribute *attribute) {
|
|
520
523
|
r_gumbo_attribute_namespace_to_symbol(attribute->attr_namespace));
|
521
524
|
rb_iv_set(r_attribute, "@name", r_tainted_cstr_new(attribute->name));
|
522
525
|
rb_iv_set(r_attribute, "@original_name",
|
523
|
-
|
524
|
-
attribute->original_name.length));
|
526
|
+
r_gumbo_stringpiece_to_str(&attribute->original_name));
|
525
527
|
rb_iv_set(r_attribute, "@value", r_tainted_cstr_new(attribute->value));
|
526
528
|
rb_iv_set(r_attribute, "@original_value",
|
527
|
-
|
528
|
-
attribute->original_value.length));
|
529
|
+
r_gumbo_stringpiece_to_str(&attribute->original_value));
|
529
530
|
rb_iv_set(r_attribute, "@name_start",
|
530
531
|
r_gumbo_source_position_to_value(attribute->name_start));
|
531
532
|
rb_iv_set(r_attribute, "@name_end",
|
data/lib/gumbo.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
# Copyright (c) 2013 Nicolas Martyanoff
|
3
|
+
#
|
4
|
+
# Permission to use, copy, modify, and distribute this software for any
|
5
|
+
# purpose with or without fee is hereby granted, provided that the above
|
6
|
+
# copyright notice and this permission notice appear in all copies.
|
7
|
+
#
|
8
|
+
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
|
16
|
+
require 'gumbo_ext'
|
17
|
+
require 'gumbo/element'
|
18
|
+
require 'gumbo/node'
|
19
|
+
require 'gumbo/text'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright (c) 2013 Nicolas Martyanoff
|
2
|
+
#
|
3
|
+
# Permission to use, copy, modify, and distribute this software for any
|
4
|
+
# purpose with or without fee is hereby granted, provided that the above
|
5
|
+
# copyright notice and this permission notice appear in all copies.
|
6
|
+
#
|
7
|
+
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
10
|
+
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
12
|
+
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
13
|
+
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
14
|
+
|
15
|
+
require 'gumbo'
|
16
|
+
|
17
|
+
class Gumbo::Element
|
18
|
+
def to_s
|
19
|
+
if original_tag
|
20
|
+
open_tag = original_tag
|
21
|
+
end_tag = original_end_tag || ''
|
22
|
+
else
|
23
|
+
tag_name = original_tag_name || tag
|
24
|
+
open_tag = "<#{tag_name}>"
|
25
|
+
end_tag = "</#{tag_name}>"
|
26
|
+
end
|
27
|
+
|
28
|
+
open_tag + (children || []).map(&:to_s).join + end_tag
|
29
|
+
end
|
30
|
+
alias_method :inspect, :to_s
|
31
|
+
|
32
|
+
# The *byte* offset range this element was extracted from, or nil if it
|
33
|
+
# was inserted algorithmically.
|
34
|
+
def offset_range
|
35
|
+
return nil unless original_tag
|
36
|
+
if original_end_tag
|
37
|
+
end_offset = end_pos.offset + original_end_tag.bytesize
|
38
|
+
else
|
39
|
+
end_offset = start_pos.offset + original_tag.bytesize
|
40
|
+
end
|
41
|
+
|
42
|
+
start_pos.offset...end_offset
|
43
|
+
end
|
44
|
+
|
45
|
+
# The *byte* offset range where the content inside this node exists, or nil if
|
46
|
+
# the node was inserted algorithmically, or has no content.
|
47
|
+
def content_range
|
48
|
+
return nil unless original_tag && original_end_tag
|
49
|
+
|
50
|
+
(start_pos.offset + original_tag.bytesize)...end_pos.offset
|
51
|
+
end
|
52
|
+
end
|