html_tokenizer 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/html_tokenizer_ext/parser.c +5 -2
- data/ext/html_tokenizer_ext/parser.h +2 -0
- data/html_tokenizer.gemspec +1 -1
- data/lib/html_tokenizer.rb +3 -2
- data/test/unit/parser_test.rb +16 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf47920a7a89b9806ac9b1e830b6965e2ec0e98e
|
4
|
+
data.tar.gz: 97dbc221ee255a3d6a9d17f20e1807cdddec3eb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9865b029658f9ad0186863319ecbb9a9d62f7de43b7763c91f455848c79cfd0fed2008f5f86d7b79b5434a97b547464716e5a9ebb67ac6248ceec5b8c85cd4a
|
7
|
+
data.tar.gz: 0c092d6501187b4eff8d0981831b3df3a25b957516c138b7989ae57c92c139ade5952a90df7f80ed928252c724de9cc2bc37d0921540d4da0d1bd13faf6462aa
|
data/Gemfile.lock
CHANGED
data/ext/html_tokenizer_ext/parser.c
CHANGED
@@ -82,6 +82,8 @@ static void parser_add_error(struct parser_t *parser, const char *message)
|
|
82
82
|
{
|
83
83
|
REALLOC_N(parser->errors, struct parser_document_error_t, parser->errors_count + 1);
|
84
84
|
parser->errors[parser->errors_count].message = strdup(message);
|
85
|
+
parser->errors[parser->errors_count].pos = parser->tk.scan.cursor;
|
86
|
+
parser->errors[parser->errors_count].mb_pos = parser->tk.scan.mb_cursor;
|
85
87
|
parser->errors[parser->errors_count].line_number = parser->doc.line_number;
|
86
88
|
parser->errors[parser->errors_count].column_number = parser->doc.column_number;
|
87
89
|
parser->errors_count += 1;
|
@@ -723,12 +725,13 @@ static VALUE create_parser_error(struct parser_document_error_t *error)
|
|
723
725
|
{
|
724
726
|
VALUE module = rb_const_get(rb_cObject, rb_intern("HtmlTokenizer"));
|
725
727
|
VALUE klass = rb_const_get(module, rb_intern("ParserError"));
|
726
|
-
VALUE args[3] = {
|
728
|
+
VALUE args[4] = {
|
727
729
|
rb_str_new2(error->message),
|
730
|
+
ULONG2NUM(error->mb_pos),
|
728
731
|
ULONG2NUM(error->line_number),
|
729
732
|
ULONG2NUM(error->column_number),
|
730
733
|
};
|
731
|
-
return rb_class_new_instance(3, args, klass);
|
734
|
+
return rb_class_new_instance(4, args, klass);
|
732
735
|
}
|
733
736
|
|
734
737
|
static VALUE parser_errors_method(VALUE self, VALUE error_p)
|
data/html_tokenizer.gemspec
CHANGED
data/lib/html_tokenizer.rb
CHANGED
@@ -2,9 +2,10 @@ require 'html_tokenizer_ext'
|
|
2
2
|
|
3
3
|
module HtmlTokenizer
|
4
4
|
class ParserError < RuntimeError
|
5
|
-
attr_reader :line, :column
|
6
|
-
def initialize(message, line, column)
|
5
|
+
attr_reader :position, :line, :column
|
6
|
+
def initialize(message, position, line, column)
|
7
7
|
super(message)
|
8
|
+
@position = position
|
8
9
|
@line = line
|
9
10
|
@column = column
|
10
11
|
end
|
data/test/unit/parser_test.rb
CHANGED
@@ -437,6 +437,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
437
437
|
parse('<>')
|
438
438
|
assert_equal 1, @parser.errors_count
|
439
439
|
assert_equal "expected '/' or tag name", @parser.errors.first.to_s
|
440
|
+
assert_equal 1, @parser.errors.first.position
|
440
441
|
assert_equal 1, @parser.errors.first.line
|
441
442
|
assert_equal 1, @parser.errors.first.column
|
442
443
|
end
|
@@ -445,6 +446,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
445
446
|
parse('< ')
|
446
447
|
assert_equal 1, @parser.errors_count
|
447
448
|
assert_equal "expected '/' or tag name", @parser.errors.first.to_s
|
449
|
+
assert_equal 1, @parser.errors.first.position
|
448
450
|
assert_equal 1, @parser.errors.first.line
|
449
451
|
assert_equal 1, @parser.errors.first.column
|
450
452
|
end
|
@@ -453,6 +455,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
453
455
|
parse('<foo =')
|
454
456
|
assert_equal 1, @parser.errors_count
|
455
457
|
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors.first.to_s
|
458
|
+
assert_equal 5, @parser.errors.first.position
|
456
459
|
assert_equal 1, @parser.errors.first.line
|
457
460
|
assert_equal 5, @parser.errors.first.column
|
458
461
|
end
|
@@ -461,6 +464,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
461
464
|
parse('<foo /x')
|
462
465
|
assert_equal 1, @parser.errors_count
|
463
466
|
assert_equal "expected '>' after '/'", @parser.errors.first.to_s
|
467
|
+
assert_equal 6, @parser.errors.first.position
|
464
468
|
assert_equal 1, @parser.errors.first.line
|
465
469
|
assert_equal 6, @parser.errors.first.column
|
466
470
|
end
|
@@ -469,6 +473,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
469
473
|
parse('<foo / ')
|
470
474
|
assert_equal 1, @parser.errors_count
|
471
475
|
assert_equal "expected '>' after '/'", @parser.errors.first.to_s
|
476
|
+
assert_equal 6, @parser.errors.first.position
|
472
477
|
assert_equal 1, @parser.errors.first.line
|
473
478
|
assert_equal 6, @parser.errors.first.column
|
474
479
|
end
|
@@ -476,29 +481,33 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
476
481
|
def test_attribute_name_error
|
477
482
|
parse('<foo bar~')
|
478
483
|
assert_equal 2, @parser.errors_count
|
479
|
-
assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors.first.to_s
|
480
|
-
assert_equal 1, @parser.errors.first.line
|
481
|
-
assert_equal 8, @parser.errors.first.column
|
482
484
|
assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors[0].to_s
|
485
|
+
assert_equal 8, @parser.errors.first.position
|
483
486
|
assert_equal 1, @parser.errors[0].line
|
484
487
|
assert_equal 8, @parser.errors[0].column
|
488
|
+
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
|
489
|
+
assert_equal 8, @parser.errors.first.position
|
490
|
+
assert_equal 1, @parser.errors[1].line
|
491
|
+
assert_equal 8, @parser.errors[1].column
|
485
492
|
end
|
486
493
|
|
487
494
|
def test_attribute_whitespace_or_equal_error
|
488
495
|
parse('<foo bar ~')
|
489
496
|
assert_equal 2, @parser.errors_count
|
490
|
-
assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors.first.to_s
|
491
|
-
assert_equal 1, @parser.errors.first.line
|
492
|
-
assert_equal 9, @parser.errors.first.column
|
493
497
|
assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors[0].to_s
|
494
498
|
assert_equal 1, @parser.errors[0].line
|
495
499
|
assert_equal 9, @parser.errors[0].column
|
500
|
+
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
|
501
|
+
assert_equal 9, @parser.errors.first.position
|
502
|
+
assert_equal 1, @parser.errors[1].line
|
503
|
+
assert_equal 9, @parser.errors[1].column
|
496
504
|
end
|
497
505
|
|
498
506
|
def test_attribute_whitespace_or_equal_error_2
|
499
507
|
parse('<foo bar = >')
|
500
508
|
assert_equal 1, @parser.errors_count
|
501
509
|
assert_equal "expected attribute value after '='", @parser.errors.first.to_s
|
510
|
+
assert_equal 11, @parser.errors.first.position
|
502
511
|
assert_equal 1, @parser.errors.first.line
|
503
512
|
assert_equal 11, @parser.errors.first.column
|
504
513
|
end
|
@@ -507,6 +516,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
507
516
|
parse('<foo bar=""x')
|
508
517
|
assert_equal 1, @parser.errors_count
|
509
518
|
assert_equal "expected space after attribute value", @parser.errors.first.to_s
|
519
|
+
assert_equal 11, @parser.errors.first.position
|
510
520
|
assert_equal 1, @parser.errors.first.line
|
511
521
|
assert_equal 11, @parser.errors.first.column
|
512
522
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francois Chagnon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.6.
|
103
|
+
rubygems_version: 2.6.14
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: HTML Tokenizer
|