html_tokenizer 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/html_tokenizer_ext/parser.c +5 -2
- data/ext/html_tokenizer_ext/parser.h +2 -0
- data/html_tokenizer.gemspec +1 -1
- data/lib/html_tokenizer.rb +3 -2
- data/test/unit/parser_test.rb +16 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf47920a7a89b9806ac9b1e830b6965e2ec0e98e
|
4
|
+
data.tar.gz: 97dbc221ee255a3d6a9d17f20e1807cdddec3eb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9865b029658f9ad0186863319ecbb9a9d62f7de43b7763c91f455848c79cfd0fed2008f5f86d7b79b5434a97b547464716e5a9ebb67ac6248ceec5b8c85cd4a
|
7
|
+
data.tar.gz: 0c092d6501187b4eff8d0981831b3df3a25b957516c138b7989ae57c92c139ade5952a90df7f80ed928252c724de9cc2bc37d0921540d4da0d1bd13faf6462aa
|
data/Gemfile.lock
CHANGED
data/ext/html_tokenizer_ext/parser.c
CHANGED
@@ -82,6 +82,8 @@ static void parser_add_error(struct parser_t *parser, const char *message)
|
|
82
82
|
{
|
83
83
|
REALLOC_N(parser->errors, struct parser_document_error_t, parser->errors_count + 1);
|
84
84
|
parser->errors[parser->errors_count].message = strdup(message);
|
85
|
+
parser->errors[parser->errors_count].pos = parser->tk.scan.cursor;
|
86
|
+
parser->errors[parser->errors_count].mb_pos = parser->tk.scan.mb_cursor;
|
85
87
|
parser->errors[parser->errors_count].line_number = parser->doc.line_number;
|
86
88
|
parser->errors[parser->errors_count].column_number = parser->doc.column_number;
|
87
89
|
parser->errors_count += 1;
|
@@ -723,12 +725,13 @@ static VALUE create_parser_error(struct parser_document_error_t *error)
|
|
723
725
|
{
|
724
726
|
VALUE module = rb_const_get(rb_cObject, rb_intern("HtmlTokenizer"));
|
725
727
|
VALUE klass = rb_const_get(module, rb_intern("ParserError"));
|
726
|
-
VALUE args[3] = {
|
728
|
+
VALUE args[4] = {
|
727
729
|
rb_str_new2(error->message),
|
730
|
+
ULONG2NUM(error->mb_pos),
|
728
731
|
ULONG2NUM(error->line_number),
|
729
732
|
ULONG2NUM(error->column_number),
|
730
733
|
};
|
731
|
-
return rb_class_new_instance(3, args, klass);
|
734
|
+
return rb_class_new_instance(4, args, klass);
|
732
735
|
}
|
733
736
|
|
734
737
|
static VALUE parser_errors_method(VALUE self, VALUE error_p)
|
data/html_tokenizer.gemspec
CHANGED
data/lib/html_tokenizer.rb
CHANGED
@@ -2,9 +2,10 @@ require 'html_tokenizer_ext'
|
|
2
2
|
|
3
3
|
module HtmlTokenizer
|
4
4
|
class ParserError < RuntimeError
|
5
|
-
attr_reader :line, :column
|
6
|
-
def initialize(message, line, column)
|
5
|
+
attr_reader :position, :line, :column
|
6
|
+
def initialize(message, position, line, column)
|
7
7
|
super(message)
|
8
|
+
@position = position
|
8
9
|
@line = line
|
9
10
|
@column = column
|
10
11
|
end
|
data/test/unit/parser_test.rb
CHANGED
@@ -437,6 +437,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
437
437
|
parse('<>')
|
438
438
|
assert_equal 1, @parser.errors_count
|
439
439
|
assert_equal "expected '/' or tag name", @parser.errors.first.to_s
|
440
|
+
assert_equal 1, @parser.errors.first.position
|
440
441
|
assert_equal 1, @parser.errors.first.line
|
441
442
|
assert_equal 1, @parser.errors.first.column
|
442
443
|
end
|
@@ -445,6 +446,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
445
446
|
parse('< ')
|
446
447
|
assert_equal 1, @parser.errors_count
|
447
448
|
assert_equal "expected '/' or tag name", @parser.errors.first.to_s
|
449
|
+
assert_equal 1, @parser.errors.first.position
|
448
450
|
assert_equal 1, @parser.errors.first.line
|
449
451
|
assert_equal 1, @parser.errors.first.column
|
450
452
|
end
|
@@ -453,6 +455,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
453
455
|
parse('<foo =')
|
454
456
|
assert_equal 1, @parser.errors_count
|
455
457
|
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors.first.to_s
|
458
|
+
assert_equal 5, @parser.errors.first.position
|
456
459
|
assert_equal 1, @parser.errors.first.line
|
457
460
|
assert_equal 5, @parser.errors.first.column
|
458
461
|
end
|
@@ -461,6 +464,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
461
464
|
parse('<foo /x')
|
462
465
|
assert_equal 1, @parser.errors_count
|
463
466
|
assert_equal "expected '>' after '/'", @parser.errors.first.to_s
|
467
|
+
assert_equal 6, @parser.errors.first.position
|
464
468
|
assert_equal 1, @parser.errors.first.line
|
465
469
|
assert_equal 6, @parser.errors.first.column
|
466
470
|
end
|
@@ -469,6 +473,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
469
473
|
parse('<foo / ')
|
470
474
|
assert_equal 1, @parser.errors_count
|
471
475
|
assert_equal "expected '>' after '/'", @parser.errors.first.to_s
|
476
|
+
assert_equal 6, @parser.errors.first.position
|
472
477
|
assert_equal 1, @parser.errors.first.line
|
473
478
|
assert_equal 6, @parser.errors.first.column
|
474
479
|
end
|
@@ -476,29 +481,33 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
476
481
|
def test_attribute_name_error
|
477
482
|
parse('<foo bar~')
|
478
483
|
assert_equal 2, @parser.errors_count
|
479
|
-
assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors.first.to_s
|
480
|
-
assert_equal 1, @parser.errors.first.line
|
481
|
-
assert_equal 8, @parser.errors.first.column
|
482
484
|
assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors[0].to_s
|
485
|
+
assert_equal 8, @parser.errors.first.position
|
483
486
|
assert_equal 1, @parser.errors[0].line
|
484
487
|
assert_equal 8, @parser.errors[0].column
|
488
|
+
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
|
489
|
+
assert_equal 8, @parser.errors.first.position
|
490
|
+
assert_equal 1, @parser.errors[1].line
|
491
|
+
assert_equal 8, @parser.errors[1].column
|
485
492
|
end
|
486
493
|
|
487
494
|
def test_attribute_whitespace_or_equal_error
|
488
495
|
parse('<foo bar ~')
|
489
496
|
assert_equal 2, @parser.errors_count
|
490
|
-
assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors.first.to_s
|
491
|
-
assert_equal 1, @parser.errors.first.line
|
492
|
-
assert_equal 9, @parser.errors.first.column
|
493
497
|
assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors[0].to_s
|
494
498
|
assert_equal 1, @parser.errors[0].line
|
495
499
|
assert_equal 9, @parser.errors[0].column
|
500
|
+
assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
|
501
|
+
assert_equal 9, @parser.errors.first.position
|
502
|
+
assert_equal 1, @parser.errors[1].line
|
503
|
+
assert_equal 9, @parser.errors[1].column
|
496
504
|
end
|
497
505
|
|
498
506
|
def test_attribute_whitespace_or_equal_error_2
|
499
507
|
parse('<foo bar = >')
|
500
508
|
assert_equal 1, @parser.errors_count
|
501
509
|
assert_equal "expected attribute value after '='", @parser.errors.first.to_s
|
510
|
+
assert_equal 11, @parser.errors.first.position
|
502
511
|
assert_equal 1, @parser.errors.first.line
|
503
512
|
assert_equal 11, @parser.errors.first.column
|
504
513
|
end
|
@@ -507,6 +516,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
507
516
|
parse('<foo bar=""x')
|
508
517
|
assert_equal 1, @parser.errors_count
|
509
518
|
assert_equal "expected space after attribute value", @parser.errors.first.to_s
|
519
|
+
assert_equal 11, @parser.errors.first.position
|
510
520
|
assert_equal 1, @parser.errors.first.line
|
511
521
|
assert_equal 11, @parser.errors.first.column
|
512
522
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francois Chagnon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.6.
|
103
|
+
rubygems_version: 2.6.14
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: HTML Tokenizer
|