html_tokenizer 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a45a481e7310c22092c48de49f62315d7ae19700
4
- data.tar.gz: fda14319ce3a8b6770c074b68384e3c7e16d3fa7
3
+ metadata.gz: cf47920a7a89b9806ac9b1e830b6965e2ec0e98e
4
+ data.tar.gz: 97dbc221ee255a3d6a9d17f20e1807cdddec3eb3
5
5
  SHA512:
6
- metadata.gz: b55f2ae076aa4cbf3b55fca42435bcfa63ec4e9e032a91ad55260cf0b6cc74ae75b484b0c7ad70cb08a6bf3fe75d9499feec9df768f85fdac2fdfe29cc8262ca
7
- data.tar.gz: 8e3726c0471524c66270d8ed187a807b5c6bf906312e754c69e0e4da915db46ee28729f0400347413ddaf147c48a21fd4d628454e01f09eec2a104ace6452d17
6
+ metadata.gz: e9865b029658f9ad0186863319ecbb9a9d62f7de43b7763c91f455848c79cfd0fed2008f5f86d7b79b5434a97b547464716e5a9ebb67ac6248ceec5b8c85cd4a
7
+ data.tar.gz: 0c092d6501187b4eff8d0981831b3df3a25b957516c138b7989ae57c92c139ade5952a90df7f80ed928252c724de9cc2bc37d0921540d4da0d1bd13faf6462aa
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html_tokenizer (0.0.5)
4
+ html_tokenizer (0.0.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -82,6 +82,8 @@ static void parser_add_error(struct parser_t *parser, const char *message)
82
82
  {
83
83
  REALLOC_N(parser->errors, struct parser_document_error_t, parser->errors_count + 1);
84
84
  parser->errors[parser->errors_count].message = strdup(message);
85
+ parser->errors[parser->errors_count].pos = parser->tk.scan.cursor;
86
+ parser->errors[parser->errors_count].mb_pos = parser->tk.scan.mb_cursor;
85
87
  parser->errors[parser->errors_count].line_number = parser->doc.line_number;
86
88
  parser->errors[parser->errors_count].column_number = parser->doc.column_number;
87
89
  parser->errors_count += 1;
@@ -723,12 +725,13 @@ static VALUE create_parser_error(struct parser_document_error_t *error)
723
725
  {
724
726
  VALUE module = rb_const_get(rb_cObject, rb_intern("HtmlTokenizer"));
725
727
  VALUE klass = rb_const_get(module, rb_intern("ParserError"));
726
- VALUE args[3] = {
728
+ VALUE args[4] = {
727
729
  rb_str_new2(error->message),
730
+ ULONG2NUM(error->mb_pos),
728
731
  ULONG2NUM(error->line_number),
729
732
  ULONG2NUM(error->column_number),
730
733
  };
731
- return rb_class_new_instance(3, args, klass);
734
+ return rb_class_new_instance(4, args, klass);
732
735
  }
733
736
 
734
737
  static VALUE parser_errors_method(VALUE self, VALUE error_p)
@@ -19,6 +19,8 @@ enum parser_context {
19
19
 
20
20
  struct parser_document_error_t {
21
21
  char *message;
22
+ long unsigned int pos;
23
+ long unsigned int mb_pos;
22
24
  long unsigned int line_number;
23
25
  long unsigned int column_number;
24
26
  };
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "html_tokenizer"
3
- spec.version = "0.0.5"
3
+ spec.version = "0.0.6"
4
4
  spec.summary = "HTML Tokenizer"
5
5
  spec.author = "Francois Chagnon"
6
6
 
@@ -2,9 +2,10 @@ require 'html_tokenizer_ext'
2
2
 
3
3
  module HtmlTokenizer
4
4
  class ParserError < RuntimeError
5
- attr_reader :line, :column
6
- def initialize(message, line, column)
5
+ attr_reader :position, :line, :column
6
+ def initialize(message, position, line, column)
7
7
  super(message)
8
+ @position = position
8
9
  @line = line
9
10
  @column = column
10
11
  end
@@ -437,6 +437,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
437
437
  parse('<>')
438
438
  assert_equal 1, @parser.errors_count
439
439
  assert_equal "expected '/' or tag name", @parser.errors.first.to_s
440
+ assert_equal 1, @parser.errors.first.position
440
441
  assert_equal 1, @parser.errors.first.line
441
442
  assert_equal 1, @parser.errors.first.column
442
443
  end
@@ -445,6 +446,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
445
446
  parse('< ')
446
447
  assert_equal 1, @parser.errors_count
447
448
  assert_equal "expected '/' or tag name", @parser.errors.first.to_s
449
+ assert_equal 1, @parser.errors.first.position
448
450
  assert_equal 1, @parser.errors.first.line
449
451
  assert_equal 1, @parser.errors.first.column
450
452
  end
@@ -453,6 +455,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
453
455
  parse('<foo =')
454
456
  assert_equal 1, @parser.errors_count
455
457
  assert_equal "expected whitespace, '>', attribute name or value", @parser.errors.first.to_s
458
+ assert_equal 5, @parser.errors.first.position
456
459
  assert_equal 1, @parser.errors.first.line
457
460
  assert_equal 5, @parser.errors.first.column
458
461
  end
@@ -461,6 +464,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
461
464
  parse('<foo /x')
462
465
  assert_equal 1, @parser.errors_count
463
466
  assert_equal "expected '>' after '/'", @parser.errors.first.to_s
467
+ assert_equal 6, @parser.errors.first.position
464
468
  assert_equal 1, @parser.errors.first.line
465
469
  assert_equal 6, @parser.errors.first.column
466
470
  end
@@ -469,6 +473,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
469
473
  parse('<foo / ')
470
474
  assert_equal 1, @parser.errors_count
471
475
  assert_equal "expected '>' after '/'", @parser.errors.first.to_s
476
+ assert_equal 6, @parser.errors.first.position
472
477
  assert_equal 1, @parser.errors.first.line
473
478
  assert_equal 6, @parser.errors.first.column
474
479
  end
@@ -476,29 +481,33 @@ class HtmlTokenizer::ParserTest < Minitest::Test
476
481
  def test_attribute_name_error
477
482
  parse('<foo bar~')
478
483
  assert_equal 2, @parser.errors_count
479
- assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors.first.to_s
480
- assert_equal 1, @parser.errors.first.line
481
- assert_equal 8, @parser.errors.first.column
482
484
  assert_equal "expected whitespace, '>' or '=' after attribute name", @parser.errors[0].to_s
485
+ assert_equal 8, @parser.errors.first.position
483
486
  assert_equal 1, @parser.errors[0].line
484
487
  assert_equal 8, @parser.errors[0].column
488
+ assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
489
+ assert_equal 8, @parser.errors.first.position
490
+ assert_equal 1, @parser.errors[1].line
491
+ assert_equal 8, @parser.errors[1].column
485
492
  end
486
493
 
487
494
  def test_attribute_whitespace_or_equal_error
488
495
  parse('<foo bar ~')
489
496
  assert_equal 2, @parser.errors_count
490
- assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors.first.to_s
491
- assert_equal 1, @parser.errors.first.line
492
- assert_equal 9, @parser.errors.first.column
493
497
  assert_equal "expected '/', '>', \", ' or '=' after attribute name", @parser.errors[0].to_s
494
498
  assert_equal 1, @parser.errors[0].line
495
499
  assert_equal 9, @parser.errors[0].column
500
+ assert_equal "expected whitespace, '>', attribute name or value", @parser.errors[1].to_s
501
+ assert_equal 9, @parser.errors.first.position
502
+ assert_equal 1, @parser.errors[1].line
503
+ assert_equal 9, @parser.errors[1].column
496
504
  end
497
505
 
498
506
  def test_attribute_whitespace_or_equal_error_2
499
507
  parse('<foo bar = >')
500
508
  assert_equal 1, @parser.errors_count
501
509
  assert_equal "expected attribute value after '='", @parser.errors.first.to_s
510
+ assert_equal 11, @parser.errors.first.position
502
511
  assert_equal 1, @parser.errors.first.line
503
512
  assert_equal 11, @parser.errors.first.column
504
513
  end
@@ -507,6 +516,7 @@ class HtmlTokenizer::ParserTest < Minitest::Test
507
516
  parse('<foo bar=""x')
508
517
  assert_equal 1, @parser.errors_count
509
518
  assert_equal "expected space after attribute value", @parser.errors.first.to_s
519
+ assert_equal 11, @parser.errors.first.position
510
520
  assert_equal 1, @parser.errors.first.line
511
521
  assert_equal 11, @parser.errors.first.column
512
522
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Chagnon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-23 00:00:00.000000000 Z
11
+ date: 2018-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
100
  version: '0'
101
101
  requirements: []
102
102
  rubyforge_project:
103
- rubygems_version: 2.6.8
103
+ rubygems_version: 2.6.14
104
104
  signing_key:
105
105
  specification_version: 4
106
106
  summary: HTML Tokenizer