html_tokenizer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/html_tokenizer_ext/parser.c +56 -34
- data/ext/html_tokenizer_ext/parser.h +4 -0
- data/ext/html_tokenizer_ext/tokenizer.c +16 -1
- data/ext/html_tokenizer_ext/tokenizer.h +3 -0
- data/html_tokenizer.gemspec +1 -1
- data/test/unit/parser_test.rb +23 -23
- data/test/unit/tokenizer_test.rb +22 -1
- metadata +13 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e620e43f97a82c4cb3aae2067a2666325b453a5
|
4
|
+
data.tar.gz: 57784c1b53c4faefe2ab3b6e222836bfad852d8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b7a26cc219ea9f5885999146015e8137264ee51b43e5e6929d377cc2447ca6d4c1d5804e32954c24b2b807a19edcaf1f9cc708e7b5a0f83b086e16a21c8fa3e
|
7
|
+
data.tar.gz: 46beb0ed1994fe7468ab89451de1af830251da73d15c5d0943d30b70080806a458a7531a9fb7df2d7640e5ba1a195706eb84bf8896ef9184a207a1477747c200
|
data/Gemfile.lock
CHANGED
data/ext/html_tokenizer_ext/parser.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include "html_tokenizer.h"
|
3
4
|
#include "parser.h"
|
4
5
|
|
@@ -65,6 +66,7 @@ static inline void parser_append_ref(struct token_reference_t *dest, struct toke
|
|
65
66
|
if(dest->type == TOKEN_NONE || dest->type != src->type || (dest->start + dest->length) != src->start) {
|
66
67
|
dest->type = src->type;
|
67
68
|
dest->start = src->start;
|
69
|
+
dest->mb_start = src->mb_start;
|
68
70
|
dest->length = src->length;
|
69
71
|
dest->line_number = src->line_number;
|
70
72
|
dest->column_number = src->column_number;
|
@@ -362,15 +364,21 @@ static inline int rawtext_context(struct parser_t *parser)
|
|
362
364
|
|
363
365
|
static void parser_adjust_line_number(struct parser_t *parser, long unsigned int start, long unsigned int length)
|
364
366
|
{
|
367
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
365
368
|
long unsigned int i;
|
369
|
+
const char *buf, *nextlf;
|
366
370
|
|
367
|
-
for(i =
|
368
|
-
|
371
|
+
for(i = 0; i < length;) {
|
372
|
+
buf = &parser->doc.data[start + i];
|
373
|
+
nextlf = memchr(buf, '\n', length - i);
|
374
|
+
if(nextlf) {
|
369
375
|
parser->doc.column_number = 0;
|
370
376
|
parser->doc.line_number += 1;
|
377
|
+
i += (nextlf - buf) + 1;
|
371
378
|
}
|
372
379
|
else {
|
373
|
-
parser->doc.column_number +=
|
380
|
+
parser->doc.column_number += rb_enc_strlen(buf, buf + length - i, enc);
|
381
|
+
break;
|
374
382
|
}
|
375
383
|
}
|
376
384
|
|
@@ -383,11 +391,14 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
|
|
383
391
|
struct token_reference_t ref = {
|
384
392
|
.type = type,
|
385
393
|
.start = tk->scan.cursor,
|
394
|
+
.mb_start = tk->scan.mb_cursor,
|
386
395
|
.length = length,
|
387
396
|
.line_number = parser->doc.line_number,
|
388
397
|
.column_number = parser->doc.column_number,
|
389
398
|
};
|
390
399
|
int parse_again = 1;
|
400
|
+
long unsigned int mb_strlen;
|
401
|
+
rb_encoding *enc;
|
391
402
|
|
392
403
|
while(parse_again) {
|
393
404
|
switch(parser->context)
|
@@ -438,8 +449,10 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
|
|
438
449
|
}
|
439
450
|
|
440
451
|
if(rb_block_given_p()) {
|
452
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
453
|
+
mb_strlen = rb_enc_strlen(parser->doc.data + ref.start, parser->doc.data + ref.start + ref.length, enc);
|
441
454
|
rb_yield_values(5, token_type_to_symbol(type),
|
442
|
-
INT2NUM(ref.
|
455
|
+
INT2NUM(ref.mb_start), INT2NUM(ref.mb_start + mb_strlen),
|
443
456
|
INT2NUM(ref.line_number), INT2NUM(ref.column_number));
|
444
457
|
}
|
445
458
|
|
@@ -465,6 +478,8 @@ static VALUE parser_initialize_method(VALUE self)
|
|
465
478
|
|
466
479
|
parser->doc.length = 0;
|
467
480
|
parser->doc.data = NULL;
|
481
|
+
parser->doc.enc_index = 0;
|
482
|
+
parser->doc.mb_length = 0;
|
468
483
|
|
469
484
|
parser->doc.line_number = 1;
|
470
485
|
parser->doc.column_number = 0;
|
@@ -478,11 +493,17 @@ static VALUE parser_initialize_method(VALUE self)
|
|
478
493
|
static int parser_document_append(struct parser_t *parser, const char *string, unsigned long int length)
|
479
494
|
{
|
480
495
|
void *old = parser->doc.data;
|
496
|
+
unsigned long int mb_length;
|
497
|
+
char *buf;
|
498
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
481
499
|
REALLOC_N(parser->doc.data, char, parser->doc.length + length + 1);
|
482
500
|
DBG_PRINT("parser=%p realloc(parser->doc.data) %p -> %p length=%lu", parser, old,
|
483
|
-
parser->doc.data,
|
484
|
-
|
501
|
+
parser->doc.data, parser->doc.length + length + 1);
|
502
|
+
buf = parser->doc.data + parser->doc.length;
|
503
|
+
strcpy(buf, string);
|
504
|
+
mb_length = rb_enc_strlen(buf, buf + length, enc);
|
485
505
|
parser->doc.length += length;
|
506
|
+
parser->doc.mb_length += mb_length;
|
486
507
|
return 1;
|
487
508
|
}
|
488
509
|
|
@@ -490,7 +511,7 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
490
511
|
{
|
491
512
|
struct parser_t *parser = NULL;
|
492
513
|
char *string = NULL;
|
493
|
-
long unsigned int length = 0, cursor = 0;
|
514
|
+
long unsigned int length = 0, cursor = 0, mb_cursor = 0;
|
494
515
|
|
495
516
|
if(NIL_P(source))
|
496
517
|
return Qnil;
|
@@ -502,6 +523,15 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
502
523
|
length = strlen(string);
|
503
524
|
|
504
525
|
cursor = parser->doc.length;
|
526
|
+
mb_cursor = parser->doc.mb_length;
|
527
|
+
|
528
|
+
if(parser->doc.data == NULL) {
|
529
|
+
parser->doc.enc_index = rb_enc_get_index(source);
|
530
|
+
}
|
531
|
+
else if(parser->doc.enc_index != rb_enc_get_index(source)) {
|
532
|
+
rb_raise(rb_eArgError, "cannot append %s string to %s document",
|
533
|
+
rb_enc_name(rb_enc_get(source)), rb_enc_name(rb_enc_from_index(parser->doc.enc_index)));
|
534
|
+
}
|
505
535
|
|
506
536
|
if(!parser_document_append(parser, string, length)) {
|
507
537
|
// error
|
@@ -515,6 +545,8 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
515
545
|
parser->tk.scan.cursor = cursor;
|
516
546
|
parser->tk.scan.string = parser->doc.data;
|
517
547
|
parser->tk.scan.length = parser->doc.length;
|
548
|
+
parser->tk.scan.enc_index = parser->doc.enc_index;
|
549
|
+
parser->tk.scan.mb_cursor = mb_cursor;
|
518
550
|
|
519
551
|
tokenizer_scan_all(&parser->tk);
|
520
552
|
}
|
@@ -535,17 +567,30 @@ static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
|
|
535
567
|
static VALUE parser_document_method(VALUE self)
|
536
568
|
{
|
537
569
|
struct parser_t *parser = NULL;
|
570
|
+
rb_encoding *enc;
|
538
571
|
Parser_Get_Struct(self, parser);
|
539
572
|
if(!parser->doc.data)
|
540
573
|
return Qnil;
|
541
|
-
|
574
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
575
|
+
return rb_enc_str_new(parser->doc.data, parser->doc.length, enc);
|
542
576
|
}
|
543
577
|
|
544
578
|
static VALUE parser_document_length_method(VALUE self)
|
545
579
|
{
|
546
580
|
struct parser_t *parser = NULL;
|
581
|
+
rb_encoding *enc;
|
582
|
+
const char *buf;
|
583
|
+
|
547
584
|
Parser_Get_Struct(self, parser);
|
548
|
-
|
585
|
+
|
586
|
+
if(parser->doc.data == NULL) {
|
587
|
+
return ULONG2NUM(0);
|
588
|
+
}
|
589
|
+
else {
|
590
|
+
buf = parser->doc.data;
|
591
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
592
|
+
return ULONG2NUM(rb_enc_strlen(buf, buf + parser->doc.length, enc));
|
593
|
+
}
|
549
594
|
}
|
550
595
|
|
551
596
|
static VALUE parser_context_method(VALUE self)
|
@@ -588,9 +633,10 @@ static VALUE parser_context_method(VALUE self)
|
|
588
633
|
|
589
634
|
static inline VALUE ref_to_str(struct parser_t *parser, struct token_reference_t *ref)
|
590
635
|
{
|
636
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
591
637
|
if(ref->type == TOKEN_NONE || parser->doc.data == NULL)
|
592
638
|
return Qnil;
|
593
|
-
return
|
639
|
+
return rb_enc_str_new(parser->doc.data+ref->start, ref->length, enc);
|
594
640
|
}
|
595
641
|
|
596
642
|
static VALUE parser_tag_name_method(VALUE self)
|
@@ -665,29 +711,6 @@ static VALUE parser_rawtext_text_method(VALUE self)
|
|
665
711
|
return ref_to_str(parser, &parser->rawtext.text);
|
666
712
|
}
|
667
713
|
|
668
|
-
static VALUE parser_extract_method(VALUE self, VALUE start_p, VALUE end_p)
|
669
|
-
{
|
670
|
-
struct parser_t *parser = NULL;
|
671
|
-
unsigned long int start, end;
|
672
|
-
struct token_reference_t ref;
|
673
|
-
|
674
|
-
Parser_Get_Struct(self, parser);
|
675
|
-
|
676
|
-
start = NUM2ULONG(start_p);
|
677
|
-
end = NUM2ULONG(end_p);
|
678
|
-
if(end < start) {
|
679
|
-
rb_raise(rb_eArgError, "'end' must be greater or equal than 'start'");
|
680
|
-
}
|
681
|
-
if(end > parser->doc.length) {
|
682
|
-
rb_raise(rb_eArgError, "'end' argument not in range of document");
|
683
|
-
}
|
684
|
-
|
685
|
-
ref.type = TOKEN_TEXT; // anything not NONE
|
686
|
-
ref.start = start;
|
687
|
-
ref.length = end - start;
|
688
|
-
return ref_to_str(parser, &ref);
|
689
|
-
}
|
690
|
-
|
691
714
|
static VALUE parser_errors_count_method(VALUE self)
|
692
715
|
{
|
693
716
|
struct parser_t *parser = NULL;
|
@@ -749,7 +772,6 @@ void Init_html_tokenizer_parser(VALUE mHtmlTokenizer)
|
|
749
772
|
rb_define_method(cParser, "column_number", parser_column_number_method, 0);
|
750
773
|
rb_define_method(cParser, "parse", parser_parse_method, 1);
|
751
774
|
rb_define_method(cParser, "append_placeholder", parser_append_placeholder_method, 1);
|
752
|
-
rb_define_method(cParser, "extract", parser_extract_method, 2);
|
753
775
|
rb_define_method(cParser, "context", parser_context_method, 0);
|
754
776
|
rb_define_method(cParser, "tag_name", parser_tag_name_method, 0);
|
755
777
|
rb_define_method(cParser, "closing_tag?", parser_closing_tag_method, 0);
|
data/ext/html_tokenizer_ext/parser.h
CHANGED
@@ -28,11 +28,15 @@ struct parser_document_t {
|
|
28
28
|
char *data;
|
29
29
|
long unsigned int line_number;
|
30
30
|
long unsigned int column_number;
|
31
|
+
|
32
|
+
int enc_index;
|
33
|
+
long unsigned int mb_length;
|
31
34
|
};
|
32
35
|
|
33
36
|
struct token_reference_t {
|
34
37
|
enum token_type type;
|
35
38
|
long unsigned int start;
|
39
|
+
long unsigned int mb_start;
|
36
40
|
long unsigned int length;
|
37
41
|
long unsigned int line_number;
|
38
42
|
long unsigned int column_number;
|
data/ext/html_tokenizer_ext/tokenizer.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include "html_tokenizer.h"
|
3
4
|
#include "tokenizer.h"
|
4
5
|
|
@@ -60,6 +61,8 @@ void tokenizer_init(struct tokenizer_t *tk)
|
|
60
61
|
tk->scan.string = NULL;
|
61
62
|
tk->scan.cursor = 0;
|
62
63
|
tk->scan.length = 0;
|
64
|
+
tk->scan.mb_cursor = 0;
|
65
|
+
tk->scan.enc_index = 0;
|
63
66
|
|
64
67
|
tk->attribute_value_start = 0;
|
65
68
|
tk->found_attribute = 0;
|
@@ -115,17 +118,27 @@ VALUE token_type_to_symbol(enum token_type type)
|
|
115
118
|
return Qnil;
|
116
119
|
}
|
117
120
|
|
121
|
+
static long unsigned int tokenizer_mblength(struct tokenizer_t *tk, long unsigned int length)
|
122
|
+
{
|
123
|
+
rb_encoding *enc = rb_enc_from_index(tk->scan.enc_index);
|
124
|
+
const char *buf = tk->scan.string + tk->scan.cursor;
|
125
|
+
return rb_enc_strlen(buf, buf + length, enc);
|
126
|
+
}
|
127
|
+
|
118
128
|
static void tokenizer_yield_tag(struct tokenizer_t *tk, enum token_type type, long unsigned int length, void *data)
|
119
129
|
{
|
130
|
+
long unsigned int mb_length = tokenizer_mblength(tk, length);
|
120
131
|
tk->last_token = type;
|
121
|
-
rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.
|
132
|
+
rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.mb_cursor), INT2NUM(tk->scan.mb_cursor + mb_length));
|
122
133
|
}
|
123
134
|
|
124
135
|
static void tokenizer_callback(struct tokenizer_t *tk, enum token_type type, long unsigned int length)
|
125
136
|
{
|
137
|
+
long unsigned int mb_length = tokenizer_mblength(tk, length);
|
126
138
|
if(tk->f_callback)
|
127
139
|
tk->f_callback(tk, type, length, tk->callback_data);
|
128
140
|
tk->scan.cursor += length;
|
141
|
+
tk->scan.mb_cursor += mb_length;
|
129
142
|
}
|
130
143
|
|
131
144
|
static VALUE tokenizer_initialize_method(VALUE self)
|
@@ -657,6 +670,8 @@ static VALUE tokenizer_tokenize_method(VALUE self, VALUE source)
|
|
657
670
|
c_source = StringValueCStr(source);
|
658
671
|
tk->scan.cursor = 0;
|
659
672
|
tk->scan.length = strlen(c_source);
|
673
|
+
tk->scan.enc_index = rb_enc_get_index(source);
|
674
|
+
tk->scan.mb_cursor = 0;
|
660
675
|
|
661
676
|
old = tk->scan.string;
|
662
677
|
REALLOC_N(tk->scan.string, char, tk->scan.length+1);
|
data/html_tokenizer.gemspec
CHANGED
data/test/unit/parser_test.rb
CHANGED
@@ -431,29 +431,6 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
431
431
|
tokens << token
|
432
432
|
end
|
433
433
|
assert_equal [[:text, 0, 4, 1, 0], [:text, 34, 38, 5, 0]], tokens
|
434
|
-
assert_equal "bar\n", @parser.extract(34, 38)
|
435
|
-
end
|
436
|
-
|
437
|
-
def test_extract_method
|
438
|
-
parse("abcdefg")
|
439
|
-
assert_equal "a", @parser.extract(0, 1)
|
440
|
-
assert_equal "cd", @parser.extract(2, 4)
|
441
|
-
end
|
442
|
-
|
443
|
-
def test_extract_method_raises_argument_error_end_past_length
|
444
|
-
parse("abcdefg")
|
445
|
-
e = assert_raises(ArgumentError) do
|
446
|
-
@parser.extract(0, 32)
|
447
|
-
end
|
448
|
-
assert_equal "'end' argument not in range of document", e.message
|
449
|
-
end
|
450
|
-
|
451
|
-
def test_extract_method_raises_argument_error_end_less_than_start
|
452
|
-
parse("abcdefg")
|
453
|
-
e = assert_raises(ArgumentError) do
|
454
|
-
@parser.extract(1, 0)
|
455
|
-
end
|
456
|
-
assert_equal "'end' must be greater or equal than 'start'", e.message
|
457
434
|
end
|
458
435
|
|
459
436
|
def test_solidus_or_tag_name_error
|
@@ -534,6 +511,29 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
534
511
|
assert_equal 11, @parser.errors.first.column
|
535
512
|
end
|
536
513
|
|
514
|
+
def test_attribute_with_mutlibyte_characters
|
515
|
+
data = ["<div title", "='your store’s'>"]
|
516
|
+
tokens = []
|
517
|
+
parse(*data) { |name, start, stop| tokens << [name, start, stop, data.join[start...stop]] }
|
518
|
+
assert_equal "div", @parser.tag_name
|
519
|
+
assert_equal "title", @parser.attribute_name
|
520
|
+
assert_equal "your store’s", @parser.attribute_value
|
521
|
+
assert_equal data.join, @parser.document
|
522
|
+
assert_equal data.join.size, @parser.document_length
|
523
|
+
assert_equal data.join.size, @parser.column_number
|
524
|
+
assert_equal [
|
525
|
+
[:tag_start, 0, 1, "<"],
|
526
|
+
[:tag_name, 1, 4, "div"],
|
527
|
+
[:whitespace, 4, 5, " "],
|
528
|
+
[:attribute_name, 5, 10, "title"],
|
529
|
+
[:equal, 10, 11, "="],
|
530
|
+
[:attribute_quoted_value_start, 11, 12, "'"],
|
531
|
+
[:attribute_quoted_value, 12, 24, "your store’s"],
|
532
|
+
[:attribute_quoted_value_end, 24, 25, "'"],
|
533
|
+
[:tag_end, 25, 26, ">"],
|
534
|
+
], tokens
|
535
|
+
end
|
536
|
+
|
537
537
|
def test_valid_syntaxes
|
538
538
|
parse(
|
539
539
|
'<div>',
|
data/test/unit/tokenizer_test.rb
CHANGED
@@ -324,13 +324,34 @@ class HtmlTokenizer::TokenizerTest < Minitest::Test
|
|
324
324
|
], result
|
325
325
|
end
|
326
326
|
|
327
|
+
def test_html_with_mutlibyte_characters
|
328
|
+
data = "<div title='your store’s'>foo</div>"
|
329
|
+
result = tokenize(data)
|
330
|
+
assert_equal [
|
331
|
+
[:tag_start, "<"],
|
332
|
+
[:tag_name, "div"],
|
333
|
+
[:whitespace, " "],
|
334
|
+
[:attribute_name, "title"],
|
335
|
+
[:equal, "="],
|
336
|
+
[:attribute_quoted_value_start, "'"],
|
337
|
+
[:attribute_quoted_value, "your store’s"],
|
338
|
+
[:attribute_quoted_value_end, "'"],
|
339
|
+
[:tag_end, ">"],
|
340
|
+
[:text, "foo"],
|
341
|
+
[:tag_start, "<"],
|
342
|
+
[:solidus, "/"],
|
343
|
+
[:tag_name, "div"],
|
344
|
+
[:tag_end, ">"],
|
345
|
+
], result
|
346
|
+
end
|
347
|
+
|
327
348
|
private
|
328
349
|
|
329
350
|
def tokenize(*parts)
|
330
351
|
tokens = []
|
331
352
|
@tokenizer = HtmlTokenizer::Tokenizer.new
|
332
353
|
parts.each do |part|
|
333
|
-
@tokenizer.tokenize(part) { |name, start, stop| tokens << [name, part[start
|
354
|
+
@tokenizer.tokenize(part) { |name, start, stop| tokens << [name, part[start...stop]] }
|
334
355
|
end
|
335
356
|
tokens
|
336
357
|
end
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francois Chagnon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake-compiler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
description:
|
@@ -60,8 +60,8 @@ extensions:
|
|
60
60
|
- ext/html_tokenizer_ext/extconf.rb
|
61
61
|
extra_rdoc_files: []
|
62
62
|
files:
|
63
|
-
- .autotest
|
64
|
-
- .gitignore
|
63
|
+
- ".autotest"
|
64
|
+
- ".gitignore"
|
65
65
|
- Gemfile
|
66
66
|
- Gemfile.lock
|
67
67
|
- LICENSE
|
@@ -90,17 +90,17 @@ require_paths:
|
|
90
90
|
- ext
|
91
91
|
required_ruby_version: !ruby/object:Gem::Requirement
|
92
92
|
requirements:
|
93
|
-
- -
|
93
|
+
- - ">="
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0'
|
96
96
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
97
|
requirements:
|
98
|
-
- -
|
98
|
+
- - ">="
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.6.8
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: HTML Tokenizer
|