html_tokenizer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/html_tokenizer_ext/parser.c +56 -34
- data/ext/html_tokenizer_ext/parser.h +4 -0
- data/ext/html_tokenizer_ext/tokenizer.c +16 -1
- data/ext/html_tokenizer_ext/tokenizer.h +3 -0
- data/html_tokenizer.gemspec +1 -1
- data/test/unit/parser_test.rb +23 -23
- data/test/unit/tokenizer_test.rb +22 -1
- metadata +13 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e620e43f97a82c4cb3aae2067a2666325b453a5
|
4
|
+
data.tar.gz: 57784c1b53c4faefe2ab3b6e222836bfad852d8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b7a26cc219ea9f5885999146015e8137264ee51b43e5e6929d377cc2447ca6d4c1d5804e32954c24b2b807a19edcaf1f9cc708e7b5a0f83b086e16a21c8fa3e
|
7
|
+
data.tar.gz: 46beb0ed1994fe7468ab89451de1af830251da73d15c5d0943d30b70080806a458a7531a9fb7df2d7640e5ba1a195706eb84bf8896ef9184a207a1477747c200
|
data/Gemfile.lock
CHANGED
data/ext/html_tokenizer_ext/parser.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include "html_tokenizer.h"
|
3
4
|
#include "parser.h"
|
4
5
|
|
@@ -65,6 +66,7 @@ static inline void parser_append_ref(struct token_reference_t *dest, struct toke
|
|
65
66
|
if(dest->type == TOKEN_NONE || dest->type != src->type || (dest->start + dest->length) != src->start) {
|
66
67
|
dest->type = src->type;
|
67
68
|
dest->start = src->start;
|
69
|
+
dest->mb_start = src->mb_start;
|
68
70
|
dest->length = src->length;
|
69
71
|
dest->line_number = src->line_number;
|
70
72
|
dest->column_number = src->column_number;
|
@@ -362,15 +364,21 @@ static inline int rawtext_context(struct parser_t *parser)
|
|
362
364
|
|
363
365
|
static void parser_adjust_line_number(struct parser_t *parser, long unsigned int start, long unsigned int length)
|
364
366
|
{
|
367
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
365
368
|
long unsigned int i;
|
369
|
+
const char *buf, *nextlf;
|
366
370
|
|
367
|
-
for(i =
|
368
|
-
|
371
|
+
for(i = 0; i < length;) {
|
372
|
+
buf = &parser->doc.data[start + i];
|
373
|
+
nextlf = memchr(buf, '\n', length - i);
|
374
|
+
if(nextlf) {
|
369
375
|
parser->doc.column_number = 0;
|
370
376
|
parser->doc.line_number += 1;
|
377
|
+
i += (nextlf - buf) + 1;
|
371
378
|
}
|
372
379
|
else {
|
373
|
-
parser->doc.column_number +=
|
380
|
+
parser->doc.column_number += rb_enc_strlen(buf, buf + length - i, enc);
|
381
|
+
break;
|
374
382
|
}
|
375
383
|
}
|
376
384
|
|
@@ -383,11 +391,14 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
|
|
383
391
|
struct token_reference_t ref = {
|
384
392
|
.type = type,
|
385
393
|
.start = tk->scan.cursor,
|
394
|
+
.mb_start = tk->scan.mb_cursor,
|
386
395
|
.length = length,
|
387
396
|
.line_number = parser->doc.line_number,
|
388
397
|
.column_number = parser->doc.column_number,
|
389
398
|
};
|
390
399
|
int parse_again = 1;
|
400
|
+
long unsigned int mb_strlen;
|
401
|
+
rb_encoding *enc;
|
391
402
|
|
392
403
|
while(parse_again) {
|
393
404
|
switch(parser->context)
|
@@ -438,8 +449,10 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
|
|
438
449
|
}
|
439
450
|
|
440
451
|
if(rb_block_given_p()) {
|
452
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
453
|
+
mb_strlen = rb_enc_strlen(parser->doc.data + ref.start, parser->doc.data + ref.start + ref.length, enc);
|
441
454
|
rb_yield_values(5, token_type_to_symbol(type),
|
442
|
-
INT2NUM(ref.
|
455
|
+
INT2NUM(ref.mb_start), INT2NUM(ref.mb_start + mb_strlen),
|
443
456
|
INT2NUM(ref.line_number), INT2NUM(ref.column_number));
|
444
457
|
}
|
445
458
|
|
@@ -465,6 +478,8 @@ static VALUE parser_initialize_method(VALUE self)
|
|
465
478
|
|
466
479
|
parser->doc.length = 0;
|
467
480
|
parser->doc.data = NULL;
|
481
|
+
parser->doc.enc_index = 0;
|
482
|
+
parser->doc.mb_length = 0;
|
468
483
|
|
469
484
|
parser->doc.line_number = 1;
|
470
485
|
parser->doc.column_number = 0;
|
@@ -478,11 +493,17 @@ static VALUE parser_initialize_method(VALUE self)
|
|
478
493
|
static int parser_document_append(struct parser_t *parser, const char *string, unsigned long int length)
|
479
494
|
{
|
480
495
|
void *old = parser->doc.data;
|
496
|
+
unsigned long int mb_length;
|
497
|
+
char *buf;
|
498
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
481
499
|
REALLOC_N(parser->doc.data, char, parser->doc.length + length + 1);
|
482
500
|
DBG_PRINT("parser=%p realloc(parser->doc.data) %p -> %p length=%lu", parser, old,
|
483
|
-
parser->doc.data,
|
484
|
-
|
501
|
+
parser->doc.data, parser->doc.length + length + 1);
|
502
|
+
buf = parser->doc.data + parser->doc.length;
|
503
|
+
strcpy(buf, string);
|
504
|
+
mb_length = rb_enc_strlen(buf, buf + length, enc);
|
485
505
|
parser->doc.length += length;
|
506
|
+
parser->doc.mb_length += mb_length;
|
486
507
|
return 1;
|
487
508
|
}
|
488
509
|
|
@@ -490,7 +511,7 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
490
511
|
{
|
491
512
|
struct parser_t *parser = NULL;
|
492
513
|
char *string = NULL;
|
493
|
-
long unsigned int length = 0, cursor = 0;
|
514
|
+
long unsigned int length = 0, cursor = 0, mb_cursor = 0;
|
494
515
|
|
495
516
|
if(NIL_P(source))
|
496
517
|
return Qnil;
|
@@ -502,6 +523,15 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
502
523
|
length = strlen(string);
|
503
524
|
|
504
525
|
cursor = parser->doc.length;
|
526
|
+
mb_cursor = parser->doc.mb_length;
|
527
|
+
|
528
|
+
if(parser->doc.data == NULL) {
|
529
|
+
parser->doc.enc_index = rb_enc_get_index(source);
|
530
|
+
}
|
531
|
+
else if(parser->doc.enc_index != rb_enc_get_index(source)) {
|
532
|
+
rb_raise(rb_eArgError, "cannot append %s string to %s document",
|
533
|
+
rb_enc_name(rb_enc_get(source)), rb_enc_name(rb_enc_from_index(parser->doc.enc_index)));
|
534
|
+
}
|
505
535
|
|
506
536
|
if(!parser_document_append(parser, string, length)) {
|
507
537
|
// error
|
@@ -515,6 +545,8 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
|
|
515
545
|
parser->tk.scan.cursor = cursor;
|
516
546
|
parser->tk.scan.string = parser->doc.data;
|
517
547
|
parser->tk.scan.length = parser->doc.length;
|
548
|
+
parser->tk.scan.enc_index = parser->doc.enc_index;
|
549
|
+
parser->tk.scan.mb_cursor = mb_cursor;
|
518
550
|
|
519
551
|
tokenizer_scan_all(&parser->tk);
|
520
552
|
}
|
@@ -535,17 +567,30 @@ static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
|
|
535
567
|
static VALUE parser_document_method(VALUE self)
|
536
568
|
{
|
537
569
|
struct parser_t *parser = NULL;
|
570
|
+
rb_encoding *enc;
|
538
571
|
Parser_Get_Struct(self, parser);
|
539
572
|
if(!parser->doc.data)
|
540
573
|
return Qnil;
|
541
|
-
|
574
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
575
|
+
return rb_enc_str_new(parser->doc.data, parser->doc.length, enc);
|
542
576
|
}
|
543
577
|
|
544
578
|
static VALUE parser_document_length_method(VALUE self)
|
545
579
|
{
|
546
580
|
struct parser_t *parser = NULL;
|
581
|
+
rb_encoding *enc;
|
582
|
+
const char *buf;
|
583
|
+
|
547
584
|
Parser_Get_Struct(self, parser);
|
548
|
-
|
585
|
+
|
586
|
+
if(parser->doc.data == NULL) {
|
587
|
+
return ULONG2NUM(0);
|
588
|
+
}
|
589
|
+
else {
|
590
|
+
buf = parser->doc.data;
|
591
|
+
enc = rb_enc_from_index(parser->doc.enc_index);
|
592
|
+
return ULONG2NUM(rb_enc_strlen(buf, buf + parser->doc.length, enc));
|
593
|
+
}
|
549
594
|
}
|
550
595
|
|
551
596
|
static VALUE parser_context_method(VALUE self)
|
@@ -588,9 +633,10 @@ static VALUE parser_context_method(VALUE self)
|
|
588
633
|
|
589
634
|
static inline VALUE ref_to_str(struct parser_t *parser, struct token_reference_t *ref)
|
590
635
|
{
|
636
|
+
rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
|
591
637
|
if(ref->type == TOKEN_NONE || parser->doc.data == NULL)
|
592
638
|
return Qnil;
|
593
|
-
return
|
639
|
+
return rb_enc_str_new(parser->doc.data+ref->start, ref->length, enc);
|
594
640
|
}
|
595
641
|
|
596
642
|
static VALUE parser_tag_name_method(VALUE self)
|
@@ -665,29 +711,6 @@ static VALUE parser_rawtext_text_method(VALUE self)
|
|
665
711
|
return ref_to_str(parser, &parser->rawtext.text);
|
666
712
|
}
|
667
713
|
|
668
|
-
static VALUE parser_extract_method(VALUE self, VALUE start_p, VALUE end_p)
|
669
|
-
{
|
670
|
-
struct parser_t *parser = NULL;
|
671
|
-
unsigned long int start, end;
|
672
|
-
struct token_reference_t ref;
|
673
|
-
|
674
|
-
Parser_Get_Struct(self, parser);
|
675
|
-
|
676
|
-
start = NUM2ULONG(start_p);
|
677
|
-
end = NUM2ULONG(end_p);
|
678
|
-
if(end < start) {
|
679
|
-
rb_raise(rb_eArgError, "'end' must be greater or equal than 'start'");
|
680
|
-
}
|
681
|
-
if(end > parser->doc.length) {
|
682
|
-
rb_raise(rb_eArgError, "'end' argument not in range of document");
|
683
|
-
}
|
684
|
-
|
685
|
-
ref.type = TOKEN_TEXT; // anything not NONE
|
686
|
-
ref.start = start;
|
687
|
-
ref.length = end - start;
|
688
|
-
return ref_to_str(parser, &ref);
|
689
|
-
}
|
690
|
-
|
691
714
|
static VALUE parser_errors_count_method(VALUE self)
|
692
715
|
{
|
693
716
|
struct parser_t *parser = NULL;
|
@@ -749,7 +772,6 @@ void Init_html_tokenizer_parser(VALUE mHtmlTokenizer)
|
|
749
772
|
rb_define_method(cParser, "column_number", parser_column_number_method, 0);
|
750
773
|
rb_define_method(cParser, "parse", parser_parse_method, 1);
|
751
774
|
rb_define_method(cParser, "append_placeholder", parser_append_placeholder_method, 1);
|
752
|
-
rb_define_method(cParser, "extract", parser_extract_method, 2);
|
753
775
|
rb_define_method(cParser, "context", parser_context_method, 0);
|
754
776
|
rb_define_method(cParser, "tag_name", parser_tag_name_method, 0);
|
755
777
|
rb_define_method(cParser, "closing_tag?", parser_closing_tag_method, 0);
|
data/ext/html_tokenizer_ext/parser.h
CHANGED
@@ -28,11 +28,15 @@ struct parser_document_t {
|
|
28
28
|
char *data;
|
29
29
|
long unsigned int line_number;
|
30
30
|
long unsigned int column_number;
|
31
|
+
|
32
|
+
int enc_index;
|
33
|
+
long unsigned int mb_length;
|
31
34
|
};
|
32
35
|
|
33
36
|
struct token_reference_t {
|
34
37
|
enum token_type type;
|
35
38
|
long unsigned int start;
|
39
|
+
long unsigned int mb_start;
|
36
40
|
long unsigned int length;
|
37
41
|
long unsigned int line_number;
|
38
42
|
long unsigned int column_number;
|
data/ext/html_tokenizer_ext/tokenizer.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include "html_tokenizer.h"
|
3
4
|
#include "tokenizer.h"
|
4
5
|
|
@@ -60,6 +61,8 @@ void tokenizer_init(struct tokenizer_t *tk)
|
|
60
61
|
tk->scan.string = NULL;
|
61
62
|
tk->scan.cursor = 0;
|
62
63
|
tk->scan.length = 0;
|
64
|
+
tk->scan.mb_cursor = 0;
|
65
|
+
tk->scan.enc_index = 0;
|
63
66
|
|
64
67
|
tk->attribute_value_start = 0;
|
65
68
|
tk->found_attribute = 0;
|
@@ -115,17 +118,27 @@ VALUE token_type_to_symbol(enum token_type type)
|
|
115
118
|
return Qnil;
|
116
119
|
}
|
117
120
|
|
121
|
+
static long unsigned int tokenizer_mblength(struct tokenizer_t *tk, long unsigned int length)
|
122
|
+
{
|
123
|
+
rb_encoding *enc = rb_enc_from_index(tk->scan.enc_index);
|
124
|
+
const char *buf = tk->scan.string + tk->scan.cursor;
|
125
|
+
return rb_enc_strlen(buf, buf + length, enc);
|
126
|
+
}
|
127
|
+
|
118
128
|
static void tokenizer_yield_tag(struct tokenizer_t *tk, enum token_type type, long unsigned int length, void *data)
|
119
129
|
{
|
130
|
+
long unsigned int mb_length = tokenizer_mblength(tk, length);
|
120
131
|
tk->last_token = type;
|
121
|
-
rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.
|
132
|
+
rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.mb_cursor), INT2NUM(tk->scan.mb_cursor + mb_length));
|
122
133
|
}
|
123
134
|
|
124
135
|
static void tokenizer_callback(struct tokenizer_t *tk, enum token_type type, long unsigned int length)
|
125
136
|
{
|
137
|
+
long unsigned int mb_length = tokenizer_mblength(tk, length);
|
126
138
|
if(tk->f_callback)
|
127
139
|
tk->f_callback(tk, type, length, tk->callback_data);
|
128
140
|
tk->scan.cursor += length;
|
141
|
+
tk->scan.mb_cursor += mb_length;
|
129
142
|
}
|
130
143
|
|
131
144
|
static VALUE tokenizer_initialize_method(VALUE self)
|
@@ -657,6 +670,8 @@ static VALUE tokenizer_tokenize_method(VALUE self, VALUE source)
|
|
657
670
|
c_source = StringValueCStr(source);
|
658
671
|
tk->scan.cursor = 0;
|
659
672
|
tk->scan.length = strlen(c_source);
|
673
|
+
tk->scan.enc_index = rb_enc_get_index(source);
|
674
|
+
tk->scan.mb_cursor = 0;
|
660
675
|
|
661
676
|
old = tk->scan.string;
|
662
677
|
REALLOC_N(tk->scan.string, char, tk->scan.length+1);
|
data/html_tokenizer.gemspec
CHANGED
data/test/unit/parser_test.rb
CHANGED
@@ -431,29 +431,6 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
431
431
|
tokens << token
|
432
432
|
end
|
433
433
|
assert_equal [[:text, 0, 4, 1, 0], [:text, 34, 38, 5, 0]], tokens
|
434
|
-
assert_equal "bar\n", @parser.extract(34, 38)
|
435
|
-
end
|
436
|
-
|
437
|
-
def test_extract_method
|
438
|
-
parse("abcdefg")
|
439
|
-
assert_equal "a", @parser.extract(0, 1)
|
440
|
-
assert_equal "cd", @parser.extract(2, 4)
|
441
|
-
end
|
442
|
-
|
443
|
-
def test_extract_method_raises_argument_error_end_past_length
|
444
|
-
parse("abcdefg")
|
445
|
-
e = assert_raises(ArgumentError) do
|
446
|
-
@parser.extract(0, 32)
|
447
|
-
end
|
448
|
-
assert_equal "'end' argument not in range of document", e.message
|
449
|
-
end
|
450
|
-
|
451
|
-
def test_extract_method_raises_argument_error_end_less_than_start
|
452
|
-
parse("abcdefg")
|
453
|
-
e = assert_raises(ArgumentError) do
|
454
|
-
@parser.extract(1, 0)
|
455
|
-
end
|
456
|
-
assert_equal "'end' must be greater or equal than 'start'", e.message
|
457
434
|
end
|
458
435
|
|
459
436
|
def test_solidus_or_tag_name_error
|
@@ -534,6 +511,29 @@ class HtmlTokenizer::ParserTest < Minitest::Test
|
|
534
511
|
assert_equal 11, @parser.errors.first.column
|
535
512
|
end
|
536
513
|
|
514
|
+
def test_attribute_with_mutlibyte_characters
|
515
|
+
data = ["<div title", "='your store’s'>"]
|
516
|
+
tokens = []
|
517
|
+
parse(*data) { |name, start, stop| tokens << [name, start, stop, data.join[start...stop]] }
|
518
|
+
assert_equal "div", @parser.tag_name
|
519
|
+
assert_equal "title", @parser.attribute_name
|
520
|
+
assert_equal "your store’s", @parser.attribute_value
|
521
|
+
assert_equal data.join, @parser.document
|
522
|
+
assert_equal data.join.size, @parser.document_length
|
523
|
+
assert_equal data.join.size, @parser.column_number
|
524
|
+
assert_equal [
|
525
|
+
[:tag_start, 0, 1, "<"],
|
526
|
+
[:tag_name, 1, 4, "div"],
|
527
|
+
[:whitespace, 4, 5, " "],
|
528
|
+
[:attribute_name, 5, 10, "title"],
|
529
|
+
[:equal, 10, 11, "="],
|
530
|
+
[:attribute_quoted_value_start, 11, 12, "'"],
|
531
|
+
[:attribute_quoted_value, 12, 24, "your store’s"],
|
532
|
+
[:attribute_quoted_value_end, 24, 25, "'"],
|
533
|
+
[:tag_end, 25, 26, ">"],
|
534
|
+
], tokens
|
535
|
+
end
|
536
|
+
|
537
537
|
def test_valid_syntaxes
|
538
538
|
parse(
|
539
539
|
'<div>',
|
data/test/unit/tokenizer_test.rb
CHANGED
@@ -324,13 +324,34 @@ class HtmlTokenizer::TokenizerTest < Minitest::Test
|
|
324
324
|
], result
|
325
325
|
end
|
326
326
|
|
327
|
+
def test_html_with_mutlibyte_characters
|
328
|
+
data = "<div title='your store’s'>foo</div>"
|
329
|
+
result = tokenize(data)
|
330
|
+
assert_equal [
|
331
|
+
[:tag_start, "<"],
|
332
|
+
[:tag_name, "div"],
|
333
|
+
[:whitespace, " "],
|
334
|
+
[:attribute_name, "title"],
|
335
|
+
[:equal, "="],
|
336
|
+
[:attribute_quoted_value_start, "'"],
|
337
|
+
[:attribute_quoted_value, "your store’s"],
|
338
|
+
[:attribute_quoted_value_end, "'"],
|
339
|
+
[:tag_end, ">"],
|
340
|
+
[:text, "foo"],
|
341
|
+
[:tag_start, "<"],
|
342
|
+
[:solidus, "/"],
|
343
|
+
[:tag_name, "div"],
|
344
|
+
[:tag_end, ">"],
|
345
|
+
], result
|
346
|
+
end
|
347
|
+
|
327
348
|
private
|
328
349
|
|
329
350
|
def tokenize(*parts)
|
330
351
|
tokens = []
|
331
352
|
@tokenizer = HtmlTokenizer::Tokenizer.new
|
332
353
|
parts.each do |part|
|
333
|
-
@tokenizer.tokenize(part) { |name, start, stop| tokens << [name, part[start
|
354
|
+
@tokenizer.tokenize(part) { |name, start, stop| tokens << [name, part[start...stop]] }
|
334
355
|
end
|
335
356
|
tokens
|
336
357
|
end
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francois Chagnon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake-compiler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
description:
|
@@ -60,8 +60,8 @@ extensions:
|
|
60
60
|
- ext/html_tokenizer_ext/extconf.rb
|
61
61
|
extra_rdoc_files: []
|
62
62
|
files:
|
63
|
-
- .autotest
|
64
|
-
- .gitignore
|
63
|
+
- ".autotest"
|
64
|
+
- ".gitignore"
|
65
65
|
- Gemfile
|
66
66
|
- Gemfile.lock
|
67
67
|
- LICENSE
|
@@ -90,17 +90,17 @@ require_paths:
|
|
90
90
|
- ext
|
91
91
|
required_ruby_version: !ruby/object:Gem::Requirement
|
92
92
|
requirements:
|
93
|
-
- -
|
93
|
+
- - ">="
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0'
|
96
96
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
97
|
requirements:
|
98
|
-
- -
|
98
|
+
- - ">="
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.6.8
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: HTML Tokenizer
|