html_tokenizer 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: cf47920a7a89b9806ac9b1e830b6965e2ec0e98e
4
- data.tar.gz: 97dbc221ee255a3d6a9d17f20e1807cdddec3eb3
2
+ SHA256:
3
+ metadata.gz: 2bd91c4080202f5c9d62d494f843f73a3f12c24a24d1408bc09495a161756d4e
4
+ data.tar.gz: 484eba0fbc765e4894d63d60cbcc335032278d8699f56888e00f1782c4bd1466
5
5
  SHA512:
6
- metadata.gz: e9865b029658f9ad0186863319ecbb9a9d62f7de43b7763c91f455848c79cfd0fed2008f5f86d7b79b5434a97b547464716e5a9ebb67ac6248ceec5b8c85cd4a
7
- data.tar.gz: 0c092d6501187b4eff8d0981831b3df3a25b957516c138b7989ae57c92c139ade5952a90df7f80ed928252c724de9cc2bc37d0921540d4da0d1bd13faf6462aa
6
+ metadata.gz: 3b6a469d11e44df8898e1e30e18ae6048974691de52de34b3a9c781598145f42415d2a738337a587b4fc86853f06c6acf7f20e86e972ec96016dea37ae9291b2
7
+ data.tar.gz: 24622424f4abc8ee5ea2cc519024391ca8ef8c49d2c7f29d080256137e34284e64a7cf2b768fb6d773e987f40cf3cdf854e994dd812bf2719efda3f60414af9c
@@ -0,0 +1,19 @@
1
+ name: CI
2
+ on: [push]
3
+
4
+ jobs:
5
+ tests:
6
+ runs-on: ubuntu-latest
7
+ strategy:
8
+ matrix:
9
+ ruby: [ '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3' ]
10
+ name: Ruby ${{ matrix.ruby }} Tests
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - name: Set up Ruby
14
+ uses: ruby/setup-ruby@v1
15
+ with:
16
+ ruby-version: ${{ matrix.ruby }}
17
+ bundler-cache: true
18
+ - name: Run tests
19
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -1,6 +1,8 @@
1
+ Gemfile.lock
1
2
  *.bundle
2
3
  tmp/
3
4
 
5
+
4
6
  # Object files
5
7
  *.o
6
8
  *.ko
@@ -1,8 +1,8 @@
1
1
  require 'mkmf'
2
2
 
3
3
  $CXXFLAGS += " -std=c++11 "
4
- $CXXFLAGS += " -g -Og -ggdb "
5
- $CFLAGS += " -g -Og -ggdb "
4
+ $CXXFLAGS += " -g -O1 -ggdb "
5
+ $CFLAGS += " -g -O1 -ggdb "
6
6
 
7
7
  if ENV['DEBUG']
8
8
  $CXXFLAGS += " -DDEBUG "
@@ -455,8 +455,8 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
455
455
  enc = rb_enc_from_index(parser->doc.enc_index);
456
456
  mb_strlen = rb_enc_strlen(parser->doc.data + ref.start, parser->doc.data + ref.start + ref.length, enc);
457
457
  rb_yield_values(5, token_type_to_symbol(type),
458
- INT2NUM(ref.mb_start), INT2NUM(ref.mb_start + mb_strlen),
459
- INT2NUM(ref.line_number), INT2NUM(ref.column_number));
458
+ ULONG2NUM(ref.mb_start), ULONG2NUM(ref.mb_start + mb_strlen),
459
+ ULONG2NUM(ref.line_number), ULONG2NUM(ref.column_number));
460
460
  }
461
461
 
462
462
  parser_adjust_line_number(parser, ref.start, ref.length);
@@ -495,7 +495,10 @@ static VALUE parser_initialize_method(VALUE self)
495
495
 
496
496
  static int parser_document_append(struct parser_t *parser, const char *string, unsigned long int length)
497
497
  {
498
+ #ifdef DEBUG
498
499
  void *old = parser->doc.data;
500
+ #endif
501
+
499
502
  unsigned long int mb_length;
500
503
  char *buf;
501
504
  rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
@@ -718,7 +721,7 @@ static VALUE parser_errors_count_method(VALUE self)
718
721
  {
719
722
  struct parser_t *parser = NULL;
720
723
  Parser_Get_Struct(self, parser);
721
- return INT2NUM(parser->errors_count);
724
+ return ULONG2NUM(parser->errors_count);
722
725
  }
723
726
 
724
727
  static VALUE create_parser_error(struct parser_document_error_t *error)
@@ -734,7 +737,7 @@ static VALUE create_parser_error(struct parser_document_error_t *error)
734
737
  return rb_class_new_instance(4, args, klass);
735
738
  }
736
739
 
737
- static VALUE parser_errors_method(VALUE self, VALUE error_p)
740
+ static VALUE parser_errors_method(VALUE self)
738
741
  {
739
742
  struct parser_t *parser = NULL;
740
743
  VALUE list;
@@ -135,7 +135,7 @@ static void tokenizer_yield_tag(struct tokenizer_t *tk, enum token_type type, lo
135
135
  {
136
136
  long unsigned int mb_length = tokenizer_mblength(tk, length);
137
137
  tk->last_token = type;
138
- rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.mb_cursor), INT2NUM(tk->scan.mb_cursor + mb_length));
138
+ rb_yield_values(3, token_type_to_symbol(type), ULONG2NUM(tk->scan.mb_cursor), ULONG2NUM(tk->scan.mb_cursor + mb_length));
139
139
  }
140
140
 
141
141
  static void tokenizer_callback(struct tokenizer_t *tk, enum token_type type, long unsigned int length)
@@ -464,11 +464,9 @@ static int scan_tag_name(struct tokenizer_t *tk)
464
464
  {
465
465
  unsigned long int length = 0, tag_name_length = 0;
466
466
  const char *tag_name = NULL;
467
- void *old;
468
467
 
469
468
  if(is_tag_name(&tk->scan, &tag_name, &tag_name_length)) {
470
469
  length = (tk->current_tag ? strlen(tk->current_tag) : 0);
471
- old = tk->current_tag;
472
470
  REALLOC_N(tk->current_tag, char, length + tag_name_length + 1);
473
471
  DBG_PRINT("tk=%p realloc(tk->current_tag) %p -> %p length=%lu", tk, old,
474
472
  tk->current_tag, length + tag_name_length + 1);
@@ -664,7 +662,6 @@ void tokenizer_scan_all(struct tokenizer_t *tk)
664
662
 
665
663
  void tokenizer_set_scan_string(struct tokenizer_t *tk, const char *string, long unsigned int length)
666
664
  {
667
- const char *old = tk->scan.string;
668
665
  REALLOC_N(tk->scan.string, char, string ? length + 1 : 0);
669
666
  DBG_PRINT("tk=%p realloc(tk->scan.string) %p -> %p length=%lu", tk, old,
670
667
  tk->scan.string, length + 1);
@@ -1,9 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/html_tokenizer/version"
4
+
1
5
  Gem::Specification.new do |spec|
2
6
  spec.name = "html_tokenizer"
3
- spec.version = "0.0.6"
7
+ spec.version = HtmlTokenizer::VERSION
4
8
  spec.summary = "HTML Tokenizer"
5
9
  spec.author = "Francois Chagnon"
6
10
 
11
+ spec.homepage = "https://github.com/Shopify/html_tokenizer"
12
+ spec.license = "MIT"
13
+
14
+ spec.metadata["allowed_push_host"] = "https://rubygems.org/"
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+
7
19
  spec.files = Dir.glob("ext/**/*.{c,h,rb}") +
8
20
  Dir.glob("lib/**/*.rb")
9
21
 
@@ -12,8 +24,4 @@ Gem::Specification.new do |spec|
12
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
13
25
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
26
  spec.require_paths = ["lib", "ext"]
15
-
16
- spec.add_development_dependency 'rake', '~> 0'
17
- spec.add_development_dependency 'rake-compiler', '~> 0'
18
- spec.add_development_dependency 'minitest', '~> 0'
19
27
  end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HtmlTokenizer
4
+ VERSION = "0.0.8"
5
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'html_tokenizer_ext'
2
4
 
3
5
  module HtmlTokenizer
metadata CHANGED
@@ -1,59 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Chagnon
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: rake
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake-compiler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: minitest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- description:
56
- email:
11
+ date: 2024-03-20 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
57
15
  executables:
58
16
  - html_tokenizer
59
17
  extensions:
@@ -61,9 +19,9 @@ extensions:
61
19
  extra_rdoc_files: []
62
20
  files:
63
21
  - ".autotest"
22
+ - ".github/workflows/ci.yml"
64
23
  - ".gitignore"
65
24
  - Gemfile
66
- - Gemfile.lock
67
25
  - LICENSE
68
26
  - Manifest.txt
69
27
  - README.md
@@ -78,12 +36,17 @@ files:
78
36
  - ext/html_tokenizer_ext/tokenizer.h
79
37
  - html_tokenizer.gemspec
80
38
  - lib/html_tokenizer.rb
39
+ - lib/html_tokenizer/version.rb
81
40
  - test/unit/parser_test.rb
82
41
  - test/unit/tokenizer_test.rb
83
- homepage:
84
- licenses: []
85
- metadata: {}
86
- post_install_message:
42
+ homepage: https://github.com/Shopify/html_tokenizer
43
+ licenses:
44
+ - MIT
45
+ metadata:
46
+ allowed_push_host: https://rubygems.org/
47
+ homepage_uri: https://github.com/Shopify/html_tokenizer
48
+ source_code_uri: https://github.com/Shopify/html_tokenizer
49
+ post_install_message:
87
50
  rdoc_options: []
88
51
  require_paths:
89
52
  - lib
@@ -99,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
62
  - !ruby/object:Gem::Version
100
63
  version: '0'
101
64
  requirements: []
102
- rubyforge_project:
103
- rubygems_version: 2.6.14
104
- signing_key:
65
+ rubygems_version: 3.5.5
66
+ signing_key:
105
67
  specification_version: 4
106
68
  summary: HTML Tokenizer
107
69
  test_files:
data/Gemfile.lock DELETED
@@ -1,24 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- html_tokenizer (0.0.6)
5
-
6
- GEM
7
- remote: https://rubygems.org/
8
- specs:
9
- minitest (5.9.0)
10
- rake (12.3.0)
11
- rake-compiler (0.9.9)
12
- rake
13
-
14
- PLATFORMS
15
- ruby
16
-
17
- DEPENDENCIES
18
- html_tokenizer!
19
- minitest
20
- rake
21
- rake-compiler
22
-
23
- BUNDLED WITH
24
- 1.16.0