html_tokenizer 0.0.6 → 0.0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: cf47920a7a89b9806ac9b1e830b6965e2ec0e98e
4
- data.tar.gz: 97dbc221ee255a3d6a9d17f20e1807cdddec3eb3
2
+ SHA256:
3
+ metadata.gz: 2bd91c4080202f5c9d62d494f843f73a3f12c24a24d1408bc09495a161756d4e
4
+ data.tar.gz: 484eba0fbc765e4894d63d60cbcc335032278d8699f56888e00f1782c4bd1466
5
5
  SHA512:
6
- metadata.gz: e9865b029658f9ad0186863319ecbb9a9d62f7de43b7763c91f455848c79cfd0fed2008f5f86d7b79b5434a97b547464716e5a9ebb67ac6248ceec5b8c85cd4a
7
- data.tar.gz: 0c092d6501187b4eff8d0981831b3df3a25b957516c138b7989ae57c92c139ade5952a90df7f80ed928252c724de9cc2bc37d0921540d4da0d1bd13faf6462aa
6
+ metadata.gz: 3b6a469d11e44df8898e1e30e18ae6048974691de52de34b3a9c781598145f42415d2a738337a587b4fc86853f06c6acf7f20e86e972ec96016dea37ae9291b2
7
+ data.tar.gz: 24622424f4abc8ee5ea2cc519024391ca8ef8c49d2c7f29d080256137e34284e64a7cf2b768fb6d773e987f40cf3cdf854e994dd812bf2719efda3f60414af9c
@@ -0,0 +1,19 @@
1
+ name: CI
2
+ on: [push]
3
+
4
+ jobs:
5
+ tests:
6
+ runs-on: ubuntu-latest
7
+ strategy:
8
+ matrix:
9
+ ruby: [ '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3' ]
10
+ name: Ruby ${{ matrix.ruby }} Tests
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - name: Set up Ruby
14
+ uses: ruby/setup-ruby@v1
15
+ with:
16
+ ruby-version: ${{ matrix.ruby }}
17
+ bundler-cache: true
18
+ - name: Run tests
19
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -1,6 +1,8 @@
1
+ Gemfile.lock
1
2
  *.bundle
2
3
  tmp/
3
4
 
5
+
4
6
  # Object files
5
7
  *.o
6
8
  *.ko
@@ -1,8 +1,8 @@
1
1
  require 'mkmf'
2
2
 
3
3
  $CXXFLAGS += " -std=c++11 "
4
- $CXXFLAGS += " -g -Og -ggdb "
5
- $CFLAGS += " -g -Og -ggdb "
4
+ $CXXFLAGS += " -g -O1 -ggdb "
5
+ $CFLAGS += " -g -O1 -ggdb "
6
6
 
7
7
  if ENV['DEBUG']
8
8
  $CXXFLAGS += " -DDEBUG "
@@ -455,8 +455,8 @@ static void parser_tokenize_callback(struct tokenizer_t *tk, enum token_type typ
455
455
  enc = rb_enc_from_index(parser->doc.enc_index);
456
456
  mb_strlen = rb_enc_strlen(parser->doc.data + ref.start, parser->doc.data + ref.start + ref.length, enc);
457
457
  rb_yield_values(5, token_type_to_symbol(type),
458
- INT2NUM(ref.mb_start), INT2NUM(ref.mb_start + mb_strlen),
459
- INT2NUM(ref.line_number), INT2NUM(ref.column_number));
458
+ ULONG2NUM(ref.mb_start), ULONG2NUM(ref.mb_start + mb_strlen),
459
+ ULONG2NUM(ref.line_number), ULONG2NUM(ref.column_number));
460
460
  }
461
461
 
462
462
  parser_adjust_line_number(parser, ref.start, ref.length);
@@ -495,7 +495,10 @@ static VALUE parser_initialize_method(VALUE self)
495
495
 
496
496
  static int parser_document_append(struct parser_t *parser, const char *string, unsigned long int length)
497
497
  {
498
+ #ifdef DEBUG
498
499
  void *old = parser->doc.data;
500
+ #endif
501
+
499
502
  unsigned long int mb_length;
500
503
  char *buf;
501
504
  rb_encoding *enc = rb_enc_from_index(parser->doc.enc_index);
@@ -718,7 +721,7 @@ static VALUE parser_errors_count_method(VALUE self)
718
721
  {
719
722
  struct parser_t *parser = NULL;
720
723
  Parser_Get_Struct(self, parser);
721
- return INT2NUM(parser->errors_count);
724
+ return ULONG2NUM(parser->errors_count);
722
725
  }
723
726
 
724
727
  static VALUE create_parser_error(struct parser_document_error_t *error)
@@ -734,7 +737,7 @@ static VALUE create_parser_error(struct parser_document_error_t *error)
734
737
  return rb_class_new_instance(4, args, klass);
735
738
  }
736
739
 
737
- static VALUE parser_errors_method(VALUE self, VALUE error_p)
740
+ static VALUE parser_errors_method(VALUE self)
738
741
  {
739
742
  struct parser_t *parser = NULL;
740
743
  VALUE list;
@@ -135,7 +135,7 @@ static void tokenizer_yield_tag(struct tokenizer_t *tk, enum token_type type, lo
135
135
  {
136
136
  long unsigned int mb_length = tokenizer_mblength(tk, length);
137
137
  tk->last_token = type;
138
- rb_yield_values(3, token_type_to_symbol(type), INT2NUM(tk->scan.mb_cursor), INT2NUM(tk->scan.mb_cursor + mb_length));
138
+ rb_yield_values(3, token_type_to_symbol(type), ULONG2NUM(tk->scan.mb_cursor), ULONG2NUM(tk->scan.mb_cursor + mb_length));
139
139
  }
140
140
 
141
141
  static void tokenizer_callback(struct tokenizer_t *tk, enum token_type type, long unsigned int length)
@@ -464,11 +464,9 @@ static int scan_tag_name(struct tokenizer_t *tk)
464
464
  {
465
465
  unsigned long int length = 0, tag_name_length = 0;
466
466
  const char *tag_name = NULL;
467
- void *old;
468
467
 
469
468
  if(is_tag_name(&tk->scan, &tag_name, &tag_name_length)) {
470
469
  length = (tk->current_tag ? strlen(tk->current_tag) : 0);
471
- old = tk->current_tag;
472
470
  REALLOC_N(tk->current_tag, char, length + tag_name_length + 1);
473
471
  DBG_PRINT("tk=%p realloc(tk->current_tag) %p -> %p length=%lu", tk, old,
474
472
  tk->current_tag, length + tag_name_length + 1);
@@ -664,7 +662,6 @@ void tokenizer_scan_all(struct tokenizer_t *tk)
664
662
 
665
663
  void tokenizer_set_scan_string(struct tokenizer_t *tk, const char *string, long unsigned int length)
666
664
  {
667
- const char *old = tk->scan.string;
668
665
  REALLOC_N(tk->scan.string, char, string ? length + 1 : 0);
669
666
  DBG_PRINT("tk=%p realloc(tk->scan.string) %p -> %p length=%lu", tk, old,
670
667
  tk->scan.string, length + 1);
@@ -1,9 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/html_tokenizer/version"
4
+
1
5
  Gem::Specification.new do |spec|
2
6
  spec.name = "html_tokenizer"
3
- spec.version = "0.0.6"
7
+ spec.version = HtmlTokenizer::VERSION
4
8
  spec.summary = "HTML Tokenizer"
5
9
  spec.author = "Francois Chagnon"
6
10
 
11
+ spec.homepage = "https://github.com/Shopify/html_tokenizer"
12
+ spec.license = "MIT"
13
+
14
+ spec.metadata["allowed_push_host"] = "https://rubygems.org/"
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+
7
19
  spec.files = Dir.glob("ext/**/*.{c,h,rb}") +
8
20
  Dir.glob("lib/**/*.rb")
9
21
 
@@ -12,8 +24,4 @@ Gem::Specification.new do |spec|
12
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
13
25
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
26
  spec.require_paths = ["lib", "ext"]
15
-
16
- spec.add_development_dependency 'rake', '~> 0'
17
- spec.add_development_dependency 'rake-compiler', '~> 0'
18
- spec.add_development_dependency 'minitest', '~> 0'
19
27
  end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HtmlTokenizer
4
+ VERSION = "0.0.8"
5
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'html_tokenizer_ext'
2
4
 
3
5
  module HtmlTokenizer
metadata CHANGED
@@ -1,59 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Chagnon
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: rake
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake-compiler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: minitest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- description:
56
- email:
11
+ date: 2024-03-20 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
57
15
  executables:
58
16
  - html_tokenizer
59
17
  extensions:
@@ -61,9 +19,9 @@ extensions:
61
19
  extra_rdoc_files: []
62
20
  files:
63
21
  - ".autotest"
22
+ - ".github/workflows/ci.yml"
64
23
  - ".gitignore"
65
24
  - Gemfile
66
- - Gemfile.lock
67
25
  - LICENSE
68
26
  - Manifest.txt
69
27
  - README.md
@@ -78,12 +36,17 @@ files:
78
36
  - ext/html_tokenizer_ext/tokenizer.h
79
37
  - html_tokenizer.gemspec
80
38
  - lib/html_tokenizer.rb
39
+ - lib/html_tokenizer/version.rb
81
40
  - test/unit/parser_test.rb
82
41
  - test/unit/tokenizer_test.rb
83
- homepage:
84
- licenses: []
85
- metadata: {}
86
- post_install_message:
42
+ homepage: https://github.com/Shopify/html_tokenizer
43
+ licenses:
44
+ - MIT
45
+ metadata:
46
+ allowed_push_host: https://rubygems.org/
47
+ homepage_uri: https://github.com/Shopify/html_tokenizer
48
+ source_code_uri: https://github.com/Shopify/html_tokenizer
49
+ post_install_message:
87
50
  rdoc_options: []
88
51
  require_paths:
89
52
  - lib
@@ -99,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
62
  - !ruby/object:Gem::Version
100
63
  version: '0'
101
64
  requirements: []
102
- rubyforge_project:
103
- rubygems_version: 2.6.14
104
- signing_key:
65
+ rubygems_version: 3.5.5
66
+ signing_key:
105
67
  specification_version: 4
106
68
  summary: HTML Tokenizer
107
69
  test_files:
data/Gemfile.lock DELETED
@@ -1,24 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- html_tokenizer (0.0.6)
5
-
6
- GEM
7
- remote: https://rubygems.org/
8
- specs:
9
- minitest (5.9.0)
10
- rake (12.3.0)
11
- rake-compiler (0.9.9)
12
- rake
13
-
14
- PLATFORMS
15
- ruby
16
-
17
- DEPENDENCIES
18
- html_tokenizer!
19
- minitest
20
- rake
21
- rake-compiler
22
-
23
- BUNDLED WITH
24
- 1.16.0