html_tokenizer 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18ea0aa757ca8640a78cb619149b306aba21b8cc
4
- data.tar.gz: 182b3fab9d73373ae04c45beb3d73eb0798bfb3b
3
+ metadata.gz: a45a481e7310c22092c48de49f62315d7ae19700
4
+ data.tar.gz: fda14319ce3a8b6770c074b68384e3c7e16d3fa7
5
5
  SHA512:
6
- metadata.gz: d92c656e1c755d7b6e338732444327481af753145ca981e5e35510c44f77eecc5b06cb9eae16b02874f502037ecbf286f817886cb4fe1a5ea196fe77c760feaf
7
- data.tar.gz: a57323a2228866046de1176f28f70c0309f24cb8a5fec6e0b486a4c64beaab83dcc32ab059b044c85776edcc1833bf59bb068606fcbd32c7c9ead6ecd032edaa
6
+ metadata.gz: b55f2ae076aa4cbf3b55fca42435bcfa63ec4e9e032a91ad55260cf0b6cc74ae75b484b0c7ad70cb08a6bf3fe75d9499feec9df768f85fdac2fdfe29cc8262ca
7
+ data.tar.gz: 8e3726c0471524c66270d8ed187a807b5c6bf906312e754c69e0e4da915db46ee28729f0400347413ddaf147c48a21fd4d628454e01f09eec2a104ace6452d17
data/.gitignore CHANGED
@@ -30,6 +30,7 @@ tmp/
30
30
  *.i*86
31
31
  *.x86_64
32
32
  *.hex
33
+ *.gem
33
34
 
34
35
  # Debug files
35
36
  *.dSYM/
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html_tokenizer (0.0.3)
4
+ html_tokenizer (0.0.5)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  minitest (5.9.0)
10
- rake (11.1.2)
10
+ rake (12.3.0)
11
11
  rake-compiler (0.9.9)
12
12
  rake
13
13
 
@@ -2,5 +2,11 @@ require 'mkmf'
2
2
 
3
3
  $CXXFLAGS += " -std=c++11 "
4
4
  $CXXFLAGS += " -g -Og -ggdb "
5
+ $CFLAGS += " -g -Og -ggdb "
6
+
7
+ if ENV['DEBUG']
8
+ $CXXFLAGS += " -DDEBUG "
9
+ $CFLAGS += " -DDEBUG "
10
+ end
5
11
 
6
12
  create_makefile('html_tokenizer_ext')
@@ -14,6 +14,7 @@ static void parser_free(void *ptr)
14
14
  size_t i;
15
15
 
16
16
  if(parser) {
17
+ tokenizer_free_members(&parser->tk);
17
18
  if(parser->doc.data) {
18
19
  DBG_PRINT("parser=%p xfree(parser->doc.data) %p", parser, parser->doc.data);
19
20
  xfree(parser->doc.data);
@@ -543,12 +544,12 @@ static VALUE parser_append_data(VALUE self, VALUE source, int is_placeholder)
543
544
  }
544
545
  else {
545
546
  parser->tk.scan.cursor = cursor;
546
- parser->tk.scan.string = parser->doc.data;
547
- parser->tk.scan.length = parser->doc.length;
547
+ tokenizer_set_scan_string(&parser->tk, parser->doc.data, parser->doc.length);
548
548
  parser->tk.scan.enc_index = parser->doc.enc_index;
549
549
  parser->tk.scan.mb_cursor = mb_cursor;
550
550
 
551
551
  tokenizer_scan_all(&parser->tk);
552
+ tokenizer_free_scan_string(&parser->tk);
552
553
  }
553
554
 
554
555
  return Qtrue;
@@ -12,16 +12,7 @@ static void tokenizer_free(void *ptr)
12
12
  {
13
13
  struct tokenizer_t *tk = ptr;
14
14
  if(tk) {
15
- if(tk->current_tag) {
16
- DBG_PRINT("tk=%p xfree(tk->current_tag) %p", tk, tk->current_tag);
17
- xfree(tk->current_tag);
18
- tk->current_tag = NULL;
19
- }
20
- if(tk->scan.string) {
21
- DBG_PRINT("tk=%p xfree(tk->scan.string) %p", tk, tk->scan.string);
22
- xfree(tk->scan.string);
23
- tk->scan.string = NULL;
24
- }
15
+ tokenizer_free_members(tk);
25
16
  DBG_PRINT("tk=%p xfree(tk)", tk);
26
17
  xfree(tk);
27
18
  }
@@ -75,6 +66,21 @@ void tokenizer_init(struct tokenizer_t *tk)
75
66
  return;
76
67
  }
77
68
 
69
+ void tokenizer_free_members(struct tokenizer_t *tk)
70
+ {
71
+ if(tk->current_tag) {
72
+ DBG_PRINT("tk=%p xfree(tk->current_tag) %p", tk, tk->current_tag);
73
+ xfree(tk->current_tag);
74
+ tk->current_tag = NULL;
75
+ }
76
+ if(tk->scan.string) {
77
+ DBG_PRINT("tk=%p xfree(tk->scan.string) %p", tk, tk->scan.string);
78
+ xfree(tk->scan.string);
79
+ tk->scan.string = NULL;
80
+ }
81
+ return;
82
+ }
83
+
78
84
  VALUE token_type_to_symbol(enum token_type type)
79
85
  {
80
86
  switch(type) {
@@ -656,11 +662,30 @@ void tokenizer_scan_all(struct tokenizer_t *tk)
656
662
  return;
657
663
  }
658
664
 
665
+ void tokenizer_set_scan_string(struct tokenizer_t *tk, const char *string, long unsigned int length)
666
+ {
667
+ const char *old = tk->scan.string;
668
+ REALLOC_N(tk->scan.string, char, string ? length + 1 : 0);
669
+ DBG_PRINT("tk=%p realloc(tk->scan.string) %p -> %p length=%lu", tk, old,
670
+ tk->scan.string, length + 1);
671
+ if(string && length > 0) {
672
+ strncpy(tk->scan.string, string, length);
673
+ tk->scan.string[length] = 0;
674
+ }
675
+ tk->scan.length = length;
676
+ return;
677
+ }
678
+
679
+ void tokenizer_free_scan_string(struct tokenizer_t *tk)
680
+ {
681
+ tokenizer_set_scan_string(tk, NULL, 0);
682
+ return;
683
+ }
684
+
659
685
  static VALUE tokenizer_tokenize_method(VALUE self, VALUE source)
660
686
  {
661
687
  struct tokenizer_t *tk = NULL;
662
688
  char *c_source;
663
- char *old;
664
689
 
665
690
  if(NIL_P(source))
666
691
  return Qnil;
@@ -670,21 +695,13 @@ static VALUE tokenizer_tokenize_method(VALUE self, VALUE source)
670
695
 
671
696
  c_source = StringValueCStr(source);
672
697
  tk->scan.cursor = 0;
673
- tk->scan.length = strlen(c_source);
698
+ tokenizer_set_scan_string(tk, c_source, strlen(c_source));
674
699
  tk->scan.enc_index = rb_enc_get_index(source);
675
700
  tk->scan.mb_cursor = 0;
676
701
 
677
- old = tk->scan.string;
678
- REALLOC_N(tk->scan.string, char, tk->scan.length+1);
679
- DBG_PRINT("tk=%p realloc(tk->scan.string) %p -> %p length=%lu", tk, old,
680
- tk->scan.string, tk->scan.length+1);
681
- strncpy(tk->scan.string, c_source, tk->scan.length);
682
-
683
702
  tokenizer_scan_all(tk);
684
703
 
685
- DBG_PRINT("tk=%p xfree(tk->scan.string) 0x%p", tk, tk->scan.string);
686
- xfree(tk->scan.string);
687
- tk->scan.string = NULL;
704
+ tokenizer_free_scan_string(tk);
688
705
 
689
706
  return Qtrue;
690
707
  }
@@ -70,6 +70,9 @@ struct tokenizer_t
70
70
 
71
71
  void Init_html_tokenizer_tokenizer(VALUE mHtmlTokenizer);
72
72
  void tokenizer_init(struct tokenizer_t *tk);
73
+ void tokenizer_free_members(struct tokenizer_t *tk);
74
+ void tokenizer_set_scan_string(struct tokenizer_t *tk, const char *string, long unsigned int length);
75
+ void tokenizer_free_scan_string(struct tokenizer_t *tk);
73
76
  void tokenizer_scan_all(struct tokenizer_t *tk);
74
77
  VALUE token_type_to_symbol(enum token_type type);
75
78
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "html_tokenizer"
3
- spec.version = "0.0.3"
3
+ spec.version = "0.0.5"
4
4
  spec.summary = "HTML Tokenizer"
5
5
  spec.author = "Francois Chagnon"
6
6
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Chagnon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-21 00:00:00.000000000 Z
11
+ date: 2017-11-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake