nokogumbo 0.5 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/work/util.h ADDED
@@ -0,0 +1,57 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+ //
17
+ // This contains some utility functions that didn't fit into any of the other
18
+ // headers.
19
+
20
+ #ifndef GUMBO_UTIL_H_
21
+ #define GUMBO_UTIL_H_
22
+
23
+ #include <stdbool.h>
24
+ #include <stddef.h>
25
+
26
+ #ifdef __cplusplus
27
+ extern "C" {
28
+ #endif
29
+
30
+ // Forward declaration since it's passed into some of the functions in this
31
+ // header.
32
+ struct _GumboParser;
33
+
34
+ // Utility function for allocating & copying a null-terminated string into a
35
+ // freshly-allocated buffer. This is necessary for proper memory management; we
36
+ // have the convention that all const char* in parse tree structures are
37
+ // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
38
+ // when the parse tree is destroyed.
39
+ char* gumbo_copy_stringz(struct _GumboParser* parser, const char* str);
40
+
41
+ // Allocate a chunk of memory, using the allocator specified in the Parser's
42
+ // config options.
43
+ void* gumbo_parser_allocate(struct _GumboParser* parser, size_t num_bytes);
44
+
45
+ // Deallocate a chunk of memory, using the deallocator specified in the Parser's
46
+ // config options.
47
+ void gumbo_parser_deallocate(struct _GumboParser* parser, void* ptr);
48
+
49
+ // Debug wrapper for printf, to make it easier to turn off debugging info when
50
+ // required.
51
+ void gumbo_debug(const char* format, ...);
52
+
53
+ #ifdef __cplusplus
54
+ }
55
+ #endif
56
+
57
+ #endif // GUMBO_UTIL_H_
data/work/util.o ADDED
Binary file
data/work/vector.c ADDED
@@ -0,0 +1,121 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #include "vector.h"
18
+
19
+ #include <assert.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+ #include <strings.h>
23
+
24
+ #include "util.h"
25
+
26
+ struct _GumboParser;
27
+
28
+ const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
29
+
30
+ void gumbo_vector_init(  // Sets *vector to empty, with room for initial_capacity pointers.
31
+ struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector) {
32
+ vector->length = 0;
33
+ vector->capacity = initial_capacity;
34
+ if (initial_capacity > 0) {
35
+ vector->data = gumbo_parser_allocate(  // NOTE(review): result is not checked here, nor is the size multiplication.
36
+ parser, sizeof(void*) * initial_capacity);
37
+ } else {
38
+ vector->data = NULL;  // Zero capacity: nothing is allocated.
39
+ }
40
+ }
41
+
42
+ void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector) {  // Releases the pointer array only, not the elements.
43
+ if (vector->capacity > 0) {  // The functions in this file only allocate data when capacity is non-zero.
44
+ gumbo_parser_deallocate(parser, vector->data);  // data/length/capacity are left as-is afterwards.
45
+ }
46
+ }
47
+
48
+ static void enlarge_vector_if_full(  // Grows the pointer array when length has reached capacity; otherwise does nothing.
49
+ struct _GumboParser* parser, GumboVector* vector) {
50
+ if (vector->length >= vector->capacity) {
51
+ if (vector->capacity) {
52
+ size_t old_num_bytes = sizeof(void*) * vector->capacity;  // Size of the live array, captured before doubling.
53
+ vector->capacity *= 2;  // NOTE(review): doubling and the byte-size products are not checked for overflow.
54
+ size_t num_bytes = sizeof(void*) * vector->capacity;
55
+ void** temp = gumbo_parser_allocate(parser, num_bytes);  // Allocate-copy-free; util.h declares only allocate/deallocate.
56
+ memcpy(temp, vector->data, old_num_bytes);  // NOTE(review): temp is not checked before being written to.
57
+ gumbo_parser_deallocate(parser, vector->data);
58
+ vector->data = temp;
59
+ } else {
60
+ // 0-capacity vector; no previous array to deallocate.
61
+ vector->capacity = 2;  // First growth from empty starts at two slots.
62
+ vector->data = gumbo_parser_allocate(
63
+ parser, sizeof(void*) * vector->capacity);
64
+ }
65
+ }
66
+ }
67
+
68
+ void gumbo_vector_add(  // Appends element at the end, growing the array first if it is full.
69
+ struct _GumboParser* parser, void* element, GumboVector* vector) {
70
+ enlarge_vector_if_full(parser, vector);
71
+ assert(vector->data);  // Only checked when assertions are enabled (no NDEBUG).
72
+ assert(vector->length < vector->capacity);  // The growth step above must have left a free slot.
73
+ vector->data[vector->length++] = element;  // Stores the pointer itself; nothing is copied.
74
+ }
75
+
76
+ void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector) {  // Removes and returns the last element; parser is unused.
77
+ if (vector->length == 0) {
78
+ return NULL;  // Empty vector: nothing to pop.
79
+ }
80
+ return vector->data[--vector->length];  // Shrinks length by one; capacity and storage are untouched.
81
+ }
82
+
83
+ int gumbo_vector_index_of(GumboVector* vector, void* element) {  // Index of the first slot holding element, or -1.
84
+ for (int i = 0; i < vector->length; ++i) {  // NOTE(review): i is int; length's type is declared in gumbo.h - confirm no signed/unsigned mismatch.
85
+ if (vector->data[i] == element) {  // Pointer identity, not a comparison of what is pointed to.
86
+ return i;
87
+ }
88
+ }
89
+ return -1;  // Not found.
90
+ }
91
+
92
+ void gumbo_vector_insert_at(  // Inserts element at index, shifting later elements one slot to the right.
93
+ struct _GumboParser* parser, void* element, int index, GumboVector* vector) {
94
+ assert(index >= 0);
95
+ assert(index <= vector->length);  // index == length is allowed and appends at the end.
96
+ enlarge_vector_if_full(parser, vector);
97
+ ++vector->length;  // Bumped first, so (length - index - 1) below counts the elements that were at or after index.
98
+ memmove(&vector->data[index + 1], &vector->data[index],  // memmove because source and destination overlap.
99
+ sizeof(void*) * (vector->length - index - 1));
100
+ vector->data[index] = element;
101
+ }
102
+
103
+ void gumbo_vector_remove(  // Removes the first slot holding node; does nothing if node is absent.
104
+ struct _GumboParser* parser, void* node, GumboVector* vector) {
105
+ int index = gumbo_vector_index_of(vector, node);  // Lookup is by pointer identity.
106
+ if (index == -1) {
107
+ return;  // Not present: leave the vector unchanged.
108
+ }
109
+ gumbo_vector_remove_at(parser, index, vector);  // The removed pointer returned by remove_at is discarded.
110
+ }
111
+
112
+ void* gumbo_vector_remove_at(  // Removes and returns the element at index; parser is unused.
113
+ struct _GumboParser* parser, int index, GumboVector* vector) {
114
+ assert(index >= 0);
115
+ assert(index < vector->length);  // Bounds are only enforced when assertions are enabled.
116
+ void* result = vector->data[index];  // Saved before the shift below overwrites this slot.
117
+ memmove(&vector->data[index], &vector->data[index + 1],  // Shift the tail one slot left; the regions overlap.
118
+ sizeof(void*) * (vector->length - index - 1));
119
+ --vector->length;  // Capacity is left unchanged.
120
+ return result;
121
+ }
data/work/vector.h ADDED
@@ -0,0 +1,66 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #ifndef GUMBO_VECTOR_H_
18
+ #define GUMBO_VECTOR_H_
19
+
20
+ #include "gumbo.h"
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
26
+ // Forward declaration since it's passed into some of the functions in this
27
+ // header.
28
+ struct _GumboParser;
29
+
30
+ // Initializes a new GumboVector with the specified initial capacity.
31
+ void gumbo_vector_init(
32
+ struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector);
33
+
34
+ // Frees the memory used by a GumboVector. Does not free the contained
35
+ // pointers.
36
+ void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector);
37
+
38
+ // Adds a new element to a GumboVector.
39
+ void gumbo_vector_add(
40
+ struct _GumboParser* parser, void* element, GumboVector* vector);
41
+
42
+ // Removes and returns the element most recently added to the GumboVector.
43
+ // Ownership is transferred to caller. Capacity is unchanged. If the vector is
44
+ // empty, NULL is returned.
45
+ void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector);
46
+
47
+ // Inserts an element at a specific index. This is potentially O(N) time, but
48
+ // is necessary for some of the spec's behavior.
49
+ void gumbo_vector_insert_at(
50
+ struct _GumboParser* parser, void* element, int index, GumboVector* vector);
51
+
52
+ // Removes an element from the vector, or does nothing if the element is not in
53
+ // the vector.
54
+ void gumbo_vector_remove(
55
+ struct _GumboParser* parser, void* element, GumboVector* vector);
56
+
57
+ // Removes and returns an element at a specific index. Note that this is
58
+ // potentially O(N) time and should be used sparingly.
59
+ void* gumbo_vector_remove_at(
60
+ struct _GumboParser* parser, int index, GumboVector* vector);
61
+
62
+ #ifdef __cplusplus
63
+ }
64
+ #endif
65
+
66
+ #endif // GUMBO_VECTOR_H_
data/work/vector.o ADDED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -37,9 +37,49 @@ extra_rdoc_files: []
37
37
  files:
38
38
  - lib/nokogumbo.rb
39
39
  - LICENSE.txt
40
- - Rakefile
41
40
  - README.md
42
41
  - work/extconf.rb
42
+ - work/utf8.h
43
+ - work/string_piece.o
44
+ - work/string_buffer.o
45
+ - work/nokogumbo.c
46
+ - work/token_type.h
47
+ - work/util.h
48
+ - work/nokogumbo.o
49
+ - work/parser.o
50
+ - work/Makefile
51
+ - work/utf8.o
52
+ - work/vector.c
53
+ - work/string_buffer.c
54
+ - work/tokenizer_states.h
55
+ - work/error.h
56
+ - work/parser.h
57
+ - work/error.c
58
+ - work/tokenizer.h
59
+ - work/nokogumboc.so
60
+ - work/string_buffer.h
61
+ - work/vector.o
62
+ - work/vector.h
63
+ - work/tag.o
64
+ - work/tokenizer.o
65
+ - work/string_piece.h
66
+ - work/attribute.c
67
+ - work/mkmf.log
68
+ - work/char_ref.c
69
+ - work/string_piece.c
70
+ - work/error.o
71
+ - work/gumbo.h
72
+ - work/tag.c
73
+ - work/util.c
74
+ - work/parser.c
75
+ - work/utf8.c
76
+ - work/attribute.h
77
+ - work/char_ref.h
78
+ - work/char_ref.o
79
+ - work/insertion_mode.h
80
+ - work/tokenizer.c
81
+ - work/util.o
82
+ - work/attribute.o
43
83
  homepage: https://github.com/rubys/nokogumbo/#readme
44
84
  licenses:
45
85
  - Apache 2.0
data/Rakefile DELETED
@@ -1,68 +0,0 @@
1
- require 'rubygems/package_task'
2
- require 'rake/clean'
3
-
4
- task 'default' => 'test'
5
-
6
- file 'gumbo-parser' do
7
- sh 'git clone https://github.com/google/gumbo-parser.git'
8
- end
9
-
10
- file 'work/extconf.rb' => ['ext/extconf.rb', 'gumbo-parser'] do
11
- mkdir_p 'work'
12
- rm_f 'work/Makefile'
13
- cp Dir['gumbo-parser/src/*'], 'work', :preserve => true
14
- cp Dir['ext/*'], 'work'
15
- end
16
-
17
- file 'work/Makefile' => 'work/extconf.rb' do
18
- Dir.chdir 'work' do
19
- ruby 'extconf.rb'
20
- end
21
- end
22
-
23
- file 'work/nokogumbo.c' => 'ext/nokogumbo.c' do
24
- cp 'ext/nokogumbo.c', 'work/nokogumbo.c'
25
- end
26
-
27
- task 'compile' => ['work/Makefile', 'work/nokogumbo.c'] do
28
- Dir.chdir 'work' do
29
- sh 'make -s'
30
- end
31
- end
32
-
33
- task 'test' => 'compile' do
34
- ruby 'test-nokogumbo.rb'
35
- end
36
-
37
- CLEAN.include 'pkg', 'gumbo-parser', 'work'
38
-
39
- SPEC = Gem::Specification.new do |gem|
40
- gem.name = 'nokogumbo'
41
- gem.version = '0.5'
42
- gem.email = 'rubys@intertwingly.net'
43
- gem.homepage = 'https://github.com/rubys/nokogumbo/#readme'
44
- gem.summary = 'Nokogiri interface to the Gumbo HTML5 parser'
45
- gem.extensions = 'work/extconf.rb'
46
- gem.author = 'Sam Ruby'
47
- gem.add_dependency 'nokogiri'
48
- gem.license = 'Apache 2.0'
49
- gem.description = %q(
50
- Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
51
- access the result as a Nokogiri parsed document.).strip.gsub(/\s+/, ' ')
52
- gem.files = FileList[
53
- 'lib/nokogumbo.rb',
54
- 'LICENSE.txt',
55
- 'Rakefile',
56
- 'README.md'
57
- ]
58
- end
59
-
60
- task 'package_workfiles' => 'work/extconf.rb' do
61
- PKG.package_files += FileList['work/*.rb', 'work/*.c', 'work/*.h']
62
- end
63
-
64
- task 'gem' => ['test', 'package_workfiles']
65
- PKG = Gem::PackageTask.new(SPEC) do |pkg|
66
- pkg.need_tar = true
67
- pkg.need_zip = true
68
- end