nokogumbo 0.5 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/work/util.h ADDED
@@ -0,0 +1,57 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+ //
17
+ // This contains some utility functions that didn't fit into any of the other
18
+ // headers.
19
+
20
+ #ifndef GUMBO_UTIL_H_
21
+ #define GUMBO_UTIL_H_
22
+
23
+ #include <stdbool.h>
24
+ #include <stddef.h>
25
+
26
+ #ifdef __cplusplus
27
+ extern "C" {
28
+ #endif
29
+
30
+ // Forward declaration since it's passed into some of the functions in this
31
+ // header.
32
+ struct _GumboParser;
33
+
34
+ // Utility function for allocating & copying a null-terminated string into a
35
+ // freshly-allocated buffer. This is necessary for proper memory management; we
36
+ // have the convention that all const char* in parse tree structures are
37
+ // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
38
+ // when the parse tree is destroyed.
39
+ char* gumbo_copy_stringz(struct _GumboParser* parser, const char* str);
40
+
41
+ // Allocate a chunk of memory, using the allocator specified in the Parser's
42
+ // config options.
43
+ void* gumbo_parser_allocate(struct _GumboParser* parser, size_t num_bytes);
44
+
45
+ // Deallocate a chunk of memory, using the deallocator specified in the Parser's
46
+ // config options.
47
+ void gumbo_parser_deallocate(struct _GumboParser* parser, void* ptr);
48
+
49
+ // Debug wrapper for printf, to make it easier to turn off debugging info when
50
+ // required.
51
+ void gumbo_debug(const char* format, ...);
52
+
53
+ #ifdef __cplusplus
54
+ }
55
+ #endif
56
+
57
+ #endif // GUMBO_UTIL_H_
data/work/util.o ADDED
Binary file
data/work/vector.c ADDED
@@ -0,0 +1,121 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #include "vector.h"
18
+
19
+ #include <assert.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+ #include <strings.h>
23
+
24
+ #include "util.h"
25
+
26
+ struct _GumboParser;
27
+
28
+ const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
29
+
30
+ void gumbo_vector_init(
31
+ struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector) {
32
+ vector->length = 0;
33
+ vector->capacity = initial_capacity;
34
+ if (initial_capacity > 0) {
35
+ vector->data = gumbo_parser_allocate(
36
+ parser, sizeof(void*) * initial_capacity);
37
+ } else {
38
+ vector->data = NULL;
39
+ }
40
+ }
41
+
42
+ void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector) {
43
+ if (vector->capacity > 0) {
44
+ gumbo_parser_deallocate(parser, vector->data);
45
+ }
46
+ }
47
+
48
+ static void enlarge_vector_if_full(
49
+ struct _GumboParser* parser, GumboVector* vector) {
50
+ if (vector->length >= vector->capacity) {
51
+ if (vector->capacity) {
52
+ size_t old_num_bytes = sizeof(void*) * vector->capacity;
53
+ vector->capacity *= 2;
54
+ size_t num_bytes = sizeof(void*) * vector->capacity;
55
+ void** temp = gumbo_parser_allocate(parser, num_bytes);
56
+ memcpy(temp, vector->data, old_num_bytes);
57
+ gumbo_parser_deallocate(parser, vector->data);
58
+ vector->data = temp;
59
+ } else {
60
+ // 0-capacity vector; no previous array to deallocate.
61
+ vector->capacity = 2;
62
+ vector->data = gumbo_parser_allocate(
63
+ parser, sizeof(void*) * vector->capacity);
64
+ }
65
+ }
66
+ }
67
+
68
+ void gumbo_vector_add(
69
+ struct _GumboParser* parser, void* element, GumboVector* vector) {
70
+ enlarge_vector_if_full(parser, vector);
71
+ assert(vector->data);
72
+ assert(vector->length < vector->capacity);
73
+ vector->data[vector->length++] = element;
74
+ }
75
+
76
+ void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector) {
77
+ if (vector->length == 0) {
78
+ return NULL;
79
+ }
80
+ return vector->data[--vector->length];
81
+ }
82
+
83
+ int gumbo_vector_index_of(GumboVector* vector, void* element) {
84
+ for (int i = 0; i < vector->length; ++i) {
85
+ if (vector->data[i] == element) {
86
+ return i;
87
+ }
88
+ }
89
+ return -1;
90
+ }
91
+
92
+ void gumbo_vector_insert_at(
93
+ struct _GumboParser* parser, void* element, int index, GumboVector* vector) {
94
+ assert(index >= 0);
95
+ assert(index <= vector->length);
96
+ enlarge_vector_if_full(parser, vector);
97
+ ++vector->length;
98
+ memmove(&vector->data[index + 1], &vector->data[index],
99
+ sizeof(void*) * (vector->length - index - 1));
100
+ vector->data[index] = element;
101
+ }
102
+
103
+ void gumbo_vector_remove(
104
+ struct _GumboParser* parser, void* node, GumboVector* vector) {
105
+ int index = gumbo_vector_index_of(vector, node);
106
+ if (index == -1) {
107
+ return;
108
+ }
109
+ gumbo_vector_remove_at(parser, index, vector);
110
+ }
111
+
112
+ void* gumbo_vector_remove_at(
113
+ struct _GumboParser* parser, int index, GumboVector* vector) {
114
+ assert(index >= 0);
115
+ assert(index < vector->length);
116
+ void* result = vector->data[index];
117
+ memmove(&vector->data[index], &vector->data[index + 1],
118
+ sizeof(void*) * (vector->length - index - 1));
119
+ --vector->length;
120
+ return result;
121
+ }
data/work/vector.h ADDED
@@ -0,0 +1,66 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #ifndef GUMBO_VECTOR_H_
18
+ #define GUMBO_VECTOR_H_
19
+
20
+ #include "gumbo.h"
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
26
+ // Forward declaration since it's passed into some of the functions in this
27
+ // header.
28
+ struct _GumboParser;
29
+
30
+ // Initializes a new GumboVector with the specified initial capacity.
31
+ void gumbo_vector_init(
32
+ struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector);
33
+
34
+ // Frees the memory used by a GumboVector. Does not free the contained
35
+ // pointers.
36
+ void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector);
37
+
38
+ // Adds a new element to a GumboVector.
39
+ void gumbo_vector_add(
40
+ struct _GumboParser* parser, void* element, GumboVector* vector);
41
+
42
+ // Removes and returns the element most recently added to the GumboVector.
43
+ // Ownership is transferred to caller. Capacity is unchanged. If the vector is
44
+ // empty, NULL is returned.
45
+ void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector);
46
+
47
+ // Inserts an element at a specific index. This is potentially O(N) time, but
48
+ // is necessary for some of the spec's behavior.
49
+ void gumbo_vector_insert_at(
50
+ struct _GumboParser* parser, void* element, int index, GumboVector* vector);
51
+
52
+ // Removes an element from the vector, or does nothing if the element is not in
53
+ // the vector.
54
+ void gumbo_vector_remove(
55
+ struct _GumboParser* parser, void* element, GumboVector* vector);
56
+
57
+ // Removes and returns an element at a specific index. Note that this is
58
+ // potentially O(N) time and should be used sparingly.
59
+ void* gumbo_vector_remove_at(
60
+ struct _GumboParser* parser, int index, GumboVector* vector);
61
+
62
+ #ifdef __cplusplus
63
+ }
64
+ #endif
65
+
66
+ #endif // GUMBO_VECTOR_H_
data/work/vector.o ADDED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -37,9 +37,49 @@ extra_rdoc_files: []
37
37
  files:
38
38
  - lib/nokogumbo.rb
39
39
  - LICENSE.txt
40
- - Rakefile
41
40
  - README.md
42
41
  - work/extconf.rb
42
+ - work/utf8.h
43
+ - work/string_piece.o
44
+ - work/string_buffer.o
45
+ - work/nokogumbo.c
46
+ - work/token_type.h
47
+ - work/util.h
48
+ - work/nokogumbo.o
49
+ - work/parser.o
50
+ - work/Makefile
51
+ - work/utf8.o
52
+ - work/vector.c
53
+ - work/string_buffer.c
54
+ - work/tokenizer_states.h
55
+ - work/error.h
56
+ - work/parser.h
57
+ - work/error.c
58
+ - work/tokenizer.h
59
+ - work/nokogumboc.so
60
+ - work/string_buffer.h
61
+ - work/vector.o
62
+ - work/vector.h
63
+ - work/tag.o
64
+ - work/tokenizer.o
65
+ - work/string_piece.h
66
+ - work/attribute.c
67
+ - work/mkmf.log
68
+ - work/char_ref.c
69
+ - work/string_piece.c
70
+ - work/error.o
71
+ - work/gumbo.h
72
+ - work/tag.c
73
+ - work/util.c
74
+ - work/parser.c
75
+ - work/utf8.c
76
+ - work/attribute.h
77
+ - work/char_ref.h
78
+ - work/char_ref.o
79
+ - work/insertion_mode.h
80
+ - work/tokenizer.c
81
+ - work/util.o
82
+ - work/attribute.o
43
83
  homepage: https://github.com/rubys/nokogumbo/#readme
44
84
  licenses:
45
85
  - Apache 2.0
data/Rakefile DELETED
@@ -1,68 +0,0 @@
1
- require 'rubygems/package_task'
2
- require 'rake/clean'
3
-
4
- task 'default' => 'test'
5
-
6
- file 'gumbo-parser' do
7
- sh 'git clone https://github.com/google/gumbo-parser.git'
8
- end
9
-
10
- file 'work/extconf.rb' => ['ext/extconf.rb', 'gumbo-parser'] do
11
- mkdir_p 'work'
12
- rm_f 'work/Makefile'
13
- cp Dir['gumbo-parser/src/*'], 'work', :preserve => true
14
- cp Dir['ext/*'], 'work'
15
- end
16
-
17
- file 'work/Makefile' => 'work/extconf.rb' do
18
- Dir.chdir 'work' do
19
- ruby 'extconf.rb'
20
- end
21
- end
22
-
23
- file 'work/nokogumbo.c' => 'ext/nokogumbo.c' do
24
- cp 'ext/nokogumbo.c', 'work/nokogumbo.c'
25
- end
26
-
27
- task 'compile' => ['work/Makefile', 'work/nokogumbo.c'] do
28
- Dir.chdir 'work' do
29
- sh 'make -s'
30
- end
31
- end
32
-
33
- task 'test' => 'compile' do
34
- ruby 'test-nokogumbo.rb'
35
- end
36
-
37
- CLEAN.include 'pkg', 'gumbo-parser', 'work'
38
-
39
- SPEC = Gem::Specification.new do |gem|
40
- gem.name = 'nokogumbo'
41
- gem.version = '0.5'
42
- gem.email = 'rubys@intertwingly.net'
43
- gem.homepage = 'https://github.com/rubys/nokogumbo/#readme'
44
- gem.summary = 'Nokogiri interface to the Gumbo HTML5 parser'
45
- gem.extensions = 'work/extconf.rb'
46
- gem.author = 'Sam Ruby'
47
- gem.add_dependency 'nokogiri'
48
- gem.license = 'Apache 2.0'
49
- gem.description = %q(
50
- Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
51
- access the result as a Nokogiri parsed document.).strip.gsub(/\s+/, ' ')
52
- gem.files = FileList[
53
- 'lib/nokogumbo.rb',
54
- 'LICENSE.txt',
55
- 'Rakefile',
56
- 'README.md'
57
- ]
58
- end
59
-
60
- task 'package_workfiles' => 'work/extconf.rb' do
61
- PKG.package_files += FileList['work/*.rb', 'work/*.c', 'work/*.h']
62
- end
63
-
64
- task 'gem' => ['test', 'package_workfiles']
65
- PKG = Gem::PackageTask.new(SPEC) do |pkg|
66
- pkg.need_tar = true
67
- pkg.need_zip = true
68
- end