nokogumbo 0.5 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/work/Makefile +213 -0
- data/work/attribute.c +44 -0
- data/work/attribute.h +37 -0
- data/work/attribute.o +0 -0
- data/work/char_ref.c +2561 -0
- data/work/char_ref.h +61 -0
- data/work/char_ref.o +0 -0
- data/work/error.c +258 -0
- data/work/error.h +225 -0
- data/work/error.o +0 -0
- data/work/gumbo.h +800 -0
- data/work/insertion_mode.h +54 -0
- data/work/mkmf.log +41 -0
- data/work/nokogumbo.c +97 -0
- data/work/nokogumbo.o +0 -0
- data/work/nokogumboc.so +0 -0
- data/work/parser.c +3893 -0
- data/work/parser.h +57 -0
- data/work/parser.o +0 -0
- data/work/string_buffer.c +106 -0
- data/work/string_buffer.h +82 -0
- data/work/string_buffer.o +0 -0
- data/work/string_piece.c +49 -0
- data/work/string_piece.h +39 -0
- data/work/string_piece.o +0 -0
- data/work/tag.c +222 -0
- data/work/tag.o +0 -0
- data/work/token_type.h +40 -0
- data/work/tokenizer.c +2978 -0
- data/work/tokenizer.h +123 -0
- data/work/tokenizer.o +0 -0
- data/work/tokenizer_states.h +103 -0
- data/work/utf8.c +268 -0
- data/work/utf8.h +127 -0
- data/work/utf8.o +0 -0
- data/work/util.c +58 -0
- data/work/util.h +57 -0
- data/work/util.o +0 -0
- data/work/vector.c +121 -0
- data/work/vector.h +66 -0
- data/work/vector.o +0 -0
- metadata +42 -2
- data/Rakefile +0 -68
data/work/util.h
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
+
//
|
17
|
+
// This contains some utility functions that didn't fit into any of the other
|
18
|
+
// headers.
|
19
|
+
|
20
|
+
#ifndef GUMBO_UTIL_H_
|
21
|
+
#define GUMBO_UTIL_H_
|
22
|
+
|
23
|
+
#include <stdbool.h>
|
24
|
+
#include <stddef.h>
|
25
|
+
|
26
|
+
#ifdef __cplusplus
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
// Forward declaration since it's passed into some of the functions in this
|
31
|
+
// header.
|
32
|
+
struct _GumboParser;
|
33
|
+
|
34
|
+
// Utility function for allocating & copying a null-terminated string into a
|
35
|
+
// freshly-allocated buffer. This is necessary for proper memory management; we
|
36
|
+
// have the convention that all const char* in parse tree structures are
|
37
|
+
// freshly-allocated, so if we didn't copy, we'd try to delete a literal string
|
38
|
+
// when the parse tree is destroyed.
|
39
|
+
char* gumbo_copy_stringz(struct _GumboParser* parser, const char* str);
|
40
|
+
|
41
|
+
// Allocate a chunk of memory, using the allocator specified in the Parser's
|
42
|
+
// config options.
|
43
|
+
void* gumbo_parser_allocate(struct _GumboParser* parser, size_t num_bytes);
|
44
|
+
|
45
|
+
// Deallocate a chunk of memory, using the deallocator specified in the Parser's
|
46
|
+
// config options.
|
47
|
+
void gumbo_parser_deallocate(struct _GumboParser* parser, void* ptr);
|
48
|
+
|
49
|
+
// Debug wrapper for printf, to make it easier to turn off debugging info when
|
50
|
+
// required.
|
51
|
+
void gumbo_debug(const char* format, ...);
|
52
|
+
|
53
|
+
#ifdef __cplusplus
|
54
|
+
}
|
55
|
+
#endif
|
56
|
+
|
57
|
+
#endif // GUMBO_UTIL_H_
|
data/work/util.o
ADDED
Binary file
|
data/work/vector.c
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
+
|
17
|
+
#include "vector.h"
|
18
|
+
|
19
|
+
#include <assert.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <string.h>
|
22
|
+
#include <strings.h>
|
23
|
+
|
24
|
+
#include "util.h"
|
25
|
+
|
26
|
+
struct _GumboParser;
|
27
|
+
|
28
|
+
// Shared constant describing an empty vector: all three members are
// initialized to NULL/zero.
const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
|
29
|
+
|
30
|
+
// Sets |vector| up as an empty vector with room for |initial_capacity|
// pointers.  The backing array is requested through gumbo_parser_allocate;
// when |initial_capacity| is zero no allocation is made and the data pointer
// is left NULL.
void gumbo_vector_init(
    struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector) {
  vector->length = 0;
  vector->capacity = initial_capacity;
  vector->data = (initial_capacity > 0)
      ? gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity)
      : NULL;
}
|
41
|
+
|
42
|
+
// Hands the vector's backing array back through gumbo_parser_deallocate.
// The elements stored in the vector are not touched.
void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector) {
  if (vector->capacity == 0) {
    // A zero-capacity vector is treated as having no backing array.
    return;
  }
  gumbo_parser_deallocate(parser, vector->data);
}
|
47
|
+
|
48
|
+
// Makes sure there is room for at least one more element.  Does nothing while
// length < capacity.  Otherwise a zero-capacity vector gets a fresh 2-slot
// array, and any other vector has its capacity doubled, with the existing
// contents copied into the new array and the old array released.
static void enlarge_vector_if_full(
    struct _GumboParser* parser, GumboVector* vector) {
  if (vector->length < vector->capacity) {
    return;
  }

  if (vector->capacity == 0) {
    // Nothing has been allocated yet, so there is no old array to copy or
    // release.
    vector->capacity = 2;
    vector->data = gumbo_parser_allocate(
        parser, sizeof(void*) * vector->capacity);
    return;
  }

  // Remember the size of the current array before the capacity is bumped.
  size_t bytes_in_use = sizeof(void*) * vector->capacity;
  vector->capacity *= 2;
  void** grown = gumbo_parser_allocate(
      parser, sizeof(void*) * vector->capacity);
  memcpy(grown, vector->data, bytes_in_use);
  gumbo_parser_deallocate(parser, vector->data);
  vector->data = grown;
}
|
67
|
+
|
68
|
+
// Appends |element| to the end of |vector|, growing the backing array first
// if it is already full.
void gumbo_vector_add(
    struct _GumboParser* parser, void* element, GumboVector* vector) {
  enlarge_vector_if_full(parser, vector);
  // After the growth step there must be storage and at least one free slot.
  assert(vector->data);
  assert(vector->length < vector->capacity);
  vector->data[vector->length] = element;
  ++vector->length;
}
|
75
|
+
|
76
|
+
// Removes the last element of |vector| and returns it, or returns NULL when
// the vector is empty.  The capacity is left as it is; |parser| is not used.
void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector) {
  if (vector->length > 0) {
    vector->length -= 1;
    return vector->data[vector->length];
  }
  return NULL;
}
|
82
|
+
|
83
|
+
// Linear search for |element| by pointer identity.  Returns the index of the
// first matching slot, or -1 if no slot holds that pointer.
int gumbo_vector_index_of(GumboVector* vector, void* element) {
  int position = 0;
  while (position < vector->length) {
    if (vector->data[position] == element) {
      return position;
    }
    ++position;
  }
  return -1;
}
|
91
|
+
|
92
|
+
// Inserts |element| so that it ends up at position |index|, shifting the
// elements from |index| onward one slot towards the end.  |index| must lie in
// [0, length]; this is checked with assert only.
void gumbo_vector_insert_at(
    struct _GumboParser* parser, void* element, int index, GumboVector* vector) {
  assert(index >= 0);
  assert(index <= vector->length);
  enlarge_vector_if_full(parser, vector);
  ++vector->length;
  // The ranges overlap, hence memmove.  The count is the number of elements
  // that sat at or after |index| before the length was incremented.
  void** slot = &vector->data[index];
  memmove(slot + 1, slot, sizeof(void*) * (vector->length - index - 1));
  *slot = element;
}
|
102
|
+
|
103
|
+
// Removes the first slot holding the pointer |node| from |vector|.  If the
// pointer is not present the vector is left unchanged.
void gumbo_vector_remove(
    struct _GumboParser* parser, void* node, GumboVector* vector) {
  int position = gumbo_vector_index_of(vector, node);
  if (position != -1) {
    gumbo_vector_remove_at(parser, position, vector);
  }
}
|
111
|
+
|
112
|
+
// Removes the element at |index| and returns it, shifting every later element
// one slot towards the front.  |index| must lie in [0, length); this is
// checked with assert only.  |parser| is not used.
void* gumbo_vector_remove_at(
    struct _GumboParser* parser, int index, GumboVector* vector) {
  assert(index >= 0);
  assert(index < vector->length);
  void** slot = &vector->data[index];
  void* removed = *slot;
  // Close the gap; the ranges overlap, hence memmove.
  memmove(slot, slot + 1, sizeof(void*) * (vector->length - index - 1));
  --vector->length;
  return removed;
}
|
data/work/vector.h
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
+
|
17
|
+
#ifndef GUMBO_VECTOR_H_
|
18
|
+
#define GUMBO_VECTOR_H_
|
19
|
+
|
20
|
+
#include "gumbo.h"
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
// Forward declaration since it's passed into some of the functions in this
|
27
|
+
// header.
|
28
|
+
struct _GumboParser;
|
29
|
+
|
30
|
+
// Initializes a new GumboVector with the specified initial capacity.
|
31
|
+
void gumbo_vector_init(
|
32
|
+
struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector);
|
33
|
+
|
34
|
+
// Frees the memory used by a GumboVector.  Does not free the contained
|
35
|
+
// pointers.
|
36
|
+
void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector);
|
37
|
+
|
38
|
+
// Adds a new element to a GumboVector.
|
39
|
+
void gumbo_vector_add(
|
40
|
+
struct _GumboParser* parser, void* element, GumboVector* vector);
|
41
|
+
|
42
|
+
// Removes and returns the element most recently added to the GumboVector.
|
43
|
+
// Ownership is transferred to caller. Capacity is unchanged. If the vector is
|
44
|
+
// empty, NULL is returned.
|
45
|
+
void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector);
|
46
|
+
|
47
|
+
// Inserts an element at a specific index. This is potentially O(N) time, but
|
48
|
+
// is necessary for some of the spec's behavior.
|
49
|
+
void gumbo_vector_insert_at(
|
50
|
+
struct _GumboParser* parser, void* element, int index, GumboVector* vector);
|
51
|
+
|
52
|
+
// Removes an element from the vector, or does nothing if the element is not in
|
53
|
+
// the vector.
|
54
|
+
void gumbo_vector_remove(
|
55
|
+
struct _GumboParser* parser, void* element, GumboVector* vector);
|
56
|
+
|
57
|
+
// Removes and returns an element at a specific index. Note that this is
|
58
|
+
// potentially O(N) time and should be used sparingly.
|
59
|
+
void* gumbo_vector_remove_at(
|
60
|
+
struct _GumboParser* parser, int index, GumboVector* vector);
|
61
|
+
|
62
|
+
#ifdef __cplusplus
|
63
|
+
}
|
64
|
+
#endif
|
65
|
+
|
66
|
+
#endif // GUMBO_VECTOR_H_
|
data/work/vector.o
ADDED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -37,9 +37,49 @@ extra_rdoc_files: []
|
|
37
37
|
files:
|
38
38
|
- lib/nokogumbo.rb
|
39
39
|
- LICENSE.txt
|
40
|
-
- Rakefile
|
41
40
|
- README.md
|
42
41
|
- work/extconf.rb
|
42
|
+
- work/utf8.h
|
43
|
+
- work/string_piece.o
|
44
|
+
- work/string_buffer.o
|
45
|
+
- work/nokogumbo.c
|
46
|
+
- work/token_type.h
|
47
|
+
- work/util.h
|
48
|
+
- work/nokogumbo.o
|
49
|
+
- work/parser.o
|
50
|
+
- work/Makefile
|
51
|
+
- work/utf8.o
|
52
|
+
- work/vector.c
|
53
|
+
- work/string_buffer.c
|
54
|
+
- work/tokenizer_states.h
|
55
|
+
- work/error.h
|
56
|
+
- work/parser.h
|
57
|
+
- work/error.c
|
58
|
+
- work/tokenizer.h
|
59
|
+
- work/nokogumboc.so
|
60
|
+
- work/string_buffer.h
|
61
|
+
- work/vector.o
|
62
|
+
- work/vector.h
|
63
|
+
- work/tag.o
|
64
|
+
- work/tokenizer.o
|
65
|
+
- work/string_piece.h
|
66
|
+
- work/attribute.c
|
67
|
+
- work/mkmf.log
|
68
|
+
- work/char_ref.c
|
69
|
+
- work/string_piece.c
|
70
|
+
- work/error.o
|
71
|
+
- work/gumbo.h
|
72
|
+
- work/tag.c
|
73
|
+
- work/util.c
|
74
|
+
- work/parser.c
|
75
|
+
- work/utf8.c
|
76
|
+
- work/attribute.h
|
77
|
+
- work/char_ref.h
|
78
|
+
- work/char_ref.o
|
79
|
+
- work/insertion_mode.h
|
80
|
+
- work/tokenizer.c
|
81
|
+
- work/util.o
|
82
|
+
- work/attribute.o
|
43
83
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
44
84
|
licenses:
|
45
85
|
- Apache 2.0
|
data/Rakefile
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'rubygems/package_task'
|
2
|
-
require 'rake/clean'
|
3
|
-
|
4
|
-
task 'default' => 'test'
|
5
|
-
|
6
|
-
file 'gumbo-parser' do
|
7
|
-
sh 'git clone https://github.com/google/gumbo-parser.git'
|
8
|
-
end
|
9
|
-
|
10
|
-
file 'work/extconf.rb' => ['ext/extconf.rb', 'gumbo-parser'] do
|
11
|
-
mkdir_p 'work'
|
12
|
-
rm_f 'work/Makefile'
|
13
|
-
cp Dir['gumbo-parser/src/*'], 'work', :preserve => true
|
14
|
-
cp Dir['ext/*'], 'work'
|
15
|
-
end
|
16
|
-
|
17
|
-
file 'work/Makefile' => 'work/extconf.rb' do
|
18
|
-
Dir.chdir 'work' do
|
19
|
-
ruby 'extconf.rb'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
file 'work/nokogumbo.c' => 'ext/nokogumbo.c' do
|
24
|
-
cp 'ext/nokogumbo.c', 'work/nokogumbo.c'
|
25
|
-
end
|
26
|
-
|
27
|
-
task 'compile' => ['work/Makefile', 'work/nokogumbo.c'] do
|
28
|
-
Dir.chdir 'work' do
|
29
|
-
sh 'make -s'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
task 'test' => 'compile' do
|
34
|
-
ruby 'test-nokogumbo.rb'
|
35
|
-
end
|
36
|
-
|
37
|
-
CLEAN.include 'pkg', 'gumbo-parser', 'work'
|
38
|
-
|
39
|
-
SPEC = Gem::Specification.new do |gem|
|
40
|
-
gem.name = 'nokogumbo'
|
41
|
-
gem.version = '0.5'
|
42
|
-
gem.email = 'rubys@intertwingly.net'
|
43
|
-
gem.homepage = 'https://github.com/rubys/nokogumbo/#readme'
|
44
|
-
gem.summary = 'Nokogiri interface to the Gumbo HTML5 parser'
|
45
|
-
gem.extensions = 'work/extconf.rb'
|
46
|
-
gem.author = 'Sam Ruby'
|
47
|
-
gem.add_dependency 'nokogiri'
|
48
|
-
gem.license = 'Apache 2.0'
|
49
|
-
gem.description = %q(
|
50
|
-
Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
51
|
-
access the result as a Nokogiri parsed document.).strip.gsub(/\s+/, ' ')
|
52
|
-
gem.files = FileList[
|
53
|
-
'lib/nokogumbo.rb',
|
54
|
-
'LICENSE.txt',
|
55
|
-
'Rakefile',
|
56
|
-
'README.md'
|
57
|
-
]
|
58
|
-
end
|
59
|
-
|
60
|
-
task 'package_workfiles' => 'work/extconf.rb' do
|
61
|
-
PKG.package_files += FileList['work/*.rb', 'work/*.c', 'work/*.h']
|
62
|
-
end
|
63
|
-
|
64
|
-
task 'gem' => ['test', 'package_workfiles']
|
65
|
-
PKG = Gem::PackageTask.new(SPEC) do |pkg|
|
66
|
-
pkg.need_tar = true
|
67
|
-
pkg.need_zip = true
|
68
|
-
end
|