gumbo-html 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -10
- package/binding.gyp +49 -0
- package/examples/example.js +87 -0
- package/examples/scrape.js +301 -0
- package/index.d.ts +58 -3
- package/index.js +7 -2
- package/lib/wrapper.js +385 -0
- package/package.json +36 -5
- package/src/addon.cc +19 -0
- package/src/gumbo-parser/COPYING +201 -0
- package/src/gumbo-parser/README.md +8 -0
- package/src/gumbo-parser/src/attribute.c +44 -0
- package/src/gumbo-parser/src/attribute.h +37 -0
- package/src/gumbo-parser/src/char_ref.c +23069 -0
- package/src/gumbo-parser/src/char_ref.h +60 -0
- package/src/gumbo-parser/src/error.c +279 -0
- package/src/gumbo-parser/src/error.h +225 -0
- package/src/gumbo-parser/src/gumbo.h +671 -0
- package/src/gumbo-parser/src/insertion_mode.h +57 -0
- package/src/gumbo-parser/src/parser.c +4192 -0
- package/src/gumbo-parser/src/parser.h +57 -0
- package/src/gumbo-parser/src/string_buffer.c +110 -0
- package/src/gumbo-parser/src/string_buffer.h +84 -0
- package/src/gumbo-parser/src/string_piece.c +48 -0
- package/src/gumbo-parser/src/string_piece.h +38 -0
- package/src/gumbo-parser/src/tag.c +95 -0
- package/src/gumbo-parser/src/tag_enum.h +153 -0
- package/src/gumbo-parser/src/tag_gperf.h +105 -0
- package/src/gumbo-parser/src/tag_sizes.h +4 -0
- package/src/gumbo-parser/src/tag_strings.h +153 -0
- package/src/gumbo-parser/src/token_type.h +41 -0
- package/src/gumbo-parser/src/tokenizer.c +2897 -0
- package/src/gumbo-parser/src/tokenizer.h +123 -0
- package/src/gumbo-parser/src/tokenizer_states.h +103 -0
- package/src/gumbo-parser/src/utf8.c +270 -0
- package/src/gumbo-parser/src/utf8.h +132 -0
- package/src/gumbo-parser/src/util.c +58 -0
- package/src/gumbo-parser/src/util.h +60 -0
- package/src/gumbo-parser/src/vector.c +123 -0
- package/src/gumbo-parser/src/vector.h +67 -0
- package/src/html_document.cc +411 -0
- package/src/html_document.h +56 -0
- package/src/html_element.cc +963 -0
- package/src/html_element.h +70 -0
- package/src/include/win/strings.h +11 -0
- package/src/jsa.c +182 -0
- package/src/jsa.h +44 -0
- package/src/xnode.c +372 -0
- package/src/xnode_query.c +330 -0
- package/src/xnode_query.h +186 -0
- package/src/xnode_query_parser.c +414 -0
- package/install.js +0 -15
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
//
|
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
|
16
|
+
//
|
|
17
|
+
// This contains some utility functions that didn't fit into any of the other
|
|
18
|
+
// headers.
|
|
19
|
+
|
|
20
|
+
#ifndef GUMBO_UTIL_H_
|
|
21
|
+
#define GUMBO_UTIL_H_
|
|
22
|
+
#ifdef _MSC_VER
|
|
23
|
+
#define _CRT_SECURE_NO_WARNINGS
|
|
24
|
+
#endif
|
|
25
|
+
#include <stdbool.h>
|
|
26
|
+
#include <stddef.h>
|
|
27
|
+
|
|
28
|
+
#ifdef __cplusplus
|
|
29
|
+
extern "C" {
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
// Forward declaration since it's passed into some of the functions in this
|
|
33
|
+
// header.
|
|
34
|
+
struct GumboInternalParser;
|
|
35
|
+
|
|
36
|
+
// Utility function for allocating & copying a null-terminated string into a
|
|
37
|
+
// freshly-allocated buffer. This is necessary for proper memory management; we
|
|
38
|
+
// have the convention that all const char* in parse tree structures are
|
|
39
|
+
// freshly-allocated, so if we didn't copy, we'd try to delete a literal string
|
|
40
|
+
// when the parse tree is destroyed.
|
|
41
|
+
char* gumbo_copy_stringz(struct GumboInternalParser* parser, const char* str);
|
|
42
|
+
|
|
43
|
+
// Allocate a chunk of memory, using the allocator specified in the Parser's
|
|
44
|
+
// config options.
|
|
45
|
+
void* gumbo_parser_allocate(
|
|
46
|
+
struct GumboInternalParser* parser, size_t num_bytes);
|
|
47
|
+
|
|
48
|
+
// Deallocate a chunk of memory, using the deallocator specified in the Parser's
|
|
49
|
+
// config options.
|
|
50
|
+
void gumbo_parser_deallocate(struct GumboInternalParser* parser, void* ptr);
|
|
51
|
+
|
|
52
|
+
// Debug wrapper for printf, to make it easier to turn off debugging info when
|
|
53
|
+
// required.
|
|
54
|
+
void gumbo_debug(const char* format, ...);
|
|
55
|
+
|
|
56
|
+
#ifdef __cplusplus
|
|
57
|
+
}
|
|
58
|
+
#endif
|
|
59
|
+
|
|
60
|
+
#endif // GUMBO_UTIL_H_
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
//
|
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
|
16
|
+
|
|
17
|
+
#include "vector.h"
|
|
18
|
+
|
|
19
|
+
#include <assert.h>
|
|
20
|
+
#include <stdlib.h>
|
|
21
|
+
#include <string.h>
|
|
22
|
+
#include <strings.h>
|
|
23
|
+
|
|
24
|
+
#include "util.h"
|
|
25
|
+
|
|
26
|
+
struct GumboInternalParser;
|
|
27
|
+
|
|
28
|
+
const GumboVector kGumboEmptyVector = {NULL, 0, 0};
|
|
29
|
+
|
|
30
|
+
// Sets `vector` up as an empty vector with room for `initial_capacity`
// pointers. The backing array is obtained through gumbo_parser_allocate; a
// capacity of zero allocates nothing and leaves the data pointer NULL.
void gumbo_vector_init(struct GumboInternalParser* parser,
    size_t initial_capacity, GumboVector* vector) {
  vector->length = 0;
  vector->capacity = initial_capacity;
  vector->data = (initial_capacity > 0)
      ? gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity)
      : NULL;
}
|
|
41
|
+
|
|
42
|
+
// Hands the vector's pointer array back through gumbo_parser_deallocate.
// The elements the array pointed at are not touched, and a vector whose
// capacity is zero has nothing released.
void gumbo_vector_destroy(
    struct GumboInternalParser* parser, GumboVector* vector) {
  const bool owns_storage = vector->capacity > 0;
  if (owns_storage) {
    gumbo_parser_deallocate(parser, vector->data);
  }
}
|
|
48
|
+
|
|
49
|
+
// Makes sure at least one free slot exists in `vector`.
//
// A vector that already has storage doubles its capacity: a new array is
// allocated, the old contents are copied across and the old array is
// released. A zero-capacity vector simply receives a first array of two slots.
static void enlarge_vector_if_full(
    struct GumboInternalParser* parser, GumboVector* vector) {
  if (vector->length < vector->capacity) {
    return;  // Still room for another element.
  }

  if (!vector->capacity) {
    // 0-capacity vector; no previous array to copy from or deallocate.
    vector->capacity = 2;
    vector->data =
        gumbo_parser_allocate(parser, sizeof(void*) * vector->capacity);
    return;
  }

  size_t bytes_in_use = sizeof(void*) * vector->capacity;
  vector->capacity *= 2;
  size_t bytes_needed = sizeof(void*) * vector->capacity;
  void** grown = gumbo_parser_allocate(parser, bytes_needed);
  memcpy(grown, vector->data, bytes_in_use);
  gumbo_parser_deallocate(parser, vector->data);
  vector->data = grown;
}
|
|
68
|
+
|
|
69
|
+
// Appends `element` after the current last element, enlarging the backing
// array first when it is full.
void gumbo_vector_add(
    struct GumboInternalParser* parser, void* element, GumboVector* vector) {
  enlarge_vector_if_full(parser, vector);
  assert(vector->data);
  assert(vector->length < vector->capacity);
  vector->data[vector->length] = element;
  ++vector->length;
}
|
|
76
|
+
|
|
77
|
+
// Removes the last element and returns it, or returns NULL when the vector
// holds no elements. The capacity is left as it was; `parser` is not used.
void* gumbo_vector_pop(
    struct GumboInternalParser* parser, GumboVector* vector) {
  if (vector->length != 0) {
    --vector->length;
    return vector->data[vector->length];
  }
  return NULL;
}
|
|
84
|
+
|
|
85
|
+
// Linear search by pointer identity. Returns the position of the first slot
// equal to `element`, or -1 when no slot matches.
int gumbo_vector_index_of(GumboVector* vector, const void* element) {
  unsigned int position = 0;
  while (position < vector->length) {
    if (vector->data[position] == element) {
      return position;
    }
    ++position;
  }
  return -1;
}
|
|
93
|
+
|
|
94
|
+
// Inserts `element` so that it ends up at position `index`, moving every
// element from `index` onwards up by one slot. `index` may equal the current
// length, in which case the element is appended.
void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
    unsigned int index, GumboVector* vector) {
  // `index` is unsigned, so a lower-bound check would always be true; only
  // the upper bound can be violated.
  assert(index <= vector->length);
  enlarge_vector_if_full(parser, vector);
  // Open a gap at `index` by shifting the tail [index, length) up one slot.
  memmove(&vector->data[index + 1], &vector->data[index],
      sizeof(void*) * (vector->length - index));
  vector->data[index] = element;
  ++vector->length;
}
|
|
104
|
+
|
|
105
|
+
// Removes the first slot that holds the pointer `node`. Does nothing when the
// pointer is not present in the vector.
void gumbo_vector_remove(
    struct GumboInternalParser* parser, void* node, GumboVector* vector) {
  int position = gumbo_vector_index_of(vector, node);
  if (position != -1) {
    gumbo_vector_remove_at(parser, position, vector);
  }
}
|
|
113
|
+
|
|
114
|
+
// Removes the element stored at `index` and returns it. Every later element
// moves down by one slot; the capacity is unchanged.
void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
    unsigned int index, GumboVector* vector) {
  // `index` is unsigned, so a lower-bound check would always be true; only
  // the upper bound can be violated.
  assert(index < vector->length);
  void* result = vector->data[index];
  // Close the gap by shifting the tail (index, length) down one slot.
  memmove(&vector->data[index], &vector->data[index + 1],
      sizeof(void*) * (vector->length - index - 1));
  --vector->length;
  return result;
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
//
|
|
15
|
+
// Author: jdtang@google.com (Jonathan Tang)
|
|
16
|
+
|
|
17
|
+
#ifndef GUMBO_VECTOR_H_
|
|
18
|
+
#define GUMBO_VECTOR_H_
|
|
19
|
+
|
|
20
|
+
#include "gumbo.h"
|
|
21
|
+
|
|
22
|
+
#ifdef __cplusplus
|
|
23
|
+
extern "C" {
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
// Forward declaration since it's passed into some of the functions in this
|
|
27
|
+
// header.
|
|
28
|
+
struct GumboInternalParser;
|
|
29
|
+
|
|
30
|
+
// Initializes a new GumboVector with the specified initial capacity.
|
|
31
|
+
void gumbo_vector_init(struct GumboInternalParser* parser,
|
|
32
|
+
size_t initial_capacity, GumboVector* vector);
|
|
33
|
+
|
|
34
|
+
// Frees the memory used by a GumboVector.  Does not free the contained
|
|
35
|
+
// pointers.
|
|
36
|
+
void gumbo_vector_destroy(
|
|
37
|
+
struct GumboInternalParser* parser, GumboVector* vector);
|
|
38
|
+
|
|
39
|
+
// Adds a new element to a GumboVector.
|
|
40
|
+
void gumbo_vector_add(
|
|
41
|
+
struct GumboInternalParser* parser, void* element, GumboVector* vector);
|
|
42
|
+
|
|
43
|
+
// Removes and returns the element most recently added to the GumboVector.
|
|
44
|
+
// Ownership is transferred to caller. Capacity is unchanged. If the vector is
|
|
45
|
+
// empty, NULL is returned.
|
|
46
|
+
void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
|
|
47
|
+
|
|
48
|
+
// Inserts an element at a specific index. This is potentially O(N) time, but
|
|
49
|
+
// is necessary for some of the spec's behavior.
|
|
50
|
+
void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
|
|
51
|
+
unsigned int index, GumboVector* vector);
|
|
52
|
+
|
|
53
|
+
// Removes an element from the vector, or does nothing if the element is not in
|
|
54
|
+
// the vector.
|
|
55
|
+
void gumbo_vector_remove(
|
|
56
|
+
struct GumboInternalParser* parser, void* element, GumboVector* vector);
|
|
57
|
+
|
|
58
|
+
// Removes and returns an element at a specific index. Note that this is
|
|
59
|
+
// potentially O(N) time and should be used sparingly.
|
|
60
|
+
void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
|
|
61
|
+
unsigned int index, GumboVector* vector);
|
|
62
|
+
|
|
63
|
+
#ifdef __cplusplus
|
|
64
|
+
}
|
|
65
|
+
#endif
|
|
66
|
+
|
|
67
|
+
#endif // GUMBO_VECTOR_H_
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
#include "html_document.h"
|
|
2
|
+
#include "html_element.h"
|
|
3
|
+
|
|
4
|
+
#include <cstring>
|
|
5
|
+
#include <string>
|
|
6
|
+
|
|
7
|
+
namespace html {
|
|
8
|
+
|
|
9
|
+
Napi::FunctionReference Document::constructor;
|
|
10
|
+
|
|
11
|
+
namespace {
|
|
12
|
+
|
|
13
|
+
// Copies call argument `index` into `*value` as UTF-8 and returns true.
// When the argument is missing or is not a JS string, a TypeError reading
// "<name> must be a string" is thrown, `*value` is left untouched and false
// is returned.
bool GetStringArg(const Napi::CallbackInfo& info, size_t index,
                  const char *name, std::string *value) {
  const bool is_string_arg = index < info.Length() && info[index].IsString();
  if (is_string_arg) {
    *value = info[index].As<Napi::String>().Utf8Value();
    return true;
  }
  Napi::TypeError::New(info.Env(), std::string(name) + " must be a string")
      .ThrowAsJavaScriptException();
  return false;
}
|
|
24
|
+
|
|
25
|
+
// Reports whether `doc` holds a wrapper with a non-null parsed document.
// When it does not, an "Invalid document" error is thrown on the calling
// environment and false is returned.
bool HasDocument(const Napi::CallbackInfo& info, Document *doc) {
  const bool parsed = doc->xdoc_wrapper_ && doc->xdoc_wrapper_->xdoc;
  if (!parsed) {
    Napi::Error::New(info.Env(), "Invalid document").ThrowAsJavaScriptException();
  }
  return parsed;
}
|
|
32
|
+
|
|
33
|
+
class QueryHandle {
|
|
34
|
+
public:
|
|
35
|
+
explicit QueryHandle(const char *selector)
|
|
36
|
+
: query_(selector ? xnode_query_create(selector, NULL, NULL) : nullptr) {}
|
|
37
|
+
|
|
38
|
+
~QueryHandle() {
|
|
39
|
+
if (query_) {
|
|
40
|
+
xnode_query_free(query_);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
XNodeQuery *get() const { return query_; }
|
|
45
|
+
|
|
46
|
+
private:
|
|
47
|
+
XNodeQuery *query_;
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
} // namespace
|
|
51
|
+
|
|
52
|
+
// JS constructor: parses the HTML string passed as the first argument and
// keeps the result in a freshly allocated XDocWrapper.
//
// If the argument is not a string, GetStringArg has already thrown a
// TypeError and the wrapper stays null. The parse result is stored without
// being checked here; HasDocument rejects a null document on later calls.
// NOTE(review): relies on XDocWrapper's constructor setting the initial
// ref_count that ~Document decrements — confirm in html_element.h.
Document::Document(const Napi::CallbackInfo& info)
    : Napi::ObjectWrap<Document>(info), xdoc_wrapper_(nullptr) {
  std::string source;
  if (GetStringArg(info, 0, "html", &source)) {
    XNodeDocument *parsed = xnode_parse_html(source.c_str(), source.size());
    xdoc_wrapper_ = new XDocWrapper(parsed);
  }
}
|
|
62
|
+
|
|
63
|
+
// Drops this object's reference on the shared wrapper and deletes the
// wrapper once its ref_count reaches zero. A Document that never obtained a
// wrapper has nothing to release.
Document::~Document() {
  if (!xdoc_wrapper_) {
    return;
  }
  --xdoc_wrapper_->ref_count;
  if (xdoc_wrapper_->ref_count == 0) {
    delete xdoc_wrapper_;
  }
}
|
|
68
|
+
|
|
69
|
+
// Getter for `documentElement`: wraps the document's root node in an Element.
// Returns undefined (with an "Invalid document" error pending) when the
// receiver holds no parsed document.
Napi::Value Document::GetDocumentElement(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  if (HasDocument(info, self)) {
    XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);
    return Element::Create(env, self->xdoc_wrapper_, root_node);
  }
  return env.Undefined();
}
|
|
78
|
+
|
|
79
|
+
// Defines the JS class "Document" with its instance methods and the
// read-only `documentElement` accessor, then keeps the resulting constructor
// in Document::constructor as a persistent reference whose destruction is
// suppressed.
void Document::Init(Napi::Env env) {
  Napi::Function document_class = DefineClass(env, "Document", {
    // Element lookups; `*_s` are the throwing variants of first/only.
    InstanceMethod("find", &Document::Find),
    InstanceMethod("first", &Document::First),
    InstanceMethod("first_s", &Document::FirstSafe),
    InstanceMethod("only", &Document::Only),
    InstanceMethod("only_s", &Document::OnlySafe),
    // No setter is registered for the accessor.
    InstanceAccessor("documentElement", &Document::GetDocumentElement, nullptr),

    // Selector-based convenience helpers.
    InstanceMethod("firstOrThrow", &Document::FirstOrThrow),
    InstanceMethod("onlyOrThrow", &Document::OnlyOrThrow),
    InstanceMethod("text", &Document::Text),
    InstanceMethod("textOrThrow", &Document::TextOrThrow),
    InstanceMethod("exists", &Document::Exists),
    InstanceMethod("count", &Document::Count),
    InstanceMethod("attr", &Document::Attr2),
    InstanceMethod("attrOrThrow", &Document::AttrOrThrow2),
  });

  constructor = Napi::Persistent(document_class);
  constructor.SuppressDestruct();
}
|
|
101
|
+
|
|
102
|
+
// find(selector): runs `selector` against the document root and returns
// whatever Element::Query produces for it. Returns undefined, with an error
// pending, when the document is invalid or the selector is not a string.
Napi::Value Document::Find(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  std::string selector;
  if (!HasDocument(info, self) ||
      !GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);
  return Element::Query(env, self->xdoc_wrapper_, root_node, selector.c_str());
}
|
|
115
|
+
|
|
116
|
+
// first(selector): returns the first element matching `selector`, or null
// when nothing matches.
//
// Returns undefined when the document is invalid, the selector is not a
// string, or Element::Query does not return an array (presumably because it
// threw — verify against Element::Query).
Napi::Value Document::First(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  // Deliberately no local Napi::HandleScope: the returned value must outlive
  // this function, and a handle created inside a scope is only valid until
  // that scope closes.
  Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, xdoc)) {
    return env.Undefined();
  }
  std::string selector;
  if (!GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
  Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
  if (!value.IsArray()) {
    return env.Undefined();
  }
  Napi::Array matches = value.As<Napi::Array>();
  if (matches.Length() > 0) {
    return matches.Get(uint32_t(0));
  }
  return env.Null();
}
|
|
138
|
+
|
|
139
|
+
// first_s(selector): returns the first element matching `selector` and
// throws "No element found" when nothing matches.
//
// Returns undefined when the document is invalid, the selector is not a
// string, or Element::Query does not return an array (presumably because it
// threw — verify against Element::Query).
Napi::Value Document::FirstSafe(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  // Deliberately no local Napi::HandleScope: the returned value must outlive
  // this function, and a handle created inside a scope is only valid until
  // that scope closes.
  Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, xdoc)) {
    return env.Undefined();
  }
  std::string selector;
  if (!GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
  Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
  if (!value.IsArray()) {
    return env.Undefined();
  }
  Napi::Array matches = value.As<Napi::Array>();
  if (matches.Length() > 0) {
    return matches.Get(uint32_t(0));
  }
  Napi::Error::New(env, "No element found").ThrowAsJavaScriptException();
  return env.Undefined();
}
|
|
162
|
+
|
|
163
|
+
// only(selector): returns the matching element when exactly one element
// matches `selector`; returns null for zero or several matches.
//
// Returns undefined when the document is invalid, the selector is not a
// string, or Element::Query does not return an array (presumably because it
// threw — verify against Element::Query).
Napi::Value Document::Only(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  // Deliberately no local Napi::HandleScope: the returned value must outlive
  // this function, and a handle created inside a scope is only valid until
  // that scope closes.
  Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, xdoc)) {
    return env.Undefined();
  }
  std::string selector;
  if (!GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
  Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
  if (!value.IsArray()) {
    return env.Undefined();
  }
  Napi::Array matches = value.As<Napi::Array>();
  if (matches.Length() == 1) {
    return matches.Get(uint32_t(0));
  }
  return env.Null();
}
|
|
185
|
+
|
|
186
|
+
// only_s(selector): returns the matching element when exactly one element
// matches `selector` and throws "Not a single element" otherwise.
//
// Returns undefined when the document is invalid, the selector is not a
// string, or Element::Query does not return an array (presumably because it
// threw — verify against Element::Query).
Napi::Value Document::OnlySafe(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  // Deliberately no local Napi::HandleScope: the returned value must outlive
  // this function, and a handle created inside a scope is only valid until
  // that scope closes.
  Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, xdoc)) {
    return env.Undefined();
  }
  std::string selector;
  if (!GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
  Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
  if (!value.IsArray()) {
    return env.Undefined();
  }
  Napi::Array matches = value.As<Napi::Array>();
  if (matches.Length() == 1) {
    return matches.Get(uint32_t(0));
  }
  Napi::Error::New(env, "Not a single element").ThrowAsJavaScriptException();
  return env.Undefined();
}
|
|
209
|
+
|
|
210
|
+
// firstOrThrow(selector): forwards unchanged to FirstSafe, so both JS names
// share one implementation.
Napi::Value Document::FirstOrThrow(const Napi::CallbackInfo& info) {
  return this->FirstSafe(info);
}
|
|
213
|
+
|
|
214
|
+
// onlyOrThrow(selector): forwards unchanged to OnlySafe, so both JS names
// share one implementation.
Napi::Value Document::OnlyOrThrow(const Napi::CallbackInfo& info) {
  return this->OnlySafe(info);
}
|
|
217
|
+
|
|
218
|
+
// text(selector[, options]): returns the text of the first element matching
// `selector`, or null when nothing matches.
//
// Recognised options (second argument, only when it is an object):
//   separator - when a string, the text is built with
//               join_text_with_separator instead of get_inner_text.
//   normalize - when truthy, the text is passed through normalize_whitespace.
//
// Throws "Bad selector." when the selector cannot be compiled. Returns
// undefined, with an error pending, for an invalid document or a non-string
// selector.
Napi::Value Document::Text(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, xdoc)) {
    return env.Undefined();
  }
  std::string selector;
  if (!GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);

  QueryHandle query(selector.c_str());
  if (!query.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *result = xnode_query_execute(query.get(), root);
  if (result && xnode_array_size(result) > 0) {
    XNode *node = xnode_array_get(result, 0);
    std::string text;
    if (info.Length() > 1 && info[1].IsObject()) {
      Napi::Object opts = info[1].As<Napi::Object>();
      // A missing property reads as undefined, so one Get covers both the
      // "absent" and the "wrong type" cases.
      Napi::Value separator = opts.Get("separator");
      if (separator.IsString()) {
        std::string sep = separator.As<Napi::String>().Utf8Value();
        text = join_text_with_separator(node, sep);
      } else {
        text = get_inner_text(node);
      }
      // Coerce with JS truthiness: an unchecked As<Napi::Boolean>() fails
      // for non-boolean values such as 1 or "yes".
      if (opts.Get("normalize").ToBoolean().Value()) {
        text = normalize_whitespace(text);
      }
    } else {
      text = get_inner_text(node);
    }
    return Napi::String::New(env, text);
  }

  return env.Null();
}
|
|
259
|
+
|
|
260
|
+
// textOrThrow(selector): returns the inner text of the first element
// matching `selector`. Throws "Bad selector." when the selector cannot be
// compiled and "No element found" when nothing matches. Unlike text(), no
// options argument is read.
Napi::Value Document::TextOrThrow(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  std::string selector;
  if (!HasDocument(info, self) ||
      !GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);

  QueryHandle compiled(selector.c_str());
  if (!compiled.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *matches = xnode_query_execute(compiled.get(), root_node);
  const bool found = matches && xnode_array_size(matches) > 0;
  if (!found) {
    Napi::Error::New(env, "No element found").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  XNode *first_match = xnode_array_get(matches, 0);
  return Napi::String::New(env, get_inner_text(first_match));
}
|
|
287
|
+
|
|
288
|
+
// exists(selector): returns true when at least one element matches
// `selector`, false otherwise. Throws "Bad selector." when the selector
// cannot be compiled.
Napi::Value Document::Exists(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  std::string selector;
  if (!HasDocument(info, self) ||
      !GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);

  QueryHandle compiled(selector.c_str());
  if (!compiled.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *matches = xnode_query_execute(compiled.get(), root_node);
  const bool any_match = matches && xnode_array_size(matches) > 0;
  return Napi::Boolean::New(env, any_match);
}
|
|
309
|
+
|
|
310
|
+
// count(selector): returns the number of elements matching `selector`; a
// null result array counts as 0. Throws "Bad selector." when the selector
// cannot be compiled.
Napi::Value Document::Count(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  std::string selector;
  if (!HasDocument(info, self) ||
      !GetStringArg(info, 0, "selector", &selector)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);

  QueryHandle compiled(selector.c_str());
  if (!compiled.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *matches = xnode_query_execute(compiled.get(), root_node);
  if (!matches) {
    return Napi::Number::New(env, 0);
  }
  return Napi::Number::New(env, xnode_array_size(matches));
}
|
|
331
|
+
|
|
332
|
+
// attr(selector, name): returns the value of attribute `name` on the first
// element matching `selector`. Returns undefined when nothing matches or the
// attribute lookup yields null. Throws "Bad selector." when the selector
// cannot be compiled.
Napi::Value Document::Attr2(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, self)) {
    return env.Undefined();
  }
  std::string selector;
  std::string attrName;
  if (!GetStringArg(info, 0, "selector", &selector) ||
      !GetStringArg(info, 1, "name", &attrName)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);

  QueryHandle compiled(selector.c_str());
  if (!compiled.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *matches = xnode_query_execute(compiled.get(), root_node);
  const bool found = matches && xnode_array_size(matches) > 0;
  if (!found) {
    return env.Undefined();
  }

  XNode *first_match = xnode_array_get(matches, 0);
  const char *attr_value = xnode_attr(first_match, attrName.c_str());
  if (!attr_value) {
    return env.Undefined();
  }
  return Napi::String::New(env, attr_value);
}
|
|
365
|
+
|
|
366
|
+
// attrOrThrow(selector, name): returns the value of attribute `name` on the
// first element matching `selector`. Throws "Attribute not found" when
// nothing matches or the attribute lookup yields null, and "Bad selector."
// when the selector cannot be compiled.
Napi::Value Document::AttrOrThrow2(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Document* self = Document::Unwrap(info.This().As<Napi::Object>());
  if (!HasDocument(info, self)) {
    return env.Undefined();
  }
  std::string selector;
  std::string attrName;
  if (!GetStringArg(info, 0, "selector", &selector) ||
      !GetStringArg(info, 1, "name", &attrName)) {
    return env.Undefined();
  }
  XNode *root_node = (XNode*) xnode_document_root(self->xdoc_wrapper_->xdoc);

  QueryHandle compiled(selector.c_str());
  if (!compiled.get()) {
    Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const XNodeArray *matches = xnode_query_execute(compiled.get(), root_node);
  const char *attr_value = nullptr;
  if (matches && xnode_array_size(matches) > 0) {
    XNode *first_match = xnode_array_get(matches, 0);
    attr_value = xnode_attr(first_match, attrName.c_str());
  }
  if (attr_value) {
    return Napi::String::New(env, attr_value);
  }

  Napi::Error::New(env, "Attribute not found").ThrowAsJavaScriptException();
  return env.Undefined();
}
|
|
400
|
+
|
|
401
|
+
// Creates a new Document by invoking the stored constructor with the first
// argument. Throws a TypeError ("html must be a string") and returns
// undefined when that argument is missing or is not a string.
Napi::Value Document::Parse(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  const bool has_html = info.Length() != 0 && info[0].IsString();
  if (has_html) {
    return Document::constructor.New({ info[0] });
  }
  Napi::TypeError::New(env, "html must be a string")
      .ThrowAsJavaScriptException();
  return env.Undefined();
}
|
|
410
|
+
|
|
411
|
+
} // namespace html
|