gumbo-html 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +27 -10
  2. package/binding.gyp +49 -0
  3. package/examples/example.js +87 -0
  4. package/examples/scrape.js +301 -0
  5. package/index.d.ts +58 -3
  6. package/index.js +7 -2
  7. package/lib/wrapper.js +385 -0
  8. package/package.json +36 -5
  9. package/src/addon.cc +19 -0
  10. package/src/gumbo-parser/COPYING +201 -0
  11. package/src/gumbo-parser/README.md +8 -0
  12. package/src/gumbo-parser/src/attribute.c +44 -0
  13. package/src/gumbo-parser/src/attribute.h +37 -0
  14. package/src/gumbo-parser/src/char_ref.c +23069 -0
  15. package/src/gumbo-parser/src/char_ref.h +60 -0
  16. package/src/gumbo-parser/src/error.c +279 -0
  17. package/src/gumbo-parser/src/error.h +225 -0
  18. package/src/gumbo-parser/src/gumbo.h +671 -0
  19. package/src/gumbo-parser/src/insertion_mode.h +57 -0
  20. package/src/gumbo-parser/src/parser.c +4192 -0
  21. package/src/gumbo-parser/src/parser.h +57 -0
  22. package/src/gumbo-parser/src/string_buffer.c +110 -0
  23. package/src/gumbo-parser/src/string_buffer.h +84 -0
  24. package/src/gumbo-parser/src/string_piece.c +48 -0
  25. package/src/gumbo-parser/src/string_piece.h +38 -0
  26. package/src/gumbo-parser/src/tag.c +95 -0
  27. package/src/gumbo-parser/src/tag_enum.h +153 -0
  28. package/src/gumbo-parser/src/tag_gperf.h +105 -0
  29. package/src/gumbo-parser/src/tag_sizes.h +4 -0
  30. package/src/gumbo-parser/src/tag_strings.h +153 -0
  31. package/src/gumbo-parser/src/token_type.h +41 -0
  32. package/src/gumbo-parser/src/tokenizer.c +2897 -0
  33. package/src/gumbo-parser/src/tokenizer.h +123 -0
  34. package/src/gumbo-parser/src/tokenizer_states.h +103 -0
  35. package/src/gumbo-parser/src/utf8.c +270 -0
  36. package/src/gumbo-parser/src/utf8.h +132 -0
  37. package/src/gumbo-parser/src/util.c +58 -0
  38. package/src/gumbo-parser/src/util.h +60 -0
  39. package/src/gumbo-parser/src/vector.c +123 -0
  40. package/src/gumbo-parser/src/vector.h +67 -0
  41. package/src/html_document.cc +411 -0
  42. package/src/html_document.h +56 -0
  43. package/src/html_element.cc +963 -0
  44. package/src/html_element.h +70 -0
  45. package/src/include/win/strings.h +11 -0
  46. package/src/jsa.c +182 -0
  47. package/src/jsa.h +44 -0
  48. package/src/xnode.c +372 -0
  49. package/src/xnode_query.c +330 -0
  50. package/src/xnode_query.h +186 -0
  51. package/src/xnode_query_parser.c +414 -0
  52. package/install.js +0 -15
@@ -0,0 +1,60 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+ //
17
+ // This contains some utility functions that didn't fit into any of the other
18
+ // headers.
19
+
20
+ #ifndef GUMBO_UTIL_H_
21
+ #define GUMBO_UTIL_H_
22
+ #ifdef _MSC_VER
23
+ #define _CRT_SECURE_NO_WARNINGS
24
+ #endif
25
+ #include <stdbool.h>
26
+ #include <stddef.h>
27
+
28
+ #ifdef __cplusplus
29
+ extern "C" {
30
+ #endif
31
+
32
+ // Forward declaration: the parser is only handled through pointers by the
33
+ // functions declared in this header.
34
+ struct GumboInternalParser;
35
+
36
+ // Allocates a fresh buffer and copies the null-terminated string str into
37
+ // it, returning the copy. This is necessary for proper memory management; we
38
+ // have the convention that all const char* in parse tree structures are
39
+ // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
40
+ // when the parse tree is destroyed.
41
+ char* gumbo_copy_stringz(struct GumboInternalParser* parser, const char* str);
42
+
43
+ // Allocates num_bytes bytes of memory, using the allocator specified in the
44
+ // Parser's config options.
45
+ void* gumbo_parser_allocate(
46
+ struct GumboInternalParser* parser, size_t num_bytes);
47
+
48
+ // Deallocates the memory at ptr, using the deallocator specified in the
49
+ // Parser's config options.
50
+ void gumbo_parser_deallocate(struct GumboInternalParser* parser, void* ptr);
51
+
52
+ // Debug wrapper for printf (takes a printf-style format plus arguments), to
53
+ // make it easier to turn off debugging info when required.
54
+ void gumbo_debug(const char* format, ...);
55
+
56
+ #ifdef __cplusplus
57
+ }
58
+ #endif
59
+
60
+ #endif // GUMBO_UTIL_H_
@@ -0,0 +1,123 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #include "vector.h"
18
+
19
+ #include <assert.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+ #include <strings.h>
23
+
24
+ #include "util.h"
25
+
26
+ struct GumboInternalParser;  // Only ever passed through to the allocation helpers in this file.
27
+
28
+ const GumboVector kGumboEmptyVector = {NULL, 0, 0};  // Shared empty vector: every member is NULL/zero.
29
+
30
+ void gumbo_vector_init(struct GumboInternalParser* parser,
31
+ size_t initial_capacity, GumboVector* vector) {
32
+ vector->length = 0;
33
+ vector->capacity = initial_capacity;
34
+ if (initial_capacity > 0) {
35
+ vector->data =
36
+ gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity);
37
+ } else {
38
+ vector->data = NULL;
39
+ }
40
+ }
41
+
42
+ void gumbo_vector_destroy(
43
+ struct GumboInternalParser* parser, GumboVector* vector) {
44
+ if (vector->capacity > 0) {
45
+ gumbo_parser_deallocate(parser, vector->data);
46
+ }
47
+ }
48
+
49
+ static void enlarge_vector_if_full(
50
+ struct GumboInternalParser* parser, GumboVector* vector) {
51
+ if (vector->length >= vector->capacity) {
52
+ if (vector->capacity) {
53
+ size_t old_num_bytes = sizeof(void*) * vector->capacity;
54
+ vector->capacity *= 2;
55
+ size_t num_bytes = sizeof(void*) * vector->capacity;
56
+ void** temp = gumbo_parser_allocate(parser, num_bytes);
57
+ memcpy(temp, vector->data, old_num_bytes);
58
+ gumbo_parser_deallocate(parser, vector->data);
59
+ vector->data = temp;
60
+ } else {
61
+ // 0-capacity vector; no previous array to deallocate.
62
+ vector->capacity = 2;
63
+ vector->data =
64
+ gumbo_parser_allocate(parser, sizeof(void*) * vector->capacity);
65
+ }
66
+ }
67
+ }
68
+
69
+ void gumbo_vector_add(
70
+ struct GumboInternalParser* parser, void* element, GumboVector* vector) {
71
+ enlarge_vector_if_full(parser, vector);
72
+ assert(vector->data);
73
+ assert(vector->length < vector->capacity);
74
+ vector->data[vector->length++] = element;
75
+ }
76
+
77
+ void* gumbo_vector_pop(
78
+ struct GumboInternalParser* parser, GumboVector* vector) {
79
+ if (vector->length == 0) {
80
+ return NULL;
81
+ }
82
+ return vector->data[--vector->length];
83
+ }
84
+
85
+ int gumbo_vector_index_of(GumboVector* vector, const void* element) {
86
+ for (unsigned int i = 0; i < vector->length; ++i) {
87
+ if (vector->data[i] == element) {
88
+ return i;
89
+ }
90
+ }
91
+ return -1;
92
+ }
93
+
94
+ void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
95
+ unsigned int index, GumboVector* vector) {
96
+ assert(index >= 0);
97
+ assert(index <= vector->length);
98
+ enlarge_vector_if_full(parser, vector);
99
+ ++vector->length;
100
+ memmove(&vector->data[index + 1], &vector->data[index],
101
+ sizeof(void*) * (vector->length - index - 1));
102
+ vector->data[index] = element;
103
+ }
104
+
105
+ void gumbo_vector_remove(
106
+ struct GumboInternalParser* parser, void* node, GumboVector* vector) {
107
+ int index = gumbo_vector_index_of(vector, node);
108
+ if (index == -1) {
109
+ return;
110
+ }
111
+ gumbo_vector_remove_at(parser, index, vector);
112
+ }
113
+
114
+ void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
115
+ unsigned int index, GumboVector* vector) {
116
+ assert(index >= 0);
117
+ assert(index < vector->length);
118
+ void* result = vector->data[index];
119
+ memmove(&vector->data[index], &vector->data[index + 1],
120
+ sizeof(void*) * (vector->length - index - 1));
121
+ --vector->length;
122
+ return result;
123
+ }
@@ -0,0 +1,67 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Author: jdtang@google.com (Jonathan Tang)
16
+
17
+ #ifndef GUMBO_VECTOR_H_
18
+ #define GUMBO_VECTOR_H_
19
+
20
+ #include "gumbo.h"
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
26
+ // Forward declaration: the parser is only handled through pointers by the
27
+ // functions declared in this header.
28
+ struct GumboInternalParser;
29
+
30
+ // Initializes a new, empty GumboVector with the specified initial capacity.
31
+ void gumbo_vector_init(struct GumboInternalParser* parser,
32
+ size_t initial_capacity, GumboVector* vector);
33
+
34
+ // Frees the memory used by a GumboVector. Does not free the contained
35
+ // pointers.
36
+ void gumbo_vector_destroy(
37
+ struct GumboInternalParser* parser, GumboVector* vector);
38
+
39
+ // Appends a new element to a GumboVector, growing its storage when it is full.
40
+ void gumbo_vector_add(
41
+ struct GumboInternalParser* parser, void* element, GumboVector* vector);
42
+
43
+ // Removes and returns the last element of the GumboVector (the one most
44
+ // recently added). Ownership is transferred to caller. Capacity is unchanged.
45
+ // If the vector is empty, NULL is returned.
46
+ void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
47
+
48
+ // Inserts an element at a specific index, shifting later elements up by one.
49
+ // This is potentially O(N) time, but is necessary for some of the spec's behavior.
50
+ void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
51
+ unsigned int index, GumboVector* vector);
52
+
53
+ // Removes the first occurrence of an element (matched by pointer) from the
54
+ // vector, or does nothing if the element is not in the vector.
55
+ void gumbo_vector_remove(
56
+ struct GumboInternalParser* parser, void* element, GumboVector* vector);
57
+
58
+ // Removes and returns the element at a specific index, shifting later elements
59
+ // down. Note that this is potentially O(N) time and should be used sparingly.
60
+ void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
61
+ unsigned int index, GumboVector* vector);
62
+
63
+ #ifdef __cplusplus
64
+ }
65
+ #endif
66
+
67
+ #endif // GUMBO_VECTOR_H_
@@ -0,0 +1,411 @@
1
+ #include "html_document.h"
2
+ #include "html_element.h"
3
+
4
+ #include <cstring>
5
+ #include <string>
6
+
7
+ namespace html {
8
+
9
+ Napi::FunctionReference Document::constructor;  // Reference to the JS class; assigned in Document::Init().
10
+
11
+ namespace {
12
+
13
+ bool GetStringArg(const Napi::CallbackInfo& info, size_t index,
14
+ const char *name, std::string *value) {
15
+ Napi::Env env = info.Env();
16
+ if (info.Length() <= index || !info[index].IsString()) {
17
+ Napi::TypeError::New(env, std::string(name) + " must be a string")
18
+ .ThrowAsJavaScriptException();
19
+ return false;
20
+ }
21
+ *value = info[index].As<Napi::String>().Utf8Value();
22
+ return true;
23
+ }
24
+
25
+ bool HasDocument(const Napi::CallbackInfo& info, Document *doc) {
26
+ if (doc->xdoc_wrapper_ && doc->xdoc_wrapper_->xdoc) {
27
+ return true;
28
+ }
29
+ Napi::Error::New(info.Env(), "Invalid document").ThrowAsJavaScriptException();
30
+ return false;
31
+ }
32
+
33
+ class QueryHandle {
34
+ public:
35
+ explicit QueryHandle(const char *selector)
36
+ : query_(selector ? xnode_query_create(selector, NULL, NULL) : nullptr) {}
37
+
38
+ ~QueryHandle() {
39
+ if (query_) {
40
+ xnode_query_free(query_);
41
+ }
42
+ }
43
+
44
+ XNodeQuery *get() const { return query_; }
45
+
46
+ private:
47
+ XNodeQuery *query_;
48
+ };
49
+
50
+ } // namespace
51
+
52
+ Document::Document(const Napi::CallbackInfo& info)
53
+ : Napi::ObjectWrap<Document>(info), xdoc_wrapper_(nullptr) {
54
+
55
+ std::string html;
56
+ if (!GetStringArg(info, 0, "html", &html)) {
57
+ return;
58
+ }
59
+ XNodeDocument *xdoc = xnode_parse_html(html.c_str(), html.size());
60
+ xdoc_wrapper_ = new XDocWrapper(xdoc);
61
+ }
62
+
63
+ Document::~Document() {
64
+ if (xdoc_wrapper_ && (--xdoc_wrapper_->ref_count == 0)) {
65
+ delete xdoc_wrapper_;
66
+ }
67
+ }
68
+
69
+ Napi::Value Document::GetDocumentElement(const Napi::CallbackInfo& info) {
70
+ Napi::Env env = info.Env();
71
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
72
+ if (!HasDocument(info, xdoc)) {
73
+ return env.Undefined();
74
+ }
75
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
76
+ return Element::Create(env, xdoc->xdoc_wrapper_, root);
77
+ }
78
+
79
+ void Document::Init(Napi::Env env) {
80
+ Napi::Function func = DefineClass(env, "Document", {
81
+ InstanceMethod("find", &Document::Find),
82
+ InstanceMethod("first", &Document::First),
83
+ InstanceMethod("first_s", &Document::FirstSafe),
84
+ InstanceMethod("only", &Document::Only),
85
+ InstanceMethod("only_s", &Document::OnlySafe),
86
+ InstanceAccessor("documentElement", &Document::GetDocumentElement, nullptr),
87
+
88
+ InstanceMethod("firstOrThrow", &Document::FirstOrThrow),
89
+ InstanceMethod("onlyOrThrow", &Document::OnlyOrThrow),
90
+ InstanceMethod("text", &Document::Text),
91
+ InstanceMethod("textOrThrow", &Document::TextOrThrow),
92
+ InstanceMethod("exists", &Document::Exists),
93
+ InstanceMethod("count", &Document::Count),
94
+ InstanceMethod("attr", &Document::Attr2),
95
+ InstanceMethod("attrOrThrow", &Document::AttrOrThrow2),
96
+ });
97
+
98
+ constructor = Napi::Persistent(func);
99
+ constructor.SuppressDestruct();
100
+ }
101
+
102
+ Napi::Value Document::Find(const Napi::CallbackInfo& info) {
103
+ Napi::Env env = info.Env();
104
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
105
+ if (!HasDocument(info, xdoc)) {
106
+ return env.Undefined();
107
+ }
108
+ std::string selector;
109
+ if (!GetStringArg(info, 0, "selector", &selector)) {
110
+ return env.Undefined();
111
+ }
112
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
113
+ return Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
114
+ }
115
+
116
+ Napi::Value Document::First(const Napi::CallbackInfo& info) {
117
+ Napi::Env env = info.Env();
118
+ Napi::HandleScope scope(env);
119
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
120
+ if (!HasDocument(info, xdoc)) {
121
+ return env.Undefined();
122
+ }
123
+ std::string selector;
124
+ if (!GetStringArg(info, 0, "selector", &selector)) {
125
+ return env.Undefined();
126
+ }
127
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
128
+ Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
129
+ if (!value.IsArray()) {
130
+ return env.Undefined();
131
+ }
132
+ Napi::Array array = value.As<Napi::Array>();
133
+ if (array.Length() > 0) {
134
+ return array.Get(uint32_t(0));
135
+ }
136
+ return env.Null();
137
+ }
138
+
139
+ Napi::Value Document::FirstSafe(const Napi::CallbackInfo& info) {
140
+ Napi::Env env = info.Env();
141
+ Napi::HandleScope scope(env);
142
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
143
+ if (!HasDocument(info, xdoc)) {
144
+ return env.Undefined();
145
+ }
146
+ std::string selector;
147
+ if (!GetStringArg(info, 0, "selector", &selector)) {
148
+ return env.Undefined();
149
+ }
150
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
151
+ Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
152
+ if (!value.IsArray()) {
153
+ return env.Undefined();
154
+ }
155
+ Napi::Array array = value.As<Napi::Array>();
156
+ if (array.Length() > 0) {
157
+ return array.Get(uint32_t(0));
158
+ }
159
+ Napi::Error::New(env, "No element found").ThrowAsJavaScriptException();
160
+ return env.Undefined();
161
+ }
162
+
163
+ Napi::Value Document::Only(const Napi::CallbackInfo& info) {
164
+ Napi::Env env = info.Env();
165
+ Napi::HandleScope scope(env);
166
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
167
+ if (!HasDocument(info, xdoc)) {
168
+ return env.Undefined();
169
+ }
170
+ std::string selector;
171
+ if (!GetStringArg(info, 0, "selector", &selector)) {
172
+ return env.Undefined();
173
+ }
174
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
175
+ Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
176
+ if (!value.IsArray()) {
177
+ return env.Undefined();
178
+ }
179
+ Napi::Array array = value.As<Napi::Array>();
180
+ if (array.Length() == 1) {
181
+ return array.Get(uint32_t(0));
182
+ }
183
+ return env.Null();
184
+ }
185
+
186
+ Napi::Value Document::OnlySafe(const Napi::CallbackInfo& info) {
187
+ Napi::Env env = info.Env();
188
+ Napi::HandleScope scope(env);
189
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
190
+ if (!HasDocument(info, xdoc)) {
191
+ return env.Undefined();
192
+ }
193
+ std::string selector;
194
+ if (!GetStringArg(info, 0, "selector", &selector)) {
195
+ return env.Undefined();
196
+ }
197
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
198
+ Napi::Value value = Element::Query(env, xdoc->xdoc_wrapper_, root, selector.c_str());
199
+ if (!value.IsArray()) {
200
+ return env.Undefined();
201
+ }
202
+ Napi::Array array = value.As<Napi::Array>();
203
+ if (array.Length() == 1) {
204
+ return array.Get(uint32_t(0));
205
+ }
206
+ Napi::Error::New(env, "Not a single element").ThrowAsJavaScriptException();
207
+ return env.Undefined();
208
+ }
209
+
210
+ Napi::Value Document::FirstOrThrow(const Napi::CallbackInfo& info) {
211
+ return FirstSafe(info);
212
+ }
213
+
214
+ Napi::Value Document::OnlyOrThrow(const Napi::CallbackInfo& info) {
215
+ return OnlySafe(info);
216
+ }
217
+
218
+ Napi::Value Document::Text(const Napi::CallbackInfo& info) {
219
+ Napi::Env env = info.Env();
220
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
221
+ if (!HasDocument(info, xdoc)) {
222
+ return env.Undefined();
223
+ }
224
+ std::string selector;
225
+ if (!GetStringArg(info, 0, "selector", &selector)) {
226
+ return env.Undefined();
227
+ }
228
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
229
+
230
+ QueryHandle query(selector.c_str());
231
+ if (!query.get()) {
232
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
233
+ return env.Undefined();
234
+ }
235
+
236
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
237
+ if (result && xnode_array_size(result) > 0) {
238
+ XNode *node = xnode_array_get(result, 0);
239
+ std::string text;
240
+ if (info.Length() > 1 && info[1].IsObject()) {
241
+ Napi::Object opts = info[1].As<Napi::Object>();
242
+ if (opts.Has("separator") && opts.Get("separator").IsString()) {
243
+ std::string sep = opts.Get("separator").As<Napi::String>();
244
+ text = join_text_with_separator(node, sep);
245
+ } else {
246
+ text = get_inner_text(node);
247
+ }
248
+ if (opts.Has("normalize") && opts.Get("normalize").As<Napi::Boolean>()) {
249
+ text = normalize_whitespace(text);
250
+ }
251
+ } else {
252
+ text = get_inner_text(node);
253
+ }
254
+ return Napi::String::New(env, text);
255
+ }
256
+
257
+ return env.Null();
258
+ }
259
+
260
+ Napi::Value Document::TextOrThrow(const Napi::CallbackInfo& info) {
261
+ Napi::Env env = info.Env();
262
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
263
+ if (!HasDocument(info, xdoc)) {
264
+ return env.Undefined();
265
+ }
266
+ std::string selector;
267
+ if (!GetStringArg(info, 0, "selector", &selector)) {
268
+ return env.Undefined();
269
+ }
270
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
271
+
272
+ QueryHandle query(selector.c_str());
273
+ if (!query.get()) {
274
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
275
+ return env.Undefined();
276
+ }
277
+
278
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
279
+ if (result && xnode_array_size(result) > 0) {
280
+ XNode *node = xnode_array_get(result, 0);
281
+ return Napi::String::New(env, get_inner_text(node));
282
+ }
283
+
284
+ Napi::Error::New(env, "No element found").ThrowAsJavaScriptException();
285
+ return env.Undefined();
286
+ }
287
+
288
+ Napi::Value Document::Exists(const Napi::CallbackInfo& info) {
289
+ Napi::Env env = info.Env();
290
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
291
+ if (!HasDocument(info, xdoc)) {
292
+ return env.Undefined();
293
+ }
294
+ std::string selector;
295
+ if (!GetStringArg(info, 0, "selector", &selector)) {
296
+ return env.Undefined();
297
+ }
298
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
299
+
300
+ QueryHandle query(selector.c_str());
301
+ if (!query.get()) {
302
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
303
+ return env.Undefined();
304
+ }
305
+
306
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
307
+ return Napi::Boolean::New(env, result && xnode_array_size(result) > 0);
308
+ }
309
+
310
+ Napi::Value Document::Count(const Napi::CallbackInfo& info) {
311
+ Napi::Env env = info.Env();
312
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
313
+ if (!HasDocument(info, xdoc)) {
314
+ return env.Undefined();
315
+ }
316
+ std::string selector;
317
+ if (!GetStringArg(info, 0, "selector", &selector)) {
318
+ return env.Undefined();
319
+ }
320
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
321
+
322
+ QueryHandle query(selector.c_str());
323
+ if (!query.get()) {
324
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
325
+ return env.Undefined();
326
+ }
327
+
328
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
329
+ return Napi::Number::New(env, result ? xnode_array_size(result) : 0);
330
+ }
331
+
332
+ Napi::Value Document::Attr2(const Napi::CallbackInfo& info) {
333
+ Napi::Env env = info.Env();
334
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
335
+ if (!HasDocument(info, xdoc)) {
336
+ return env.Undefined();
337
+ }
338
+ std::string selector;
339
+ if (!GetStringArg(info, 0, "selector", &selector)) {
340
+ return env.Undefined();
341
+ }
342
+ std::string attrName;
343
+ if (!GetStringArg(info, 1, "name", &attrName)) {
344
+ return env.Undefined();
345
+ }
346
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
347
+
348
+ QueryHandle query(selector.c_str());
349
+ if (!query.get()) {
350
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
351
+ return env.Undefined();
352
+ }
353
+
354
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
355
+ if (result && xnode_array_size(result) > 0) {
356
+ XNode *node = xnode_array_get(result, 0);
357
+ const char *value = xnode_attr(node, attrName.c_str());
358
+ if (value) {
359
+ return Napi::String::New(env, value);
360
+ }
361
+ }
362
+
363
+ return env.Undefined();
364
+ }
365
+
366
+ Napi::Value Document::AttrOrThrow2(const Napi::CallbackInfo& info) {
367
+ Napi::Env env = info.Env();
368
+ Document* xdoc = Document::Unwrap(info.This().As<Napi::Object>());
369
+ if (!HasDocument(info, xdoc)) {
370
+ return env.Undefined();
371
+ }
372
+ std::string selector;
373
+ if (!GetStringArg(info, 0, "selector", &selector)) {
374
+ return env.Undefined();
375
+ }
376
+ std::string attrName;
377
+ if (!GetStringArg(info, 1, "name", &attrName)) {
378
+ return env.Undefined();
379
+ }
380
+ XNode *root = (XNode*) xnode_document_root(xdoc->xdoc_wrapper_->xdoc);
381
+
382
+ QueryHandle query(selector.c_str());
383
+ if (!query.get()) {
384
+ Napi::Error::New(env, "Bad selector.").ThrowAsJavaScriptException();
385
+ return env.Undefined();
386
+ }
387
+
388
+ const XNodeArray *result = xnode_query_execute(query.get(), root);
389
+ if (result && xnode_array_size(result) > 0) {
390
+ XNode *node = xnode_array_get(result, 0);
391
+ const char *value = xnode_attr(node, attrName.c_str());
392
+ if (value) {
393
+ return Napi::String::New(env, value);
394
+ }
395
+ }
396
+
397
+ Napi::Error::New(env, "Attribute not found").ThrowAsJavaScriptException();
398
+ return env.Undefined();
399
+ }
400
+
401
+ Napi::Value Document::Parse(const Napi::CallbackInfo& info) {
402
+ Napi::Env env = info.Env();
403
+ if (info.Length() == 0 || !info[0].IsString()) {
404
+ Napi::TypeError::New(env, "html must be a string")
405
+ .ThrowAsJavaScriptException();
406
+ return env.Undefined();
407
+ }
408
+ return Document::constructor.New({ info[0] });
409
+ }
410
+
411
+ } // namespace html