selectolax 0.3.30__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/parser.pxd ADDED
@@ -0,0 +1,581 @@
1
+
2
+ cdef extern from "myhtml/myhtml.h" nogil:  # C names taken from myhtml/myhtml.h; nogil applies to every function declared in this block
3
+ ctypedef unsigned int mystatus_t  # unsigned status code; return type of the init/parse functions declared in this file
4
+ ctypedef struct myhtml_t  # declared without fields, so it can only be used through pointers
5
+ ctypedef size_t myhtml_tag_id_t  # tag identifier type used by the tag_id fields and by myhtml_node_create
6
+
7
+ ctypedef struct myhtml_tree_t:  # tree handle passed to myhtml_tree_init / myhtml_parse / myhtml_tree_destroy
8
+ # partial declaration: not every field of the C struct is listed here
9
+ myhtml_t* myhtml
10
+ myhtml_tree_node_t* document
11
+ myhtml_tree_node_t* node_html
12
+
13
+ ctypedef struct mchar_async_t  # declared without fields; only referenced through the pointer in mycore_string_t
14
+ ctypedef struct mycore_string_t:  # string type used for token text and attribute key/value fields
15
+ char* data
16
+ size_t size  # NOTE(review): presumably the allocated buffer size, as opposed to `length` — confirm against the mycore headers
17
+ size_t length
18
+
19
+ mchar_async_t *mchar
20
+ size_t node_idx
21
+
22
+ ctypedef struct mycore_string_raw_t:  # string type taken by myhtml_serialization as its `str` argument
23
+ char* data
24
+ size_t size
25
+ size_t length
26
+
27
+ myhtml_namespace ns  # NOTE(review): indentation is lost in this rendering, so it is unclear whether `ns` is a field of this struct or a separate declaration — confirm against the upstream file and the C header
28
+
29
+ ctypedef enum myhtml_options:  # option values accepted by myhtml_init (its `opt` parameter)
30
+ MyHTML_OPTIONS_DEFAULT = 0x00
31
+ MyHTML_OPTIONS_PARSE_MODE_SINGLE = 0x01
32
+ MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE = 0x02
33
+ MyHTML_OPTIONS_PARSE_MODE_SEPARATELY = 0x04  # the non-default values are 0x01, 0x02 and 0x04 (0x03 is not declared)
34
+
35
+ ctypedef struct myhtml_collection_t:  # node-pointer collection used by myhtml_get_nodes_by_name and modest_finder_by_selectors_list
36
+ myhtml_tree_node_t **list  # array of node pointers
37
+ size_t size  # NOTE(review): presumably capacity vs. `length` = number of stored entries — confirm against the myhtml headers
38
+ size_t length
39
+
40
+ ctypedef struct myhtml_tree_node_t:  # one node of the parsed tree; linked to other nodes through the pointer fields below
41
+ myhtml_tree_node_flags flags
42
+
43
+ myhtml_tag_id_t tag_id
44
+ myhtml_namespace ns
45
+
46
+ myhtml_tree_node_t* prev  # prev/next: presumably sibling links — confirm against the myhtml headers
47
+ myhtml_tree_node_t* next
48
+ myhtml_tree_node_t* child  # presumably the first child, given the separate last_child field — confirm
49
+ myhtml_tree_node_t* parent
50
+
51
+ myhtml_tree_node_t* last_child
52
+
53
+ myhtml_token_node_t* token
54
+ void* data  # untyped pointer; its meaning is not visible from this file
55
+
56
+ myhtml_tree_t* tree
57
+
58
+ ctypedef enum myhtml_namespace:  # namespace identifiers stored in the `ns` fields and passed to myhtml_node_create
59
+ MyHTML_NAMESPACE_UNDEF = 0x00
60
+ MyHTML_NAMESPACE_HTML = 0x01
61
+ MyHTML_NAMESPACE_MATHML = 0x02
62
+ MyHTML_NAMESPACE_SVG = 0x03
63
+ MyHTML_NAMESPACE_XLINK = 0x04
64
+ MyHTML_NAMESPACE_XML = 0x05
65
+ MyHTML_NAMESPACE_XMLNS = 0x06
66
+ MyHTML_NAMESPACE_ANY = 0x07
67
+ MyHTML_NAMESPACE_LAST_ENTRY = 0x07  # same numeric value as MyHTML_NAMESPACE_ANY
68
+
69
+ ctypedef enum myhtml_tree_node_flags:  # type of the `flags` field of myhtml_tree_node_t
70
+ MyHTML_TREE_NODE_UNDEF = 0
71
+ MyHTML_TREE_NODE_PARSER_INSERTED = 1
72
+ MyHTML_TREE_NODE_BLOCKING = 2
73
+
74
+ ctypedef enum myhtml_token_type:  # type of the `type` field of myhtml_token_node_t; every non-zero value below is a distinct single bit
75
+ MyHTML_TOKEN_TYPE_OPEN = 0x000
76
+ MyHTML_TOKEN_TYPE_CLOSE = 0x001
77
+ MyHTML_TOKEN_TYPE_CLOSE_SELF = 0x002
78
+ MyHTML_TOKEN_TYPE_DONE = 0x004
79
+ MyHTML_TOKEN_TYPE_WHITESPACE = 0x008
80
+ MyHTML_TOKEN_TYPE_RCDATA = 0x010
81
+ MyHTML_TOKEN_TYPE_RAWTEXT = 0x020
82
+ MyHTML_TOKEN_TYPE_SCRIPT = 0x040
83
+ MyHTML_TOKEN_TYPE_PLAINTEXT = 0x080
84
+ MyHTML_TOKEN_TYPE_CDATA = 0x100
85
+ MyHTML_TOKEN_TYPE_DATA = 0x200
86
+ MyHTML_TOKEN_TYPE_COMMENT = 0x400
87
+ MyHTML_TOKEN_TYPE_NULL = 0x800
88
+
89
+
90
+ ctypedef enum myhtml_tags:  # numeric tag identifiers, numbered consecutively from 0x000; names with a double underscore (MyHTML_TAG__*) are distinct from the same-named element tags
91
+ MyHTML_TAG__UNDEF = 0x000
92
+ MyHTML_TAG__TEXT = 0x001  # not the same constant as MyHTML_TAG_TEXT (0x0d3)
93
+ MyHTML_TAG__COMMENT = 0x002  # not the same constant as MyHTML_TAG_COMMENT (0x022)
94
+ MyHTML_TAG__DOCTYPE = 0x003
95
+ MyHTML_TAG_A = 0x004
96
+ MyHTML_TAG_ABBR = 0x005
97
+ MyHTML_TAG_ACRONYM = 0x006
98
+ MyHTML_TAG_ADDRESS = 0x007
99
+ MyHTML_TAG_ANNOTATION_XML = 0x008
100
+ MyHTML_TAG_APPLET = 0x009
101
+ MyHTML_TAG_AREA = 0x00a
102
+ MyHTML_TAG_ARTICLE = 0x00b
103
+ MyHTML_TAG_ASIDE = 0x00c
104
+ MyHTML_TAG_AUDIO = 0x00d
105
+ MyHTML_TAG_B = 0x00e
106
+ MyHTML_TAG_BASE = 0x00f
107
+ MyHTML_TAG_BASEFONT = 0x010
108
+ MyHTML_TAG_BDI = 0x011
109
+ MyHTML_TAG_BDO = 0x012
110
+ MyHTML_TAG_BGSOUND = 0x013
111
+ MyHTML_TAG_BIG = 0x014
112
+ MyHTML_TAG_BLINK = 0x015
113
+ MyHTML_TAG_BLOCKQUOTE = 0x016
114
+ MyHTML_TAG_BODY = 0x017
115
+ MyHTML_TAG_BR = 0x018
116
+ MyHTML_TAG_BUTTON = 0x019
117
+ MyHTML_TAG_CANVAS = 0x01a
118
+ MyHTML_TAG_CAPTION = 0x01b
119
+ MyHTML_TAG_CENTER = 0x01c
120
+ MyHTML_TAG_CITE = 0x01d
121
+ MyHTML_TAG_CODE = 0x01e
122
+ MyHTML_TAG_COL = 0x01f
123
+ MyHTML_TAG_COLGROUP = 0x020
124
+ MyHTML_TAG_COMMAND = 0x021
125
+ MyHTML_TAG_COMMENT = 0x022
126
+ MyHTML_TAG_DATALIST = 0x023
127
+ MyHTML_TAG_DD = 0x024
128
+ MyHTML_TAG_DEL = 0x025
129
+ MyHTML_TAG_DETAILS = 0x026
130
+ MyHTML_TAG_DFN = 0x027
131
+ MyHTML_TAG_DIALOG = 0x028
132
+ MyHTML_TAG_DIR = 0x029
133
+ MyHTML_TAG_DIV = 0x02a
134
+ MyHTML_TAG_DL = 0x02b
135
+ MyHTML_TAG_DT = 0x02c
136
+ MyHTML_TAG_EM = 0x02d
137
+ MyHTML_TAG_EMBED = 0x02e
138
+ MyHTML_TAG_FIELDSET = 0x02f
139
+ MyHTML_TAG_FIGCAPTION = 0x030
140
+ MyHTML_TAG_FIGURE = 0x031
141
+ MyHTML_TAG_FONT = 0x032
142
+ MyHTML_TAG_FOOTER = 0x033
143
+ MyHTML_TAG_FORM = 0x034
144
+ MyHTML_TAG_FRAME = 0x035
145
+ MyHTML_TAG_FRAMESET = 0x036
146
+ MyHTML_TAG_H1 = 0x037
147
+ MyHTML_TAG_H2 = 0x038
148
+ MyHTML_TAG_H3 = 0x039
149
+ MyHTML_TAG_H4 = 0x03a
150
+ MyHTML_TAG_H5 = 0x03b
151
+ MyHTML_TAG_H6 = 0x03c
152
+ MyHTML_TAG_HEAD = 0x03d
153
+ MyHTML_TAG_HEADER = 0x03e
154
+ MyHTML_TAG_HGROUP = 0x03f
155
+ MyHTML_TAG_HR = 0x040
156
+ MyHTML_TAG_HTML = 0x041
157
+ MyHTML_TAG_I = 0x042
158
+ MyHTML_TAG_IFRAME = 0x043
159
+ MyHTML_TAG_IMAGE = 0x044
160
+ MyHTML_TAG_IMG = 0x045
161
+ MyHTML_TAG_INPUT = 0x046
162
+ MyHTML_TAG_INS = 0x047
163
+ MyHTML_TAG_ISINDEX = 0x048
164
+ MyHTML_TAG_KBD = 0x049
165
+ MyHTML_TAG_KEYGEN = 0x04a
166
+ MyHTML_TAG_LABEL = 0x04b
167
+ MyHTML_TAG_LEGEND = 0x04c
168
+ MyHTML_TAG_LI = 0x04d
169
+ MyHTML_TAG_LINK = 0x04e
170
+ MyHTML_TAG_LISTING = 0x04f
171
+ MyHTML_TAG_MAIN = 0x050
172
+ MyHTML_TAG_MAP = 0x051
173
+ MyHTML_TAG_MARK = 0x052
174
+ MyHTML_TAG_MARQUEE = 0x053
175
+ MyHTML_TAG_MENU = 0x054
176
+ MyHTML_TAG_MENUITEM = 0x055
177
+ MyHTML_TAG_META = 0x056
178
+ MyHTML_TAG_METER = 0x057
179
+ MyHTML_TAG_MTEXT = 0x058
180
+ MyHTML_TAG_NAV = 0x059
181
+ MyHTML_TAG_NOBR = 0x05a
182
+ MyHTML_TAG_NOEMBED = 0x05b
183
+ MyHTML_TAG_NOFRAMES = 0x05c
184
+ MyHTML_TAG_NOSCRIPT = 0x05d
185
+ MyHTML_TAG_OBJECT = 0x05e
186
+ MyHTML_TAG_OL = 0x05f
187
+ MyHTML_TAG_OPTGROUP = 0x060
188
+ MyHTML_TAG_OPTION = 0x061
189
+ MyHTML_TAG_OUTPUT = 0x062
190
+ MyHTML_TAG_P = 0x063
191
+ MyHTML_TAG_PARAM = 0x064
192
+ MyHTML_TAG_PLAINTEXT = 0x065
193
+ MyHTML_TAG_PRE = 0x066
194
+ MyHTML_TAG_PROGRESS = 0x067
195
+ MyHTML_TAG_Q = 0x068
196
+ MyHTML_TAG_RB = 0x069
197
+ MyHTML_TAG_RP = 0x06a
198
+ MyHTML_TAG_RT = 0x06b
199
+ MyHTML_TAG_RTC = 0x06c
200
+ MyHTML_TAG_RUBY = 0x06d
201
+ MyHTML_TAG_S = 0x06e
202
+ MyHTML_TAG_SAMP = 0x06f
203
+ MyHTML_TAG_SCRIPT = 0x070
204
+ MyHTML_TAG_SECTION = 0x071
205
+ MyHTML_TAG_SELECT = 0x072
206
+ MyHTML_TAG_SMALL = 0x073
207
+ MyHTML_TAG_SOURCE = 0x074
208
+ MyHTML_TAG_SPAN = 0x075
209
+ MyHTML_TAG_STRIKE = 0x076
210
+ MyHTML_TAG_STRONG = 0x077
211
+ MyHTML_TAG_STYLE = 0x078
212
+ MyHTML_TAG_SUB = 0x079
213
+ MyHTML_TAG_SUMMARY = 0x07a
214
+ MyHTML_TAG_SUP = 0x07b
215
+ MyHTML_TAG_SVG = 0x07c
216
+ MyHTML_TAG_TABLE = 0x07d
217
+ MyHTML_TAG_TBODY = 0x07e
218
+ MyHTML_TAG_TD = 0x07f
219
+ MyHTML_TAG_TEMPLATE = 0x080
220
+ MyHTML_TAG_TEXTAREA = 0x081
221
+ MyHTML_TAG_TFOOT = 0x082
222
+ MyHTML_TAG_TH = 0x083
223
+ MyHTML_TAG_THEAD = 0x084
224
+ MyHTML_TAG_TIME = 0x085
225
+ MyHTML_TAG_TITLE = 0x086
226
+ MyHTML_TAG_TR = 0x087
227
+ MyHTML_TAG_TRACK = 0x088
228
+ MyHTML_TAG_TT = 0x089
229
+ MyHTML_TAG_U = 0x08a
230
+ MyHTML_TAG_UL = 0x08b
231
+ MyHTML_TAG_VAR = 0x08c
232
+ MyHTML_TAG_VIDEO = 0x08d
233
+ MyHTML_TAG_WBR = 0x08e
234
+ MyHTML_TAG_XMP = 0x08f
235
+ MyHTML_TAG_ALTGLYPH = 0x090
236
+ MyHTML_TAG_ALTGLYPHDEF = 0x091
237
+ MyHTML_TAG_ALTGLYPHITEM = 0x092
238
+ MyHTML_TAG_ANIMATE = 0x093
239
+ MyHTML_TAG_ANIMATECOLOR = 0x094
240
+ MyHTML_TAG_ANIMATEMOTION = 0x095
241
+ MyHTML_TAG_ANIMATETRANSFORM = 0x096
242
+ MyHTML_TAG_CIRCLE = 0x097
243
+ MyHTML_TAG_CLIPPATH = 0x098
244
+ MyHTML_TAG_COLOR_PROFILE = 0x099
245
+ MyHTML_TAG_CURSOR = 0x09a
246
+ MyHTML_TAG_DEFS = 0x09b
247
+ MyHTML_TAG_DESC = 0x09c
248
+ MyHTML_TAG_ELLIPSE = 0x09d
249
+ MyHTML_TAG_FEBLEND = 0x09e
250
+ MyHTML_TAG_FECOLORMATRIX = 0x09f
251
+ MyHTML_TAG_FECOMPONENTTRANSFER = 0x0a0
252
+ MyHTML_TAG_FECOMPOSITE = 0x0a1
253
+ MyHTML_TAG_FECONVOLVEMATRIX = 0x0a2
254
+ MyHTML_TAG_FEDIFFUSELIGHTING = 0x0a3
255
+ MyHTML_TAG_FEDISPLACEMENTMAP = 0x0a4
256
+ MyHTML_TAG_FEDISTANTLIGHT = 0x0a5
257
+ MyHTML_TAG_FEDROPSHADOW = 0x0a6
258
+ MyHTML_TAG_FEFLOOD = 0x0a7
259
+ MyHTML_TAG_FEFUNCA = 0x0a8
260
+ MyHTML_TAG_FEFUNCB = 0x0a9
261
+ MyHTML_TAG_FEFUNCG = 0x0aa
262
+ MyHTML_TAG_FEFUNCR = 0x0ab
263
+ MyHTML_TAG_FEGAUSSIANBLUR = 0x0ac
264
+ MyHTML_TAG_FEIMAGE = 0x0ad
265
+ MyHTML_TAG_FEMERGE = 0x0ae
266
+ MyHTML_TAG_FEMERGENODE = 0x0af
267
+ MyHTML_TAG_FEMORPHOLOGY = 0x0b0
268
+ MyHTML_TAG_FEOFFSET = 0x0b1
269
+ MyHTML_TAG_FEPOINTLIGHT = 0x0b2
270
+ MyHTML_TAG_FESPECULARLIGHTING = 0x0b3
271
+ MyHTML_TAG_FESPOTLIGHT = 0x0b4
272
+ MyHTML_TAG_FETILE = 0x0b5
273
+ MyHTML_TAG_FETURBULENCE = 0x0b6
274
+ MyHTML_TAG_FILTER = 0x0b7
275
+ MyHTML_TAG_FONT_FACE = 0x0b8
276
+ MyHTML_TAG_FONT_FACE_FORMAT = 0x0b9
277
+ MyHTML_TAG_FONT_FACE_NAME = 0x0ba
278
+ MyHTML_TAG_FONT_FACE_SRC = 0x0bb
279
+ MyHTML_TAG_FONT_FACE_URI = 0x0bc
280
+ MyHTML_TAG_FOREIGNOBJECT = 0x0bd
281
+ MyHTML_TAG_G = 0x0be
282
+ MyHTML_TAG_GLYPH = 0x0bf
283
+ MyHTML_TAG_GLYPHREF = 0x0c0
284
+ MyHTML_TAG_HKERN = 0x0c1
285
+ MyHTML_TAG_LINE = 0x0c2
286
+ MyHTML_TAG_LINEARGRADIENT = 0x0c3
287
+ MyHTML_TAG_MARKER = 0x0c4
288
+ MyHTML_TAG_MASK = 0x0c5
289
+ MyHTML_TAG_METADATA = 0x0c6
290
+ MyHTML_TAG_MISSING_GLYPH = 0x0c7
291
+ MyHTML_TAG_MPATH = 0x0c8
292
+ MyHTML_TAG_PATH = 0x0c9
293
+ MyHTML_TAG_PATTERN = 0x0ca
294
+ MyHTML_TAG_POLYGON = 0x0cb
295
+ MyHTML_TAG_POLYLINE = 0x0cc
296
+ MyHTML_TAG_RADIALGRADIENT = 0x0cd
297
+ MyHTML_TAG_RECT = 0x0ce
298
+ MyHTML_TAG_SET = 0x0cf
299
+ MyHTML_TAG_STOP = 0x0d0
300
+ MyHTML_TAG_SWITCH = 0x0d1
301
+ MyHTML_TAG_SYMBOL = 0x0d2
302
+ MyHTML_TAG_TEXT = 0x0d3
303
+ MyHTML_TAG_TEXTPATH = 0x0d4
304
+ MyHTML_TAG_TREF = 0x0d5
305
+ MyHTML_TAG_TSPAN = 0x0d6
306
+ MyHTML_TAG_USE = 0x0d7
307
+ MyHTML_TAG_VIEW = 0x0d8
308
+ MyHTML_TAG_VKERN = 0x0d9
309
+ MyHTML_TAG_MATH = 0x0da
310
+ MyHTML_TAG_MACTION = 0x0db
311
+ MyHTML_TAG_MALIGNGROUP = 0x0dc
312
+ MyHTML_TAG_MALIGNMARK = 0x0dd
313
+ MyHTML_TAG_MENCLOSE = 0x0de
314
+ MyHTML_TAG_MERROR = 0x0df
315
+ MyHTML_TAG_MFENCED = 0x0e0
316
+ MyHTML_TAG_MFRAC = 0x0e1
317
+ MyHTML_TAG_MGLYPH = 0x0e2
318
+ MyHTML_TAG_MI = 0x0e3
319
+ MyHTML_TAG_MLABELEDTR = 0x0e4
320
+ MyHTML_TAG_MLONGDIV = 0x0e5
321
+ MyHTML_TAG_MMULTISCRIPTS = 0x0e6
322
+ MyHTML_TAG_MN = 0x0e7
323
+ MyHTML_TAG_MO = 0x0e8
324
+ MyHTML_TAG_MOVER = 0x0e9
325
+ MyHTML_TAG_MPADDED = 0x0ea
326
+ MyHTML_TAG_MPHANTOM = 0x0eb
327
+ MyHTML_TAG_MROOT = 0x0ec
328
+ MyHTML_TAG_MROW = 0x0ed
329
+ MyHTML_TAG_MS = 0x0ee
330
+ MyHTML_TAG_MSCARRIES = 0x0ef
331
+ MyHTML_TAG_MSCARRY = 0x0f0
332
+ MyHTML_TAG_MSGROUP = 0x0f1
333
+ MyHTML_TAG_MSLINE = 0x0f2
334
+ MyHTML_TAG_MSPACE = 0x0f3
335
+ MyHTML_TAG_MSQRT = 0x0f4
336
+ MyHTML_TAG_MSROW = 0x0f5
337
+ MyHTML_TAG_MSTACK = 0x0f6
338
+ MyHTML_TAG_MSTYLE = 0x0f7
339
+ MyHTML_TAG_MSUB = 0x0f8
340
+ MyHTML_TAG_MSUP = 0x0f9
341
+ MyHTML_TAG_MSUBSUP = 0x0fa
342
+ MyHTML_TAG__END_OF_FILE = 0x0fb  # highest tag value declared in this enum
343
+ MyHTML_TAG_FIRST_ENTRY = MyHTML_TAG__TEXT  # alias for 0x001, so MyHTML_TAG__UNDEF (0x000) lies below FIRST_ENTRY
344
+ MyHTML_TAG_LAST_ENTRY = 0x0fc  # one greater than MyHTML_TAG__END_OF_FILE; no tag is declared with this value
345
+
346
+ ctypedef enum myhtml_tree_parse_flags_t:  # values accepted by myhtml_tree_parse_flags_set
347
+ MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000
348
+ MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001
349
+ MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003  # 0x003 has the 0x001 bit set, i.e. it overlaps WITHOUT_BUILD_TREE
350
+ MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004
351
+ MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008
352
+
353
+ ctypedef struct myhtml_token_node_t:  # token record referenced by myhtml_tree_node_t.token
354
+ myhtml_tag_id_t tag_id
355
+
356
+ mycore_string_t str
357
+
358
+ size_t raw_begin  # raw_* / element_*: begin/length pairs — presumably offsets into the parsed input; confirm against the myhtml headers
359
+ size_t raw_length
360
+
361
+ size_t element_begin
362
+ size_t element_length
363
+
364
+ myhtml_token_attr_t* attr_first  # first and last entries of the attribute list (myhtml_token_attr_t has next/prev links)
365
+ myhtml_token_attr_t* attr_last
366
+
367
+ myhtml_token_type type
368
+
369
+ ctypedef struct myhtml_token_attr_t:  # one attribute entry, linked to its neighbours through next/prev
370
+ myhtml_token_attr_t* next
371
+ myhtml_token_attr_t* prev
372
+
373
+ mycore_string_t key
374
+ mycore_string_t value
375
+
376
+ size_t raw_key_begin  # begin/length pairs for key and value — presumably offsets into the parsed input; confirm against the myhtml headers
377
+ size_t raw_key_length
378
+ size_t raw_value_begin
379
+ size_t raw_value_length
380
+
381
+ myhtml_namespace ns
382
+
383
+ ctypedef struct myhtml_tree_attr_t:  # attribute type returned by the myhtml_attribute_* / myhtml_node_attribute_first functions; no `ns` field is declared here, unlike myhtml_token_attr_t
384
+ myhtml_tree_attr_t* next
385
+ myhtml_tree_attr_t* prev
386
+
387
+ mycore_string_t key
388
+ mycore_string_t value
389
+
390
+ size_t raw_key_begin
391
+ size_t raw_key_length
392
+ size_t raw_value_begin
393
+ size_t raw_value_length
394
+
395
+
396
+
397
+ myhtml_t * myhtml_create()  # create/init come in pairs here: *_create() takes no arguments and returns a pointer, *_init() returns a mystatus_t
398
+ mystatus_t myhtml_init(myhtml_t* myhtml, myhtml_options opt, size_t thread_count, size_t queue_size)
399
+ myhtml_tree_t * myhtml_tree_create()
400
+ mystatus_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml)
401
+ mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size)  # myencoding_t is declared in the "myencoding/encoding.h" extern block further down this file
402
+
403
+ myhtml_tree_attr_t* myhtml_node_attribute_first(myhtml_tree_node_t* node)
404
+ myhtml_tree_attr_t* myhtml_attribute_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len)  # key is passed as pointer + explicit length
405
+ const char* myhtml_node_text(myhtml_tree_node_t *node, size_t *length)  # `length` is a pointer argument — presumably an out-parameter for the text length; confirm against the myhtml headers
406
+ mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node)
407
+ const char * myhtml_tag_name_by_id(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, size_t *length)
408
+
409
+ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection)  # each *_destroy below returns a pointer of the same type it takes; the returned value's meaning is not visible from this file
410
+ myhtml_tree_t * myhtml_tree_destroy(myhtml_tree_t* tree)
411
+ myhtml_t* myhtml_destroy(myhtml_t* myhtml)
412
+
413
+ myhtml_tree_node_t* myhtml_tree_get_document(myhtml_tree_t* tree)
414
+ myhtml_tree_node_t* myhtml_tree_get_node_body(myhtml_tree_t* tree)
415
+ myhtml_tree_node_t* myhtml_tree_get_node_head(myhtml_tree_t* tree)
416
+
417
+ myhtml_collection_t* myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection,
418
+ const char* name, size_t length, mystatus_t *status)  # takes a collection pointer and also returns one; `status` is a pointer argument (presumably an out-parameter — confirm)
419
+
420
+ void myhtml_node_delete(myhtml_tree_node_t *node)
421
+ void myhtml_node_delete_recursive(myhtml_tree_node_t *node)
422
+ void myhtml_tree_parse_flags_set(myhtml_tree_t* tree, myhtml_tree_parse_flags_t parse_flags)
423
+ myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
424
+ myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
425
+ myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, myhtml_namespace ns)
426
+ myhtml_tree_node_t * myhtml_node_clone_deep(myhtml_tree_t* dest_tree, myhtml_tree_node_t* src)
427
+ myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t* target, myhtml_tree_node_t* node)
428
+
429
+ mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length,
430
+ myencoding_t encoding)
431
+ myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len)  # NOTE(review): same signature as the myhtml_attribute_by_key declaration earlier in this extern block — redundant duplicate
432
+ myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len)
433
+ myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len,
434
+ const char *value, size_t value_len, myencoding_t encoding)  # key and value are each passed as pointer + explicit length
435
+
436
+ myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
437
+
438
+ cdef extern from "myhtml/tree.h" nogil:  # additional node functions declared from myhtml/tree.h rather than myhtml/myhtml.h
439
+ myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_node_t* node)
440
+ myhtml_tree_node_t * myhtml_tree_node_insert_root(myhtml_tree_t* tree, myhtml_token_node_t* token,
441
+ myhtml_namespace ns)
442
+ void myhtml_tree_node_add_child(myhtml_tree_node_t* root, myhtml_tree_node_t* node)
443
+
444
+ cdef extern from "myhtml/serialization.h" nogil:  # serialization entry point declared from myhtml/serialization.h
445
+ mystatus_t myhtml_serialization(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str)  # takes a node and a mycore_string_raw_t pointer (presumably the output buffer — confirm); returns a status code
446
+
447
+
448
+ cdef extern from "myencoding/encoding.h" nogil:  # encoding identifiers and detection helpers declared from myencoding/encoding.h
449
+ ctypedef enum myencoding_t:  # encoding identifiers; 0x01 and 0x03 are not declared here
450
+ MyENCODING_DEFAULT = 0x00
451
+ # MyENCODING_AUTO = 0x01 // future (left commented out, so 0x01 has no name here)
452
+ MyENCODING_NOT_DETERMINED = 0x02
453
+ MyENCODING_UTF_8 = 0x00 # default encoding: same numeric value as MyENCODING_DEFAULT
454
+ MyENCODING_UTF_16LE = 0x04
455
+ MyENCODING_UTF_16BE = 0x05
456
+ MyENCODING_X_USER_DEFINED = 0x06
457
+ MyENCODING_BIG5 = 0x07
458
+ MyENCODING_EUC_JP = 0x08
459
+ MyENCODING_EUC_KR = 0x09
460
+ MyENCODING_GB18030 = 0x0a
461
+ MyENCODING_GBK = 0x0b
462
+ MyENCODING_IBM866 = 0x0c
463
+ MyENCODING_ISO_2022_JP = 0x0d
464
+ MyENCODING_ISO_8859_10 = 0x0e
465
+ MyENCODING_ISO_8859_13 = 0x0f
466
+ MyENCODING_ISO_8859_14 = 0x10
467
+ MyENCODING_ISO_8859_15 = 0x11
468
+ MyENCODING_ISO_8859_16 = 0x12
469
+ MyENCODING_ISO_8859_2 = 0x13
470
+ MyENCODING_ISO_8859_3 = 0x14
471
+ MyENCODING_ISO_8859_4 = 0x15
472
+ MyENCODING_ISO_8859_5 = 0x16
473
+ MyENCODING_ISO_8859_6 = 0x17
474
+ MyENCODING_ISO_8859_7 = 0x18
475
+ MyENCODING_ISO_8859_8 = 0x19
476
+ MyENCODING_ISO_8859_8_I = 0x1a
477
+ MyENCODING_KOI8_R = 0x1b
478
+ MyENCODING_KOI8_U = 0x1c
479
+ MyENCODING_MACINTOSH = 0x1d
480
+ MyENCODING_SHIFT_JIS = 0x1e
481
+ MyENCODING_WINDOWS_1250 = 0x1f
482
+ MyENCODING_WINDOWS_1251 = 0x20
483
+ MyENCODING_WINDOWS_1252 = 0x21
484
+ MyENCODING_WINDOWS_1253 = 0x22
485
+ MyENCODING_WINDOWS_1254 = 0x23
486
+ MyENCODING_WINDOWS_1255 = 0x24
487
+ MyENCODING_WINDOWS_1256 = 0x25
488
+ MyENCODING_WINDOWS_1257 = 0x26
489
+ MyENCODING_WINDOWS_1258 = 0x27
490
+ MyENCODING_WINDOWS_874 = 0x28
491
+ MyENCODING_X_MAC_CYRILLIC = 0x29
492
+ MyENCODING_LAST_ENTRY = 0x2a  # one greater than the last named encoding (0x29)
493
+
494
+ bint myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding)  # boolean result; `encoding` is a pointer argument (presumably where the detected encoding is written — confirm)
495
+ bint myencoding_detect(const char *text, size_t length, myencoding_t *encoding)
496
+ myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size)  # returns the encoding directly rather than through a pointer argument
497
+ const char* myencoding_name_by_id(myencoding_t encoding, size_t *length)
498
+
499
+
500
+ cdef extern from "mycss/mycss.h" nogil:  # CSS selector parsing types and functions declared from mycss/mycss.h
501
+ ctypedef struct mycss_entry_t:
502
+ # partial declaration: not every field of the C struct is listed here
503
+ mycss_t* mycss
504
+
505
+ ctypedef struct mycss_t  # this and the three structs below are declared without fields, so they are only usable through pointers
506
+
507
+ ctypedef struct mycss_selectors_t
508
+
509
+ ctypedef struct mycss_selectors_entries_list_t
510
+ ctypedef struct mycss_declaration_entry_t
511
+
512
+ ctypedef enum mycss_selectors_flags:  # type of the `flags` field of mycss_selectors_list_t (through the _t alias below)
513
+ MyCSS_SELECTORS_FLAGS_UNDEF = 0x00
514
+ MyCSS_SELECTORS_FLAGS_SELECTOR_BAD = 0x01
515
+ ctypedef mycss_selectors_flags mycss_selectors_flags_t  # alias for the enum above
516
+
517
+ ctypedef struct mycss_selectors_list_t:  # selector list returned by mycss_selectors_parse and consumed by modest_finder_by_selectors_list
518
+ mycss_selectors_entries_list_t* entries_list
519
+ size_t entries_list_length
520
+
521
+ mycss_declaration_entry_t* declaration_entry
522
+
523
+ mycss_selectors_flags_t flags  # mycss_selectors_flags value, e.g. MyCSS_SELECTORS_FLAGS_SELECTOR_BAD
524
+
525
+ mycss_selectors_list_t* parent  # parent/next/prev: links to other selector lists
526
+ mycss_selectors_list_t* next
527
+ mycss_selectors_list_t* prev
528
+
529
+ # CSS init routines: *_create() returns a pointer, *_init() returns a mystatus_t
530
+ mycss_t * mycss_create()
531
+ mystatus_t mycss_init(mycss_t* mycss)
532
+ mycss_entry_t * mycss_entry_create()
533
+ mystatus_t mycss_entry_init(mycss_t* mycss, mycss_entry_t* entry)
534
+
535
+ mycss_selectors_list_t * mycss_selectors_parse(mycss_selectors_t* selectors, myencoding_t encoding,
536
+ const char* data, size_t data_size, mystatus_t* out_status)  # selector text is passed as pointer + size; `out_status` is a pointer argument (presumably receives the parse status — confirm)
537
+ mycss_selectors_t * mycss_entry_selectors(mycss_entry_t* entry)
538
+
539
+ mycss_selectors_list_t * mycss_selectors_list_destroy(mycss_selectors_t* selectors,
540
+ mycss_selectors_list_t* selectors_list, bint self_destroy)  # the three destroy functions all take a boolean self_destroy flag; its exact effect is defined by the mycss headers
541
+ mycss_entry_t * mycss_entry_destroy(mycss_entry_t* entry, bint self_destroy)
542
+ mycss_t * mycss_destroy(mycss_t* mycss, bint self_destroy)
543
+
544
+
545
+
546
+ cdef extern from "modest/finder/finder.h" nogil:  # selector-matching ("finder") API declared from modest/finder/finder.h
547
+ ctypedef struct modest_finder_t  # declared without fields; only usable through pointers
548
+ modest_finder_t* modest_finder_create_simple()
549
+ mystatus_t modest_finder_by_selectors_list(modest_finder_t* finder, myhtml_tree_node_t* scope_node,
550
+ mycss_selectors_list_t* selector_list, myhtml_collection_t** collection)  # `collection` is a pointer-to-pointer (presumably receives the matching nodes — confirm); returns a status code
551
+ modest_finder_t * modest_finder_destroy(modest_finder_t* finder, bint self_destroy)
552
+
553
+
554
+ cdef class HTMLParser:  # extension type declaration: C-level attributes and cdef methods; the implementations are not part of this .pxd
555
+ cdef myhtml_tree_t *html_tree  # C pointer to the myhtml tree; not declared public
556
+ cdef public bint detect_encoding  # `cdef public` attributes are readable and writable from Python
557
+ cdef public bint use_meta_tags
558
+ cdef myencoding_t _encoding
559
+ cdef public unicode decode_errors
560
+ cdef public bytes raw_html
561
+ cdef object cached_script_texts
562
+ cdef object cached_script_srcs
563
+
564
+ cdef void _detect_encoding(self, char* html, size_t html_len) nogil  # declared nogil: callable without holding the GIL
565
+ cdef _parse_html(self, char* html, size_t html_len)  # no return type declared, so the return value is a Python object
566
+ @staticmethod
567
+ cdef HTMLParser from_tree(
568
+ myhtml_tree_t * tree, bytes raw_html, bint detect_encoding, bint use_meta_tags, str decode_errors,
569
+ myencoding_t encoding
570
+ )  # static cdef factory; NOTE(review): decode_errors is typed `str` here but the attribute is typed `unicode` — the two only coincide under language_level 3, confirm
571
+
572
+
573
+ cdef class Stack:  # extension type holding an array of myhtml_tree_node_t pointers; the implementations are not part of this .pxd
574
+ cdef size_t capacity
575
+ cdef size_t top
576
+ cdef myhtml_tree_node_t ** _stack  # backing array of node pointers
577
+
578
+ cdef bint is_empty(self)
579
+ cdef push(self, myhtml_tree_node_t* res)  # no return type declared, so the return value is a Python object
580
+ cdef myhtml_tree_node_t * pop(self)
581
+ cdef resize(self)  # no return type declared, so the return value is a Python object