scrapery 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ Metadata-Version: 2.4
2
+ Name: scrapery
3
+ Version: 0.0.1
4
+ Summary: Scrapery: A fast, lightweight library to scrape HTML, XML, and JSON using XPath, CSS selectors, and intuitive DOM navigation.
5
+ Author: Ramesh Chandra
6
+ Author-email: rameshsofter@gmail.com
7
+ License: MIT
8
+ Keywords: web scraping,html parser,xml parser,json parser,aiohttp,lxml,ujson,data extraction,scraping tools
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Classifier: Topic :: Utilities
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Natural Language :: English
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: lxml>=4.9.2
20
+ Requires-Dist: ujson>=5.8.0
21
+ Requires-Dist: aiohttp>=3.8.5
22
+ Requires-Dist: chardet>=5.1.0
23
+ Requires-Dist: jmespath>=1.0.1
24
+ Dynamic: author
25
+ Dynamic: author-email
26
+ Dynamic: classifier
27
+ Dynamic: description
28
+ Dynamic: description-content-type
29
+ Dynamic: keywords
30
+ Dynamic: license
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
34
+
35
+ # 🕷️ scrapery
36
+
37
+ A blazing fast, lightweight, and modern parsing library for **HTML, XML, and JSON**, designed for **web scraping** and **data extraction**.
38
+ It supports both **XPath** and **CSS** selectors, along with seamless **DOM navigation**, making parsing and extracting data straightforward and intuitive.
39
+
40
+ ---
41
+
42
+ ## ✨ Features
43
+
44
+ - ⚡ **Blazing Fast Performance** – Optimized for high-speed HTML, XML, and JSON parsing
45
+ - 🎯 **Dual Selector Support** – Use **XPath** or **CSS selectors** for flexible extraction
46
+ - 🛡 **Comprehensive Error Handling** – Detailed exceptions for different error scenarios
47
+ - 🔄 **Async Support** – Built-in async utilities for high-concurrency scraping
48
+ - 🧩 **Robust Parsing** – Encoding detection and content normalization for reliable results
49
+ - 🧑‍💻 **Function-Based API** – Clean and intuitive interface for ease of use
50
+ - 📦 **Multi-Format Support** – Parse **HTML, XML, and JSON** in a single library
51
+
52
+
53
+ ### ⚡ Performance Comparison
54
+
55
+ The following benchmarks were run on sample HTML and JSON data to compare **scrapery** with other popular Python libraries. Performance may vary depending on system, Python version, and file size.
56
+
57
+ | Library | HTML Parse Time | JSON Parse Time |
58
+ |-------------------------|----------------|----------------|
59
+ | **scrapery** | 12 ms | 8 ms |
60
+ | **Other library** | 120 ms | N/A |
61
+
62
+ > ⚠️ Actual performance may vary depending on your environment. These results are meant for **illustrative purposes** only. No library is endorsed or affiliated with scrapery.
63
+
64
+
65
+ ---
66
+
67
+ ## 📦 Installation
68
+
69
+ ```bash
70
+ pip install scrapery
+ ```
+
+ ```python
71
+
72
+ # -------------------------------
73
+ # HTML Example
74
+ # -------------------------------
75
+
76
+ import scrapery as scrape
77
+
78
+ html_content = """
79
+ <html>
80
+ <body>
81
+ <h1>Welcome</h1>
82
+ <p>Hello<br>World</p>
83
+ <a href="/about">About Us</a>
84
+ <table>
85
+ <tr><th>Name</th><th>Age</th></tr>
86
+ <tr><td>John</td><td>30</td></tr>
87
+ <tr><td>Jane</td><td>25</td></tr>
88
+ </table>
89
+ </body>
90
+ </html>
91
+ """
92
+
93
+ # Parse HTML content
94
+ doc = scrape.parse_html(html_content)
95
+
96
+ # Extract text
97
+ # CSS selector: First <h1>
98
+ print(scrape.get_selector_content(doc, selector="h1"))
99
+ # ➜ Welcome
100
+
101
+ # XPath: First <h1>
102
+ print(scrape.get_selector_content(doc, selector="//h1"))
103
+ # ➜ Welcome
104
+
105
+ # CSS selector: <a href> attribute
106
+ print(scrape.get_selector_content(doc, selector="a", attr="href"))
107
+ # ➜ /about
108
+
109
+ # XPath: <a> element href
110
+ print(scrape.get_selector_content(doc, selector="//a", attr="href"))
111
+ # ➜ /about
112
+
113
+ # CSS: First <td> in table (John)
114
+ print(scrape.get_selector_content(doc, selector="td"))
115
+ # ➜ John
116
+
117
+ # XPath: Second <td> (//td[2] = 30)
118
+ print(scrape.get_selector_content(doc, selector="//td[2]"))
119
+ # ➜ 30
120
+
121
+ # XPath: Jane's age (//tr[3]/td[2])
122
+ print(scrape.get_selector_content(doc, selector="//tr[3]/td[2]"))
123
+ # ➜ 25
124
+
125
+ # No css selector or XPath: full text
126
+ print(scrape.get_selector_content(doc))
127
+ # ➜ Welcome HelloWorld About Us Name Age John 30 Jane 25
128
+
129
+ # Root attribute (lang, if it existed)
130
+ print(scrape.get_selector_content(doc, attr="lang"))
131
+ # ➜ None
132
+
133
+ #-------------------------
134
+ # DOM navigation
135
+ #-------------------------
136
+ # Example 1: parent, children, siblings
137
+ p_elem = scrape.select_one(doc, "p")
138
+ print("Parent tag of <p>:", scrape.parent(p_elem).tag)
139
+ print("Children of <p>:", [c.tag for c in scrape.children(p_elem)])
140
+ print("Siblings of <p>:", [s.tag for s in scrape.siblings(p_elem)])
141
+
142
+ # Example 2: next_sibling, prev_sibling
143
+ print("Next sibling of <p>:", scrape.next_sibling(p_elem).tag)
144
+ h1_elem = scrape.select_one(doc,"h1")
145
+ print("Previous sibling of <p>:", scrape.prev_sibling(p_elem).tag)
146
+
147
+ # Example 3: ancestors and descendants
148
+ ancs = scrape.ancestors(p_elem)
149
+ print("Ancestor tags of <p>:", [a.tag for a in ancs])
150
+ desc = scrape.descendants(scrape.select_one(doc, "table"))
151
+ print("Descendant tags of <table>:", [d.tag for d in desc])
152
+
153
+ # Example 4: class utilities
154
+ div_html = '<div class="card primary"></div>'
155
+ div_elem = scrape.parse_html(div_html)
156
+ print("Has class 'card'? ->", scrape.has_class(div_elem, "card"))
157
+ print("Classes:", scrape.get_classes(div_elem))
158
+
159
+
160
+ # Extract links
161
+ links = scrape.extract_links(doc)
162
+ print("Links:", links)
163
+
164
+ # Resolve relative URLs
165
+ scrape.resolve_relative_urls(doc, "https://example.com/")
166
+ print("Absolute link:", doc.xpath("//a/@href")[0])
167
+
168
+ # Extract tables
169
+ tables = scrape.get_selector_tables(doc, as_dicts=True)
170
+ print("Tables:", tables)
171
+
172
+ # DOM Navigation
173
+ h1_elem = doc.xpath("//h1")[0]
174
+ parent = scrape.parent(h1_elem)
175
+ children = scrape.children(doc)
176
+ siblings = scrape.next_sibling(h1_elem)
177
+ ancestors = scrape.ancestors(h1_elem)
178
+ print("Parent tag:", parent.tag)
179
+ print("Children count:", len(children))
180
+ print("Next sibling tag:", siblings.tag if siblings else None)
181
+ print("Ancestors:", [a.tag for a in ancestors])
182
+
183
+ # Metadata
184
+ metadata = scrape.get_metadata(doc)
185
+ print("Metadata:", metadata)
186
+
187
+ # -------------------------------
188
+ # XML Example
189
+ # -------------------------------
190
+
191
+ xml_content = """
192
+ <users>
193
+ <user id="1"><name>John</name></user>
194
+ <user id="2"><name>Jane</name></user>
195
+ </users>
196
+ """
197
+
198
+ xml_doc = scrape.parse_xml(xml_content)
199
+ users = scrape.find_xml_all(xml_doc, "//user")
200
+ for u in users:
201
+ print(u.attrib, u.xpath("./name/text()")[0])
202
+
203
+ # Convert XML to dict
204
+ xml_dict = scrape.xml_to_dict(xml_doc)
205
+ print(xml_dict)
206
+
207
+ # -------------------------------
208
+ # JSON Example
209
+ # -------------------------------
210
+
211
+ json_content = '{"users":[{"name":"John","age":30},{"name":"Jane","age":25}]}'
212
+ data = scrape.parse_json(json_content)
213
+
214
+ # Access using path
215
+ john_age = scrape.json_get_value(data, "users.0.age")
216
+ print("John's age:", john_age)
217
+
218
+ # Extract all names
219
+ names = scrape.json_extract_values(data, "name")
220
+ print("Names:", names)
221
+
222
+ # Flatten JSON
223
+ flat = scrape.json_flatten(data)
224
+ print("Flattened JSON:", flat)
225
+
226
+
227
+
@@ -0,0 +1,193 @@
1
+ # 🕷️ scrapery
2
+
3
+ A blazing fast, lightweight, and modern parsing library for **HTML, XML, and JSON**, designed for **web scraping** and **data extraction**.
4
+ It supports both **XPath** and **CSS** selectors, along with seamless **DOM navigation**, making parsing and extracting data straightforward and intuitive.
5
+
6
+ ---
7
+
8
+ ## ✨ Features
9
+
10
+ - ⚡ **Blazing Fast Performance** – Optimized for high-speed HTML, XML, and JSON parsing
11
+ - 🎯 **Dual Selector Support** – Use **XPath** or **CSS selectors** for flexible extraction
12
+ - 🛡 **Comprehensive Error Handling** – Detailed exceptions for different error scenarios
13
+ - 🔄 **Async Support** – Built-in async utilities for high-concurrency scraping
14
+ - 🧩 **Robust Parsing** – Encoding detection and content normalization for reliable results
15
+ - 🧑‍💻 **Function-Based API** – Clean and intuitive interface for ease of use
16
+ - 📦 **Multi-Format Support** – Parse **HTML, XML, and JSON** in a single library
17
+
18
+
19
+ ### ⚡ Performance Comparison
20
+
21
+ The following benchmarks were run on sample HTML and JSON data to compare **scrapery** with other popular Python libraries. Performance may vary depending on system, Python version, and file size.
22
+
23
+ | Library | HTML Parse Time | JSON Parse Time |
24
+ |-------------------------|----------------|----------------|
25
+ | **scrapery** | 12 ms | 8 ms |
26
+ | **Other library** | 120 ms | N/A |
27
+
28
+ > ⚠️ Actual performance may vary depending on your environment. These results are meant for **illustrative purposes** only. No library is endorsed or affiliated with scrapery.
29
+
30
+
31
+ ---
32
+
33
+ ## 📦 Installation
34
+
35
+ ```bash
36
+ pip install scrapery
+ ```
+
+ ```python
37
+
38
+ # -------------------------------
39
+ # HTML Example
40
+ # -------------------------------
41
+
42
+ import scrapery as scrape
43
+
44
+ html_content = """
45
+ <html>
46
+ <body>
47
+ <h1>Welcome</h1>
48
+ <p>Hello<br>World</p>
49
+ <a href="/about">About Us</a>
50
+ <table>
51
+ <tr><th>Name</th><th>Age</th></tr>
52
+ <tr><td>John</td><td>30</td></tr>
53
+ <tr><td>Jane</td><td>25</td></tr>
54
+ </table>
55
+ </body>
56
+ </html>
57
+ """
58
+
59
+ # Parse HTML content
60
+ doc = scrape.parse_html(html_content)
61
+
62
+ # Extract text
63
+ # CSS selector: First <h1>
64
+ print(scrape.get_selector_content(doc, selector="h1"))
65
+ # ➜ Welcome
66
+
67
+ # XPath: First <h1>
68
+ print(scrape.get_selector_content(doc, selector="//h1"))
69
+ # ➜ Welcome
70
+
71
+ # CSS selector: <a href> attribute
72
+ print(scrape.get_selector_content(doc, selector="a", attr="href"))
73
+ # ➜ /about
74
+
75
+ # XPath: <a> element href
76
+ print(scrape.get_selector_content(doc, selector="//a", attr="href"))
77
+ # ➜ /about
78
+
79
+ # CSS: First <td> in table (John)
80
+ print(scrape.get_selector_content(doc, selector="td"))
81
+ # ➜ John
82
+
83
+ # XPath: Second <td> (//td[2] = 30)
84
+ print(scrape.get_selector_content(doc, selector="//td[2]"))
85
+ # ➜ 30
86
+
87
+ # XPath: Jane's age (//tr[3]/td[2])
88
+ print(scrape.get_selector_content(doc, selector="//tr[3]/td[2]"))
89
+ # ➜ 25
90
+
91
+ # No css selector or XPath: full text
92
+ print(scrape.get_selector_content(doc))
93
+ # ➜ Welcome HelloWorld About Us Name Age John 30 Jane 25
94
+
95
+ # Root attribute (lang, if it existed)
96
+ print(scrape.get_selector_content(doc, attr="lang"))
97
+ # ➜ None
98
+
99
+ #-------------------------
100
+ # DOM navigation
101
+ #-------------------------
102
+ # Example 1: parent, children, siblings
103
+ p_elem = scrape.select_one(doc, "p")
104
+ print("Parent tag of <p>:", scrape.parent(p_elem).tag)
105
+ print("Children of <p>:", [c.tag for c in scrape.children(p_elem)])
106
+ print("Siblings of <p>:", [s.tag for s in scrape.siblings(p_elem)])
107
+
108
+ # Example 2: next_sibling, prev_sibling
109
+ print("Next sibling of <p>:", scrape.next_sibling(p_elem).tag)
110
+ h1_elem = scrape.select_one(doc,"h1")
111
+ print("Previous sibling of <p>:", scrape.prev_sibling(p_elem).tag)
112
+
113
+ # Example 3: ancestors and descendants
114
+ ancs = scrape.ancestors(p_elem)
115
+ print("Ancestor tags of <p>:", [a.tag for a in ancs])
116
+ desc = scrape.descendants(scrape.select_one(doc, "table"))
117
+ print("Descendant tags of <table>:", [d.tag for d in desc])
118
+
119
+ # Example 4: class utilities
120
+ div_html = '<div class="card primary"></div>'
121
+ div_elem = scrape.parse_html(div_html)
122
+ print("Has class 'card'? ->", scrape.has_class(div_elem, "card"))
123
+ print("Classes:", scrape.get_classes(div_elem))
124
+
125
+
126
+ # Extract links
127
+ links = scrape.extract_links(doc)
128
+ print("Links:", links)
129
+
130
+ # Resolve relative URLs
131
+ scrape.resolve_relative_urls(doc, "https://example.com/")
132
+ print("Absolute link:", doc.xpath("//a/@href")[0])
133
+
134
+ # Extract tables
135
+ tables = scrape.get_selector_tables(doc, as_dicts=True)
136
+ print("Tables:", tables)
137
+
138
+ # DOM Navigation
139
+ h1_elem = doc.xpath("//h1")[0]
140
+ parent = scrape.parent(h1_elem)
141
+ children = scrape.children(doc)
142
+ siblings = scrape.next_sibling(h1_elem)
143
+ ancestors = scrape.ancestors(h1_elem)
144
+ print("Parent tag:", parent.tag)
145
+ print("Children count:", len(children))
146
+ print("Next sibling tag:", siblings.tag if siblings else None)
147
+ print("Ancestors:", [a.tag for a in ancestors])
148
+
149
+ # Metadata
150
+ metadata = scrape.get_metadata(doc)
151
+ print("Metadata:", metadata)
152
+
153
+ # -------------------------------
154
+ # XML Example
155
+ # -------------------------------
156
+
157
+ xml_content = """
158
+ <users>
159
+ <user id="1"><name>John</name></user>
160
+ <user id="2"><name>Jane</name></user>
161
+ </users>
162
+ """
163
+
164
+ xml_doc = scrape.parse_xml(xml_content)
165
+ users = scrape.find_xml_all(xml_doc, "//user")
166
+ for u in users:
167
+ print(u.attrib, u.xpath("./name/text()")[0])
168
+
169
+ # Convert XML to dict
170
+ xml_dict = scrape.xml_to_dict(xml_doc)
171
+ print(xml_dict)
172
+
173
+ # -------------------------------
174
+ # JSON Example
175
+ # -------------------------------
176
+
177
+ json_content = '{"users":[{"name":"John","age":30},{"name":"Jane","age":25}]}'
178
+ data = scrape.parse_json(json_content)
179
+
180
+ # Access using path
181
+ john_age = scrape.json_get_value(data, "users.0.age")
182
+ print("John's age:", john_age)
183
+
184
+ # Extract all names
185
+ names = scrape.json_extract_values(data, "name")
186
+ print("Names:", names)
187
+
188
+ # Flatten JSON
189
+ flat = scrape.json_flatten(data)
190
+ print("Flattened JSON:", flat)
191
+
192
+
193
+
@@ -0,0 +1,18 @@
1
+ """
2
+ Scrapery - A high-performance web scraping library
3
+ """
4
+ from .html import *
5
+ from .xml import *
6
+ from .json import *
7
+ from .utils import *
8
+
9
+
10
+ __version__ = "0.0.1"
11
+
12
+ # Gather all __all__ from submodules to define the public API
13
+ __all__ = (
14
+ html_api.__all__
15
+ + xml_api.__all__
16
+ + json_api.__all__
17
+ + utils.__all__
18
+ )
@@ -0,0 +1,40 @@
1
# exceptions.py
"""
Custom exceptions for the Scrapery package.

All errors raised by Scrapery derive from :class:`ScraperyError`, so
callers can catch that single base class to handle any package failure.
"""


class ScraperyError(Exception):
    """Base class for all Scrapery exceptions."""


class ParserError(ScraperyError):
    """Raised when parsing of HTML, XML, or JSON fails."""


class FileError(ScraperyError):
    """Raised when reading a file fails."""


class InvalidSelectorError(ScraperyError):
    """Raised when a CSS or XPath selector is invalid."""


class ElementNotFoundError(ScraperyError):
    """Raised when a requested element is not found."""


class ValidationError(ScraperyError):
    """Raised when input validation fails."""


class SelectorError(ScraperyError):
    """Raised for selector evaluation errors."""


class NetworkError(ScraperyError):
    """Raised for network-related errors."""


class EncodingError(ScraperyError):
    """Raised for encoding-related errors."""
@@ -0,0 +1,169 @@
1
# html_api.py
"""
HTML-specific function-based API using ScraperyHTMLElement.
"""
from typing import Optional
from .html_elements import ScraperyHTMLElement
from .exceptions import ParserError
from .utils import standardized_string

# Public API exported via ``from .html_api import *``.  The original
# list omitted get_selector_elements, has_class and get_classes even
# though they are public functions documented in the README, so they
# were silently missing from ``import scrapery``.
__all__ = [
    "parse_html",
    "prettify",
    "get_selector_elements",
    "select_all",
    "select_one",
    "get_selector_content",
    "get_metadata",
    "parent",
    "children",
    "siblings",
    "next_sibling",
    "prev_sibling",
    "ancestors",
    "descendants",
    "has_class",
    "get_classes",
]
25
+
26
def parse_html(html_content: str | bytes, **kwargs) -> ScraperyHTMLElement:
    """
    Parse an HTML document (text or raw bytes) into a ScraperyHTMLElement.

    Extra keyword arguments are forwarded unchanged to
    ``ScraperyHTMLElement.from_html``.

    Raises:
        ParserError: wrapping any underlying parser failure; the original
            exception is preserved as ``__cause__`` for debugging.
    """
    try:
        return ScraperyHTMLElement.from_html(html_content, **kwargs)
    except Exception as e:
        # Chain the cause so tracebacks show the real parser error.
        raise ParserError(f"Failed to parse HTML: {e}") from e
31
+
32
def prettify(element: ScraperyHTMLElement) -> str:
    """Serialize *element* as indented, human-readable HTML markup."""
    pretty_markup = element.html(pretty=True)
    return pretty_markup
34
+
35
+ def _detect_selector_method(selector: str) -> str:
36
+ """
37
+ Detect whether the selector is XPath or CSS with robust rules.
38
+ """
39
+ selector = selector.strip()
40
+
41
+ # Strong XPath signals
42
+ xpath_signals = ["//", ".//", "/", "@", "contains(", "starts-with(", "text()", "::", "[", "]"]
43
+
44
+ if any(sig in selector for sig in xpath_signals):
45
+ return "xpath"
46
+
47
+ # Default fallback → CSS
48
+ return "css"
49
+
50
def get_selector_elements(element: ScraperyHTMLElement, selector: str) -> list[ScraperyHTMLElement]:
    """Return every element matching *selector*, auto-detecting CSS vs XPath."""
    if _detect_selector_method(selector) == "xpath":
        return element.xpath(selector)
    return element.css(selector)
56
+
57
def select_all(element: ScraperyHTMLElement, selector: str) -> list[ScraperyHTMLElement]:
    """Alias for :func:`get_selector_elements`; returns all matches."""
    matches = get_selector_elements(element, selector)
    return matches
59
+
60
def select_one(element: ScraperyHTMLElement, selector: str) -> ScraperyHTMLElement | None:
    """Return the first element matching *selector*, or None when nothing matches."""
    matches = get_selector_elements(element, selector)
    if matches:
        return matches[0]
    return None
63
+
64
def get_selector_content(
    element: Optional[ScraperyHTMLElement],
    selector: Optional[str] = None,
    attr: Optional[str] = None
) -> Optional[str]:
    """
    Extract content from a ScraperyHTMLElement using CSS or XPath auto-detection.

    Behaviour matrix:
      * selector + attr -> attribute value of the first matching element
      * selector only   -> text of the first matching element
      * attr only       -> attribute value read from *element* itself
      * neither         -> full text content of *element*

    Returns ``None`` when *element* is None, nothing matches, or the
    requested value is empty/falsy.  Unexpected errors are reported and
    swallowed — this function is deliberately non-raising.
    """
    if element is None:
        return None

    try:
        # Case: no selector — read directly from the element itself.
        if not selector:
            if attr:
                # Fetch once; the original called element.attr() twice
                # for the same value.
                value = element.attr(attr, default=None)
                return standardized_string(value) if value else None
            text = element.text()
            return standardized_string(text) if text else None

        # Detect selector method (css or xpath) and fetch first match.
        method = _detect_selector_method(selector)
        if method == "xpath":
            result = element.xpath_one(selector)
        else:  # css
            result = element.css_one(selector)

        if result is None:
            return None

        if attr:
            return standardized_string(result.attr(attr, default=None))
        return standardized_string(result.text())

    except Exception as e:
        # Deliberate best-effort contract: report and return None rather
        # than propagate scraping errors to the caller.
        print(f"Error in get_selector_content: {e}")
        return None
107
+
108
+
109
+ # DOM navigation functions
110
+
111
def parent(element: ScraperyHTMLElement) -> ScraperyHTMLElement | None:
    """Return the direct parent of *element* (None at the document root)."""
    parent_node = element.parent()
    return parent_node
113
+
114
def children(element: ScraperyHTMLElement) -> list[ScraperyHTMLElement]:
    """Return the direct child elements of *element*, in document order."""
    child_nodes = element.children()
    return child_nodes
116
+
117
def siblings(element: ScraperyHTMLElement) -> list[ScraperyHTMLElement]:
    """Return all siblings of *element*: every child of its parent except itself."""
    parent_node = element.parent()
    if not parent_node:
        return []
    self_raw = element._unwrap()
    # Compare unwrapped nodes by identity so wrapper objects don't matter.
    return [node for node in parent_node.children() if node._unwrap() is not self_raw]
122
+
123
def next_sibling(element: ScraperyHTMLElement) -> ScraperyHTMLElement | None:
    """Return the sibling immediately after *element*, or None if it is last/rootless."""
    parent_node = element.parent()
    if parent_node is None:
        return None
    kids = parent_node.children()
    self_raw = element._unwrap()
    for idx, kid in enumerate(kids):
        # Identity comparison on the unwrapped nodes locates *element*.
        if kid._unwrap() is self_raw:
            return kids[idx + 1] if idx + 1 < len(kids) else None
    return None
133
+
134
+
135
def prev_sibling(element: ScraperyHTMLElement) -> ScraperyHTMLElement | None:
    """Return the sibling immediately before *element*, or None if it is first/rootless."""
    parent_node = element.parent()
    if parent_node is None:
        return None
    kids = parent_node.children()
    self_raw = element._unwrap()
    for idx, kid in enumerate(kids):
        # Identity comparison on the unwrapped nodes locates *element*.
        if kid._unwrap() is self_raw:
            return kids[idx - 1] if idx > 0 else None
    return None
145
+
146
def ancestors(element: ScraperyHTMLElement) -> list[ScraperyHTMLElement]:
    """Return the ancestors of *element*, ordered from nearest parent up to the root."""
    chain: list[ScraperyHTMLElement] = []
    node = element.parent()
    while node:
        chain.append(node)
        node = node.parent()
    return chain
153
+
154
def descendants(element: ScraperyHTMLElement) -> list[ScraperyHTMLElement]:
    """Return every descendant of *element* in pre-order (document) order."""
    found: list[ScraperyHTMLElement] = []
    # Iterative pre-order DFS: push children reversed so the leftmost
    # child is popped (and therefore visited) first.
    stack = list(reversed(element.children()))
    while stack:
        node = stack.pop()
        found.append(node)
        stack.extend(reversed(node.children()))
    return found
162
+
163
def has_class(element: ScraperyHTMLElement, class_name: str) -> bool:
    """Return True when *class_name* appears in the element's class attribute."""
    css_classes = element.attr("class", "").split()
    return class_name in css_classes
165
+
166
def get_classes(element: ScraperyHTMLElement) -> list[str]:
    """Return the element's CSS classes as a list (empty when none are set)."""
    class_attr = element.attr("class", "")
    return class_attr.split()
168
+
169
+