html-table-parse 0.2__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- __version__ = '0.2'
1
+ __version__ = '0.2.2'
2
2
 
3
3
  from collections import defaultdict as _defaultdict
4
4
  from functools import partial as _partial
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.3
2
+ Name: html-table-parse
3
+ Version: 0.2.2
4
+ Summary: Parse HTML table as Python list or dict
5
+ Author: 5j9
6
+ Author-email: 5j9 <5j9@users.noreply.github.com>
7
+ License: GPL-3.0
8
+ Requires-Dist: lxml>=6.1.1
9
+ Requires-Python: >=3.10
10
+ Project-URL: Homepage, https://github.com/5j9/html-table-parse
11
+ Description-Content-Type: text/markdown
12
+
13
+ # HTML Table Parse
14
+
15
+ A lightweight HTML table parser that converts tables to Python data structures without pandas.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install html-table-parse
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ```python
26
+ from html_table_parse import to_list, to_dict, to_dicts
27
+
28
+ html = """
29
+ <table>
30
+ <tr><th>Name</th><th>Age</th><th>City</th></tr>
31
+ <tr><td>Alice</td><td>30</td><td>NYC</td></tr>
32
+ <tr><td>Bob</td><td>25</td><td>LA</td></tr>
33
+ </table>
34
+ """
35
+
36
+ # List of lists
37
+ to_list(html)
38
+ # [['Name', 'Age', 'City'], ['Alice', '30', 'NYC'], ['Bob', '25', 'LA']]
39
+
40
+ # Dictionary of columns
41
+ to_dict(html)
42
+ # {'Name': ['Alice', 'Bob'], 'Age': ['30', '25'], 'City': ['NYC', 'LA']}
43
+
44
+ # List of dictionaries
45
+ to_dicts(html)
46
+ # [{'Name': 'Alice', 'Age': '30', 'City': 'NYC'},
47
+ # {'Name': 'Bob', 'Age': '25', 'City': 'LA'}]
48
+ ```
49
+
50
+ ## Features
51
+
52
+ - No pandas required - lightweight alternative to `pandas.read_html()`
53
+ - Supports `colspan` and `rowspan` attributes
54
+ - Handles duplicate headers (auto-numbered)
55
+ - Multiple output formats: lists, dict of columns, or list of dicts
56
+ - Automatic whitespace normalization
57
+ - Fast parsing with `lxml`
58
+
59
+ ## API
60
+
61
+ ### `to_list(html: str, index: int = 0) -> list[list]`
62
+
63
+ Parse the table as a list of rows.
64
+
65
+ ### `to_dict(html: str, index: int = 0) -> dict[str, list]`
66
+
67
+ Parse the table as a dictionary of columns (the first row supplies the headers).
68
+
69
+ ### `to_dicts(html: str, index: int = 0) -> list[dict]`
70
+
71
+ Parse the table as a list of dictionaries (the first row supplies the headers).
@@ -0,0 +1,4 @@
1
+ html_table_parse/__init__.py,sha256=ucjFk6-e7MpqRw_MTbaSIBJWkfOsfAJq7mfixAd350E,4944
2
+ html_table_parse-0.2.2.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
3
+ html_table_parse-0.2.2.dist-info/METADATA,sha256=zjUa1mJaN7X60BRE9RPB_-MINvItiH5mzxOjPuqdY_Y,1837
4
+ html_table_parse-0.2.2.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: html-table-parse
3
- Version: 0.2
4
- Summary: Parse HTML table as Python list or dict
5
- Author: 5j9
6
- Author-email: 5j9 <5j9@users.noreply.github.com>
7
- License: GPL-3.0
8
- Requires-Dist: lxml>=6.1.1
9
- Requires-Python: >=3.14
10
- Project-URL: Homepage, https://github.com/5j9/html-table-parse
11
- Description-Content-Type: text/x-rst
12
-
13
- Parse HTML table as Python list or dict
14
-
15
- Requires Python 3.14+
@@ -1,4 +0,0 @@
1
- html_table_parse/__init__.py,sha256=WBw2LjxXiqRISTzXeoYlE0_LfUQPW9In2wQVhwpXDLk,4942
2
- html_table_parse-0.2.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
3
- html_table_parse-0.2.dist-info/METADATA,sha256=FcUDwK2un5zDd6RgyOUEL9RENEmLxuK04Dl1555E5BQ,400
4
- html_table_parse-0.2.dist-info/RECORD,,