html-table-parse 0.2__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.3
2
+ Name: html-table-parse
3
+ Version: 0.2.2
4
+ Summary: Parse HTML table as Python list or dict
5
+ Author: 5j9
6
+ Author-email: 5j9 <5j9@users.noreply.github.com>
7
+ License: GPL-3.0
8
+ Requires-Dist: lxml>=6.1.1
9
+ Requires-Python: >=3.10
10
+ Project-URL: Homepage, https://github.com/5j9/html-table-parse
11
+ Description-Content-Type: text/markdown
12
+
13
+ # HTML Table Parse
14
+
15
+ A lightweight HTML table parser that converts tables to Python data structures without pandas.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install html-table-parse
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ```python
26
+ from html_table_parse import to_list, to_dict, to_dicts
27
+
28
+ html = """
29
+ <table>
30
+ <tr><th>Name</th><th>Age</th><th>City</th></tr>
31
+ <tr><td>Alice</td><td>30</td><td>NYC</td></tr>
32
+ <tr><td>Bob</td><td>25</td><td>LA</td></tr>
33
+ </table>
34
+ """
35
+
36
+ # List of lists
37
+ to_list(html)
38
+ # [['Name', 'Age', 'City'], ['Alice', '30', 'NYC'], ['Bob', '25', 'LA']]
39
+
40
+ # Dictionary of columns
41
+ to_dict(html)
42
+ # {'Name': ['Alice', 'Bob'], 'Age': ['30', '25'], 'City': ['NYC', 'LA']}
43
+
44
+ # List of dictionaries
45
+ to_dicts(html)
46
+ # [{'Name': 'Alice', 'Age': '30', 'City': 'NYC'},
47
+ # {'Name': 'Bob', 'Age': '25', 'City': 'LA'}]
48
+ ```
49
+
50
+ ## Features
51
+
52
+ - No pandas required - lightweight alternative to `pandas.read_html()`
53
+ - Supports `colspan` and `rowspan` attributes
54
+ - Handles duplicate headers (auto-numbered)
55
+ - Multiple output formats: lists, dict of columns, or list of dicts
56
+ - Automatic whitespace normalization
57
+ - Fast parsing with `lxml`
58
+
59
+ ## API
60
+
61
+ ### `to_list(html: str, index: int = 0) -> list[list]`
62
+
63
+ Parse table as list of rows.
64
+
65
+ ### `to_dict(html: str, index: int = 0) -> dict[str, list]`
66
+
67
+ Parse table as dictionary of columns (first row = headers).
68
+
69
+ ### `to_dicts(html: str, index: int = 0) -> list[dict]`
70
+
71
+ Parse table as list of dictionaries (first row = headers).
@@ -0,0 +1,59 @@
1
+ # HTML Table Parse
2
+
3
+ A lightweight HTML table parser that converts tables to Python data structures without pandas.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install html-table-parse
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from html_table_parse import to_list, to_dict, to_dicts
15
+
16
+ html = """
17
+ <table>
18
+ <tr><th>Name</th><th>Age</th><th>City</th></tr>
19
+ <tr><td>Alice</td><td>30</td><td>NYC</td></tr>
20
+ <tr><td>Bob</td><td>25</td><td>LA</td></tr>
21
+ </table>
22
+ """
23
+
24
+ # List of lists
25
+ to_list(html)
26
+ # [['Name', 'Age', 'City'], ['Alice', '30', 'NYC'], ['Bob', '25', 'LA']]
27
+
28
+ # Dictionary of columns
29
+ to_dict(html)
30
+ # {'Name': ['Alice', 'Bob'], 'Age': ['30', '25'], 'City': ['NYC', 'LA']}
31
+
32
+ # List of dictionaries
33
+ to_dicts(html)
34
+ # [{'Name': 'Alice', 'Age': '30', 'City': 'NYC'},
35
+ # {'Name': 'Bob', 'Age': '25', 'City': 'LA'}]
36
+ ```
37
+
38
+ ## Features
39
+
40
+ - No pandas required - lightweight alternative to `pandas.read_html()`
41
+ - Supports `colspan` and `rowspan` attributes
42
+ - Handles duplicate headers (auto-numbered)
43
+ - Multiple output formats: lists, dict of columns, or list of dicts
44
+ - Automatic whitespace normalization
45
+ - Fast parsing with `lxml`
46
+
47
+ ## API
48
+
49
+ ### `to_list(html: str, index: int = 0) -> list[list]`
50
+
51
+ Parse table as list of rows.
52
+
53
+ ### `to_dict(html: str, index: int = 0) -> dict[str, list]`
54
+
55
+ Parse table as dictionary of columns (first row = headers).
56
+
57
+ ### `to_dicts(html: str, index: int = 0) -> list[dict]`
58
+
59
+ Parse table as list of dictionaries (first row = headers).
@@ -1,4 +1,4 @@
1
- __version__ = '0.2'
1
+ __version__ = '0.2.2'
2
2
 
3
3
  from collections import defaultdict as _defaultdict
4
4
  from functools import partial as _partial
@@ -4,11 +4,11 @@ build-backend = 'uv_build'
4
4
 
5
5
  [project]
6
6
  name = 'html-table-parse'
7
- version = "0.2"
7
+ version = "0.2.2"
8
8
  authors = [{ name = '5j9', email = '5j9@users.noreply.github.com' }]
9
9
  description = 'Parse HTML table as Python list or dict'
10
- readme = 'README.rst'
11
- requires-python = '>=3.14'
10
+ readme = 'README.md'
11
+ requires-python = '>=3.10'
12
12
  dependencies = [
13
13
  "lxml>=6.1.1",
14
14
  ]
@@ -1,15 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: html-table-parse
3
- Version: 0.2
4
- Summary: Parse HTML table as Python list or dict
5
- Author: 5j9
6
- Author-email: 5j9 <5j9@users.noreply.github.com>
7
- License: GPL-3.0
8
- Requires-Dist: lxml>=6.1.1
9
- Requires-Python: >=3.14
10
- Project-URL: Homepage, https://github.com/5j9/html-table-parse
11
- Description-Content-Type: text/x-rst
12
-
13
- Parse HTML table as Python list or dict
14
-
15
- Requires Python 3.14+
@@ -1,3 +0,0 @@
1
- Parse HTML table as Python list or dict
2
-
3
- Requires Python 3.14+