html-table-parse 0.2__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- html_table_parse-0.2.2/PKG-INFO +71 -0
- html_table_parse-0.2.2/README.md +59 -0
- {html_table_parse-0.2 → html_table_parse-0.2.2}/html_table_parse/__init__.py +1 -1
- {html_table_parse-0.2 → html_table_parse-0.2.2}/pyproject.toml +3 -3
- html_table_parse-0.2/PKG-INFO +0 -15
- html_table_parse-0.2/README.rst +0 -3
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: html-table-parse
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Parse HTML table as Python list or dict
|
|
5
|
+
Author: 5j9
|
|
6
|
+
Author-email: 5j9 <5j9@users.noreply.github.com>
|
|
7
|
+
License: GPL-3.0
|
|
8
|
+
Requires-Dist: lxml>=6.1.1
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Project-URL: Homepage, https://github.com/5j9/html-table-parse
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# HTML Table Parse
|
|
14
|
+
|
|
15
|
+
A lightweight HTML table parser that converts tables to Python data structures without pandas.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install html-table-parse
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from html_table_parse import to_list, to_dict, to_dicts
|
|
27
|
+
|
|
28
|
+
html = """
|
|
29
|
+
<table>
|
|
30
|
+
<tr><th>Name</th><th>Age</th><th>City</th></tr>
|
|
31
|
+
<tr><td>Alice</td><td>30</td><td>NYC</td></tr>
|
|
32
|
+
<tr><td>Bob</td><td>25</td><td>LA</td></tr>
|
|
33
|
+
</table>
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
# List of lists
|
|
37
|
+
to_list(html)
|
|
38
|
+
# [['Name', 'Age', 'City'], ['Alice', '30', 'NYC'], ['Bob', '25', 'LA']]
|
|
39
|
+
|
|
40
|
+
# Dictionary of columns
|
|
41
|
+
to_dict(html)
|
|
42
|
+
# {'Name': ['Alice', 'Bob'], 'Age': ['30', '25'], 'City': ['NYC', 'LA']}
|
|
43
|
+
|
|
44
|
+
# List of dictionaries
|
|
45
|
+
to_dicts(html)
|
|
46
|
+
# [{'Name': 'Alice', 'Age': '30', 'City': 'NYC'},
|
|
47
|
+
# {'Name': 'Bob', 'Age': '25', 'City': 'LA'}]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Features
|
|
51
|
+
|
|
52
|
+
- No pandas required - lightweight alternative to `pandas.read_html()`
|
|
53
|
+
- Supports `colspan` and `rowspan` attributes
|
|
54
|
+
- Handles duplicate headers (auto-numbered)
|
|
55
|
+
- Multiple output formats: lists, dict of columns, or list of dicts
|
|
56
|
+
- Automatic whitespace normalization
|
|
57
|
+
- Fast parsing with `lxml`
|
|
58
|
+
|
|
59
|
+
## API
|
|
60
|
+
|
|
61
|
+
### `to_list(html: str, index: int = 0) -> list[list]`
|
|
62
|
+
|
|
63
|
+
Parse table as list of rows.
|
|
64
|
+
|
|
65
|
+
### `to_dict(html: str, index: int = 0) -> dict[str, list]`
|
|
66
|
+
|
|
67
|
+
Parse table as dictionary of columns (first row = headers).
|
|
68
|
+
|
|
69
|
+
### `to_dicts(html: str, index: int = 0) -> list[dict]`
|
|
70
|
+
|
|
71
|
+
Parse table as list of dictionaries (first row = headers).
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# HTML Table Parse
|
|
2
|
+
|
|
3
|
+
A lightweight HTML table parser that converts tables to Python data structures without pandas.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install html-table-parse
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from html_table_parse import to_list, to_dict, to_dicts
|
|
15
|
+
|
|
16
|
+
html = """
|
|
17
|
+
<table>
|
|
18
|
+
<tr><th>Name</th><th>Age</th><th>City</th></tr>
|
|
19
|
+
<tr><td>Alice</td><td>30</td><td>NYC</td></tr>
|
|
20
|
+
<tr><td>Bob</td><td>25</td><td>LA</td></tr>
|
|
21
|
+
</table>
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# List of lists
|
|
25
|
+
to_list(html)
|
|
26
|
+
# [['Name', 'Age', 'City'], ['Alice', '30', 'NYC'], ['Bob', '25', 'LA']]
|
|
27
|
+
|
|
28
|
+
# Dictionary of columns
|
|
29
|
+
to_dict(html)
|
|
30
|
+
# {'Name': ['Alice', 'Bob'], 'Age': ['30', '25'], 'City': ['NYC', 'LA']}
|
|
31
|
+
|
|
32
|
+
# List of dictionaries
|
|
33
|
+
to_dicts(html)
|
|
34
|
+
# [{'Name': 'Alice', 'Age': '30', 'City': 'NYC'},
|
|
35
|
+
# {'Name': 'Bob', 'Age': '25', 'City': 'LA'}]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- No pandas required - lightweight alternative to `pandas.read_html()`
|
|
41
|
+
- Supports `colspan` and `rowspan` attributes
|
|
42
|
+
- Handles duplicate headers (auto-numbered)
|
|
43
|
+
- Multiple output formats: lists, dict of columns, or list of dicts
|
|
44
|
+
- Automatic whitespace normalization
|
|
45
|
+
- Fast parsing with `lxml`
|
|
46
|
+
|
|
47
|
+
## API
|
|
48
|
+
|
|
49
|
+
### `to_list(html: str, index: int = 0) -> list[list]`
|
|
50
|
+
|
|
51
|
+
Parse table as list of rows.
|
|
52
|
+
|
|
53
|
+
### `to_dict(html: str, index: int = 0) -> dict[str, list]`
|
|
54
|
+
|
|
55
|
+
Parse table as dictionary of columns (first row = headers).
|
|
56
|
+
|
|
57
|
+
### `to_dicts(html: str, index: int = 0) -> list[dict]`
|
|
58
|
+
|
|
59
|
+
Parse table as list of dictionaries (first row = headers).
|
|
@@ -4,11 +4,11 @@ build-backend = 'uv_build'
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = 'html-table-parse'
|
|
7
|
-
version = "0.2"
|
|
7
|
+
version = "0.2.2"
|
|
8
8
|
authors = [{ name = '5j9', email = '5j9@users.noreply.github.com' }]
|
|
9
9
|
description = 'Parse HTML table as Python list or dict'
|
|
10
|
-
readme = 'README.rst'
|
|
11
|
-
requires-python = '>=3.14'
|
|
10
|
+
readme = 'README.md'
|
|
11
|
+
requires-python = '>=3.10'
|
|
12
12
|
dependencies = [
|
|
13
13
|
"lxml>=6.1.1",
|
|
14
14
|
]
|
html_table_parse-0.2/PKG-INFO
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: html-table-parse
|
|
3
|
-
Version: 0.2
|
|
4
|
-
Summary: Parse HTML table as Python list or dict
|
|
5
|
-
Author: 5j9
|
|
6
|
-
Author-email: 5j9 <5j9@users.noreply.github.com>
|
|
7
|
-
License: GPL-3.0
|
|
8
|
-
Requires-Dist: lxml>=6.1.1
|
|
9
|
-
Requires-Python: >=3.14
|
|
10
|
-
Project-URL: Homepage, https://github.com/5j9/html-table-parse
|
|
11
|
-
Description-Content-Type: text/x-rst
|
|
12
|
-
|
|
13
|
-
Parse HTML table as Python list or dict
|
|
14
|
-
|
|
15
|
-
Requires Python 3.14+
|
html_table_parse-0.2/README.rst
DELETED