pylindol 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylindol-0.1.0/PKG-INFO +166 -0
- pylindol-0.1.0/README.md +149 -0
- pylindol-0.1.0/pyproject.toml +43 -0
- pylindol-0.1.0/src/pylindol/__init__.py +3 -0
- pylindol-0.1.0/src/pylindol/ca_certificates/GlobalSign RSA OV SSL CA 2018.pem +26 -0
- pylindol-0.1.0/src/pylindol/cli.py +35 -0
- pylindol-0.1.0/src/pylindol/config/__init__.py +3 -0
- pylindol-0.1.0/src/pylindol/config/paths.py +7 -0
- pylindol-0.1.0/src/pylindol/earthquake_info_scraper.py +181 -0
- pylindol-0.1.0/src/pylindol/tests/__init__.py +1 -0
- pylindol-0.1.0/src/pylindol/tests/test_cli.py +83 -0
- pylindol-0.1.0/src/pylindol/tests/test_earthquake_info_scraper.py +217 -0
pylindol-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pylindol
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A tool to download earthquake report information from the Philippine Institute of Volcanology and Seismology (PHIVOLCS) website.
|
|
5
|
+
Author: clnhrn
|
|
6
|
+
Author-email: clnhrn <herniacln@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Dist: beautifulsoup4>=4.14.2
|
|
9
|
+
Requires-Dist: certifi>=2024.0.0
|
|
10
|
+
Requires-Dist: click>=8.1.8
|
|
11
|
+
Requires-Dist: loguru>=0.7.3
|
|
12
|
+
Requires-Dist: lxml>=6.0.2
|
|
13
|
+
Requires-Dist: pandas>=2.3.3
|
|
14
|
+
Requires-Dist: requests>=2.32.5
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# pylindol
|
|
19
|
+
|
|
20
|
+

|
|
21
|
+
|
|
22
|
+
pylindol is a lightweight and easy-to-use library designed to scrape or pull the latest earthquake data from the [Philippine Institute of Volcanology and Seismology (PHIVOLCS)](https://earthquake.phivolcs.dost.gov.ph) website. It provides a simple API to get up-to-date information for your applications, scripts, or research projects.
|
|
23
|
+
|
|
24
|
+
## Requirements
|
|
25
|
+
|
|
26
|
+
- Python >= 3.11
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
Install pylindol directly from PyPI:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install pylindol
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Using a virtual environment (recommended)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Create a virtual environment
|
|
40
|
+
python3 -m venv .venv
|
|
41
|
+
|
|
42
|
+
# Activate the virtual environment
|
|
43
|
+
source .venv/bin/activate # On macOS/Linux
|
|
44
|
+
# .venv\Scripts\activate # On Windows
|
|
45
|
+
|
|
46
|
+
# Install pylindol
|
|
47
|
+
pip install pylindol
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Using uv
|
|
51
|
+
|
|
52
|
+
If you prefer using `uv` for faster package management:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv add pylindol
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Usage
|
|
59
|
+
|
|
60
|
+
### Command Line Interface (CLI)
|
|
61
|
+
|
|
62
|
+
The package provides the `pylindol` command after installation.
|
|
63
|
+
|
|
64
|
+
#### Basic usage (scrape current month)
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pylindol
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
#### Scrape a specific month and year
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pylindol --month 8 --year 2025
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
#### Specify custom output directory
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pylindol --output-path my_data
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### Combine options
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pylindol --month 9 --year 2025 --output-path archive
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
#### Get help
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pylindol --help
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Python Library
|
|
95
|
+
|
|
96
|
+
You can also use the scraper as a Python library in your code.
|
|
97
|
+
|
|
98
|
+
#### Import the class
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from pylindol import PhivolcsEarthquakeInfoScraper
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
#### Scrape current month
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
scraper = PhivolcsEarthquakeInfoScraper()
|
|
108
|
+
scraper.run()
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### Scrape specific month and year
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
scraper = PhivolcsEarthquakeInfoScraper(month=8, year=2025)
|
|
115
|
+
scraper.run()
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### Specify custom output path
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
scraper = PhivolcsEarthquakeInfoScraper(
|
|
122
|
+
month=9,
|
|
123
|
+
year=2025,
|
|
124
|
+
output_path="custom/directory"
|
|
125
|
+
)
|
|
126
|
+
scraper.run()
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Features
|
|
130
|
+
|
|
131
|
+
- ✅ Scrape current month's earthquake data
|
|
132
|
+
- ✅ Scrape historical data by month and year
|
|
133
|
+
- ✅ Automatic CA certificate handling for SSL connections
|
|
134
|
+
- ✅ Input validation (month range, year validation, and future date prevention)
|
|
135
|
+
- ✅ Export data to CSV format
|
|
136
|
+
- ✅ Structured logging with loguru
|
|
137
|
+
|
|
138
|
+
## Output
|
|
139
|
+
|
|
140
|
+
The scraper saves earthquake data as CSV files with the naming convention:
|
|
141
|
+
|
|
142
|
+
```
|
|
143
|
+
phivolcs_earthquake_data_{month}_{year}.csv
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**Default location:** `data/` directory (created automatically if it doesn't exist)
|
|
147
|
+
|
|
148
|
+
**Example:** `data/phivolcs_earthquake_data_10_2025.csv`
|
|
149
|
+
|
|
150
|
+
The CSV files contain earthquake information including date, time, magnitude, location, and depth.
|
|
151
|
+
|
|
152
|
+
## Development
|
|
153
|
+
|
|
154
|
+
If you want to contribute to pylindol or run it from source:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Clone the repository
|
|
158
|
+
git clone git@github.com:clnhrn/pylindol.git
|
|
159
|
+
cd pylindol
|
|
160
|
+
|
|
161
|
+
# Install in development mode
|
|
162
|
+
pip install -e .
|
|
163
|
+
|
|
164
|
+
# Or using uv
|
|
165
|
+
uv sync
|
|
166
|
+
```
|
pylindol-0.1.0/README.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# pylindol
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
pylindol is a lightweight and easy-to-use library designed to scrape or pull the latest earthquake data from the [Philippine Institute of Volcanology and Seismology (PHIVOLCS)](https://earthquake.phivolcs.dost.gov.ph) website. It provides a simple API to get up-to-date information for your applications, scripts, or research projects.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- Python >= 3.11
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
Install pylindol directly from PyPI:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install pylindol
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### Using a virtual environment (recommended)
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Create a virtual environment
|
|
23
|
+
python3 -m venv .venv
|
|
24
|
+
|
|
25
|
+
# Activate the virtual environment
|
|
26
|
+
source .venv/bin/activate # On macOS/Linux
|
|
27
|
+
# .venv\Scripts\activate # On Windows
|
|
28
|
+
|
|
29
|
+
# Install pylindol
|
|
30
|
+
pip install pylindol
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Using uv
|
|
34
|
+
|
|
35
|
+
If you prefer using `uv` for faster package management:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
uv add pylindol
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
|
|
43
|
+
### Command Line Interface (CLI)
|
|
44
|
+
|
|
45
|
+
The package provides the `pylindol` command after installation.
|
|
46
|
+
|
|
47
|
+
#### Basic usage (scrape current month)
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pylindol
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
#### Scrape a specific month and year
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pylindol --month 8 --year 2025
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### Specify custom output directory
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pylindol --output-path my_data
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### Combine options
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pylindol --month 9 --year 2025 --output-path archive
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### Get help
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pylindol --help
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Python Library
|
|
78
|
+
|
|
79
|
+
You can also use the scraper as a Python library in your code.
|
|
80
|
+
|
|
81
|
+
#### Import the class
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from pylindol import PhivolcsEarthquakeInfoScraper
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### Scrape current month
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
scraper = PhivolcsEarthquakeInfoScraper()
|
|
91
|
+
scraper.run()
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
#### Scrape specific month and year
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
scraper = PhivolcsEarthquakeInfoScraper(month=8, year=2025)
|
|
98
|
+
scraper.run()
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### Specify custom output path
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
scraper = PhivolcsEarthquakeInfoScraper(
|
|
105
|
+
month=9,
|
|
106
|
+
year=2025,
|
|
107
|
+
output_path="custom/directory"
|
|
108
|
+
)
|
|
109
|
+
scraper.run()
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Features
|
|
113
|
+
|
|
114
|
+
- ✅ Scrape current month's earthquake data
|
|
115
|
+
- ✅ Scrape historical data by month and year
|
|
116
|
+
- ✅ Automatic CA certificate handling for SSL connections
|
|
117
|
+
- ✅ Input validation (month range, year validation, and future date prevention)
|
|
118
|
+
- ✅ Export data to CSV format
|
|
119
|
+
- ✅ Structured logging with loguru
|
|
120
|
+
|
|
121
|
+
## Output
|
|
122
|
+
|
|
123
|
+
The scraper saves earthquake data as CSV files with the naming convention:
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
phivolcs_earthquake_data_{month}_{year}.csv
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Default location:** `data/` directory (created automatically if it doesn't exist)
|
|
130
|
+
|
|
131
|
+
**Example:** `data/phivolcs_earthquake_data_10_2025.csv`
|
|
132
|
+
|
|
133
|
+
The CSV files contain earthquake information including date, time, magnitude, location, and depth.
|
|
134
|
+
|
|
135
|
+
## Development
|
|
136
|
+
|
|
137
|
+
If you want to contribute to pylindol or run it from source:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Clone the repository
|
|
141
|
+
git clone git@github.com:clnhrn/pylindol.git
|
|
142
|
+
cd pylindol
|
|
143
|
+
|
|
144
|
+
# Install in development mode
|
|
145
|
+
pip install -e .
|
|
146
|
+
|
|
147
|
+
# Or using uv
|
|
148
|
+
uv sync
|
|
149
|
+
```
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pylindol"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A tool to download earthquake report information from the Philippine Institute of Volcanology and Seismology (PHIVOLCS) website."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = {text = "MIT"}
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "clnhrn", email = "herniacln@gmail.com" }
|
|
9
|
+
]
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"beautifulsoup4>=4.14.2",
|
|
13
|
+
"certifi>=2024.0.0",
|
|
14
|
+
"click>=8.1.8",
|
|
15
|
+
"loguru>=0.7.3",
|
|
16
|
+
"lxml>=6.0.2",
|
|
17
|
+
"pandas>=2.3.3",
|
|
18
|
+
"requests>=2.32.5",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
pylindol = "pylindol.cli:main"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["uv_build>=0.8.0,<0.9"]
|
|
26
|
+
build-backend = "uv_build"
|
|
27
|
+
|
|
28
|
+
[dependency-groups]
|
|
29
|
+
dev = [
|
|
30
|
+
"coverage-badge>=1.1.2",
|
|
31
|
+
"pre-commit>=4.3.0",
|
|
32
|
+
"pytest>=8.0.0",
|
|
33
|
+
"pytest-cov>=7.0.0",
|
|
34
|
+
"pytest-mock>=3.12.0",
|
|
35
|
+
"responses>=0.25.0",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
testpaths = ["src/pylindol/tests"]
|
|
40
|
+
python_files = ["test_*.py"]
|
|
41
|
+
python_classes = ["Test*"]
|
|
42
|
+
python_functions = ["test_*"]
|
|
43
|
+
addopts = "-v --tb=short"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
|
2
|
+
MIIETjCCAzagAwIBAgINAe5fIh38YjvUMzqFVzANBgkqhkiG9w0BAQsFADBMMSAw
|
|
3
|
+
HgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMzETMBEGA1UEChMKR2xvYmFs
|
|
4
|
+
U2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjAeFw0xODExMjEwMDAwMDBaFw0yODEx
|
|
5
|
+
MjEwMDAwMDBaMFAxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52
|
|
6
|
+
LXNhMSYwJAYDVQQDEx1HbG9iYWxTaWduIFJTQSBPViBTU0wgQ0EgMjAxODCCASIw
|
|
7
|
+
DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKdaydUMGCEAI9WXD+uu3Vxoa2uP
|
|
8
|
+
UGATeoHLl+6OimGUSyZ59gSnKvuk2la77qCk8HuKf1UfR5NhDW5xUTolJAgvjOH3
|
|
9
|
+
idaSz6+zpz8w7bXfIa7+9UQX/dhj2S/TgVprX9NHsKzyqzskeU8fxy7quRU6fBhM
|
|
10
|
+
abO1IFkJXinDY+YuRluqlJBJDrnw9UqhCS98NE3QvADFBlV5Bs6i0BDxSEPouVq1
|
|
11
|
+
lVW9MdIbPYa+oewNEtssmSStR8JvA+Z6cLVwzM0nLKWMjsIYPJLJLnNvBhBWk0Cq
|
|
12
|
+
o8VS++XFBdZpaFwGue5RieGKDkFNm5KQConpFmvv73W+eka440eKHRwup08CAwEA
|
|
13
|
+
AaOCASkwggElMA4GA1UdDwEB/wQEAwIBhjASBgNVHRMBAf8ECDAGAQH/AgEAMB0G
|
|
14
|
+
A1UdDgQWBBT473/yzXhnqN5vjySNiPGHAwKz6zAfBgNVHSMEGDAWgBSP8Et/qC5F
|
|
15
|
+
JK5NUPpjmove4t0bvDA+BggrBgEFBQcBAQQyMDAwLgYIKwYBBQUHMAGGImh0dHA6
|
|
16
|
+
Ly9vY3NwMi5nbG9iYWxzaWduLmNvbS9yb290cjMwNgYDVR0fBC8wLTAroCmgJ4Yl
|
|
17
|
+
aHR0cDovL2NybC5nbG9iYWxzaWduLmNvbS9yb290LXIzLmNybDBHBgNVHSAEQDA+
|
|
18
|
+
MDwGBFUdIAAwNDAyBggrBgEFBQcCARYmaHR0cHM6Ly93d3cuZ2xvYmFsc2lnbi5j
|
|
19
|
+
b20vcmVwb3NpdG9yeS8wDQYJKoZIhvcNAQELBQADggEBAJmQyC1fQorUC2bbmANz
|
|
20
|
+
EdSIhlIoU4r7rd/9c446ZwTbw1MUcBQJfMPg+NccmBqixD7b6QDjynCy8SIwIVbb
|
|
21
|
+
0615XoFYC20UgDX1b10d65pHBf9ZjQCxQNqQmJYaumxtf4z1s4DfjGRzNpZ5eWl0
|
|
22
|
+
6r/4ngGPoJVpjemEuunl1Ig423g7mNA2eymw0lIYkN5SQwCuaifIFJ6GlazhgDEw
|
|
23
|
+
fpolu4usBCOmmQDo8dIm7A9+O4orkjgTHY+GzYZSR+Y0fFukAj6KYXwidlNalFMz
|
|
24
|
+
hriSqHKvoflShx8xpfywgVcvzfTO3PYkz6fiNJBonf6q8amaEsybwMbDqKWwIX7e
|
|
25
|
+
SPY=
|
|
26
|
+
-----END CERTIFICATE-----
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from pylindol.earthquake_info_scraper import PhivolcsEarthquakeInfoScraper
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@click.command()
@click.option(
    "--month",
    # IntRange makes Click reject out-of-range months with a usage error
    # before the scraper is constructed, instead of surfacing a traceback.
    type=click.IntRange(1, 12),
    default=None,
    help="Month to scrape (1-12). If not provided, scrapes current month.",
)
@click.option(
    "--year",
    type=int,
    default=None,
    help="Year to scrape. If not provided, scrapes current year.",
)
@click.option(
    "--output-path",
    type=str,
    default="data",
    help="Path to save the output CSV file. Default is 'data'.",
)
def main(month, year, output_path):
    """
    Scrape earthquake information from PHIVOLCS website.

    By default, scrapes the current month's data. You can specify a different
    month and year to scrape historical data.
    """
    # NOTE: the docstring above is rendered by Click as the --help text, so
    # its wording is part of the command's user-facing output.
    #
    # Remaining validation (month/year pairing, year bounds, future dates) is
    # done by the scraper, which raises ValueError.
    scraper = PhivolcsEarthquakeInfoScraper(
        month=month, year=year, output_path=output_path
    )
    scraper.run()
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from io import StringIO
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import requests
|
|
8
|
+
from bs4 import BeautifulSoup
|
|
9
|
+
from loguru import logger
|
|
10
|
+
|
|
11
|
+
from pylindol.config.paths import CA_CERTIFICATE_PATH
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PhivolcsEarthquakeInfoScraper:
    """
    Scrape earthquake information tables from the PHIVOLCS website.

    With no arguments the scraper targets the current month, which ``run()``
    reads from the site's main page. When both ``month`` and ``year`` are
    given, the matching monthly archive page is used instead (unless they
    denote the current month). The extracted table is written to a CSV file
    inside ``output_path``.
    """

    # English month names used to build the archive URL. Spelled out because
    # ``strftime("%B")`` follows the process locale and would produce a
    # different file name if LC_TIME were set to a non-English locale.
    _MONTH_NAMES = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )

    # Seconds to wait on the server; without a timeout ``requests`` can block
    # indefinitely on an unresponsive host.
    _REQUEST_TIMEOUT = 30

    # Zero-based position of the earthquake table among the tables parsed
    # from a page (the third table).
    _TARGET_TABLE_INDEX = 2

    def __init__(
        self,
        month: Optional[int] = None,
        year: Optional[int] = None,
        output_path: Optional[str] = "data",
    ):
        """
        Initialize the scraper.

        Args:
            month: The month to scrape (1-12). Must be given together with
                ``year``.
            year: The year to scrape. Must be given together with ``month``.
            output_path: Directory the CSV file is written to. ``None`` falls
                back to ``"data"``.

        Raises:
            ValueError: If only one of ``month``/``year`` is provided, or if
                either value fails validation.
        """
        self.base_url = "https://earthquake.phivolcs.dost.gov.ph"
        self.output_path = "data" if output_path is None else output_path

        if month is not None and year is None:
            raise ValueError("If month is provided, year must also be provided.")
        if month is None and year is not None:
            raise ValueError("If year is provided, month must also be provided.")

        if month is None:
            # Read the clock once so month and year cannot straddle a
            # month/year rollover.
            now = datetime.now()
            self.month = now.month
            self.year = now.year
        else:
            self.month = self._validate_month_input(month)
            self.year = self._validate_year_input(year)

        # Always defined so extract_month_page() never hits a missing
        # attribute. NOTE(review): whether the site already publishes an
        # archive page for the month in progress is not verified here.
        month_name = self._MONTH_NAMES[self.month - 1]
        self.month_url = (
            f"{self.base_url}/EQLatest-Monthly/{self.year}/"
            f"{self.year}_{month_name}.html"
        )

    def _validate_month_input(self, month: int) -> int:
        """
        Validate the month input.

        Args:
            month: The month number to check; ``None`` is passed through.

        Returns:
            The month, unchanged.

        Raises:
            ValueError: If the month is outside 1-12.
        """
        if month is not None and not 1 <= month <= 12:
            raise ValueError((f"Month must be between 1 and 12. You provided {month}."))
        return month

    def _validate_year_input(self, year: int) -> int:
        """
        Validate the year input.

        Args:
            year: The year to check; ``None`` is passed through.

        Returns:
            The year, unchanged.

        Raises:
            ValueError: If the year is before 1900 or after the current year.
        """
        current_year = datetime.now().year
        # NOTE: both bounds are inclusive (1900 and the current year are
        # accepted) even though the message says "greater than"/"less than";
        # the wording is kept because the package's tests match on it.
        if year is not None and (year < 1900 or year > current_year):
            raise ValueError(
                (
                    "Year must be greater than 1900 and less than the current year "
                    f"({current_year}). You provided {year}."
                )
            )
        return year

    def _fetch_page(self, url: str, label: str) -> bytes:
        """
        Download ``url`` and return the raw response body.

        Args:
            url: The page to request.
            label: Short page name used in the error log message.

        Returns:
            bytes: The response content.

        Raises:
            Exception: Any error raised by ``requests`` (including HTTP error
                statuses) is logged and re-raised unchanged.
        """
        try:
            with requests.Session() as session:
                response = session.get(
                    url,
                    verify=CA_CERTIFICATE_PATH,
                    timeout=self._REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                return response.content
        except Exception as e:
            logger.error(f"Error extracting {label} page: {e}")
            raise

    def extract_main_page(self) -> bytes:
        """
        Scrape the main earthquake data page of the PHIVOLCS website.

        Returns:
            bytes: The content of the main page.
        """
        return self._fetch_page(self.base_url, "main")

    def extract_month_page(self) -> bytes:
        """
        Scrape the monthly earthquake data page of the PHIVOLCS website.

        Returns:
            bytes: The content of the monthly page.
        """
        return self._fetch_page(self.month_url, "month")

    def extract_target_table(self, page: bytes) -> pd.DataFrame:
        """
        Extract the target table from the page.

        Args:
            page: The content of the page in bytes.

        Returns:
            pd.DataFrame: Dataframe of the target table (the third table
            found on the page).

        Raises:
            IndexError: If the page contains fewer than three tables.
        """
        soup = BeautifulSoup(page, "html.parser")
        tables = pd.read_html(StringIO(soup.prettify()))
        if len(tables) <= self._TARGET_TABLE_INDEX:
            raise IndexError(
                f"Expected at least {self._TARGET_TABLE_INDEX + 1} tables on the "
                f"page but found {len(tables)}."
            )
        return tables[self._TARGET_TABLE_INDEX]

    def _export_to_csv(self, df: pd.DataFrame):
        """
        Export the dataframe to a CSV file under ``self.output_path``.

        The directory is created if needed and the file is named
        ``phivolcs_earthquake_data_{month}_{year}.csv``.

        Args:
            df: The dataframe to export.
        """
        output_dir = Path(self.output_path)
        output_dir.mkdir(exist_ok=True, parents=True)
        file_name = (
            output_dir / f"phivolcs_earthquake_data_{self.month}_{self.year}.csv"
        )
        df.to_csv(file_name, index=False)
        logger.info(f"Exported data to {file_name}")

    def _run_main_scrape(self):
        """
        Run the scraper for the main page.
        """
        page = self.extract_main_page()
        table = self.extract_target_table(page)
        self._export_to_csv(table)

    def _run_month_scrape(self):
        """
        Run the scraper for the month page.
        """
        page = self.extract_month_page()
        table = self.extract_target_table(page)
        self._export_to_csv(table)

    def run(self):
        """
        Run the scraper.

        Uses the main page when the target is the current month and the
        monthly archive page otherwise, then exports the table to CSV.

        Raises:
            ValueError: If the target month lies in the future.
        """
        # One clock read for every comparison below, so the decision cannot
        # change halfway through at a month boundary.
        today = date.today()
        target_date = date(self.year, self.month, 1)
        if target_date > today.replace(day=1):
            raise ValueError(
                (
                    f"Month {self.month} of year {self.year} is in the future. "
                    "Please provide a month-year combination that is current "
                    "or in the past."
                )
            )

        if (self.month, self.year) == (today.month, today.year):
            logger.info(
                f"Scraping main (current month) page: {self.month} of {self.year}"
            )
            self._run_main_scrape()
        else:
            logger.info(f"Scraping month {self.month} of year {self.year}")
            self._run_month_scrape()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Tests for the pylindol package."""
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Tests for the CLI interface."""
|
|
2
|
+
|
|
3
|
+
import responses
|
|
4
|
+
from click.testing import CliRunner
|
|
5
|
+
|
|
6
|
+
from pylindol.cli import main
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestCLI:
    """Exercise the ``pylindol`` Click command through ``CliRunner``."""

    def test_cli_help(self):
        """``--help`` exits cleanly and lists the summary and every option."""
        runner = CliRunner()
        result = runner.invoke(main, ["--help"])

        assert result.exit_code == 0
        expected = "Scrape earthquake information from PHIVOLCS website"
        assert expected in result.output
        assert "--month" in result.output
        assert "--year" in result.output
        assert "--output-path" in result.output

    @responses.activate
    def test_cli_with_valid_options(self, tmp_path):
        """A valid month/year writes exactly one correctly named CSV file."""
        # Three tables are needed because the scraper picks the third one.
        mock_html = """
        <html>
        <body>
        <table><tr><td>Table 1</td></tr></table>
        <table><tr><td>Table 2</td></tr></table>
        <table>
        <tr><th>Date</th><th>Magnitude</th></tr>
        <tr><td>2025-08-01</td><td>5.0</td></tr>
        </table>
        </body>
        </html>
        """

        # Only the monthly archive URL is mocked; August 2025 is a past month,
        # so the scraper is expected to request this page and not the main one.
        url = (
            "https://earthquake.phivolcs.dost.gov.ph/"
            "EQLatest-Monthly/2025/2025_August.html"
        )
        responses.add(
            responses.GET,
            url,
            body=mock_html,
            status=200,
        )

        runner = CliRunner()
        result = runner.invoke(
            main, ["--month", "8", "--year", "2025", "--output-path", str(tmp_path)]
        )

        # Should succeed
        assert result.exit_code == 0

        # Exactly one CSV with the exact expected name; a substring check
        # would also accept a file with an unexpected prefix or suffix.
        csv_names = sorted(path.name for path in tmp_path.glob("*.csv"))
        assert csv_names == ["phivolcs_earthquake_data_8_2025.csv"]

    def test_cli_with_invalid_month(self):
        """An out-of-range month makes the command exit with an error."""
        runner = CliRunner()
        result = runner.invoke(main, ["--month", "13", "--year", "2025"])

        # Should fail with error
        assert result.exit_code != 0

    def test_cli_with_only_month(self):
        """A month without a year is rejected with a ``ValueError``."""
        runner = CliRunner()
        result = runner.invoke(main, ["--month", "8"])

        # Should fail with error
        assert result.exit_code != 0
        # The exception is captured on the result object rather than printed
        # in the command output.
        assert isinstance(result.exception, ValueError)
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Tests for the PhivolcsEarthquakeInfoScraper class."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pytest
|
|
7
|
+
import responses
|
|
8
|
+
|
|
9
|
+
from pylindol.earthquake_info_scraper import PhivolcsEarthquakeInfoScraper
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestPhivolcsEarthquakeInfoScraperInit:
    """Constructor behaviour of ``PhivolcsEarthquakeInfoScraper``."""

    def test_init_with_no_params(self):
        """No arguments: current month and year, default ``data`` directory."""
        instance = PhivolcsEarthquakeInfoScraper()
        assert instance.month == datetime.now().month
        assert instance.year == datetime.now().year
        assert instance.output_path == "data"

    def test_init_with_valid_month_and_year(self):
        """A valid month/year pair is stored and reflected in the archive URL."""
        instance = PhivolcsEarthquakeInfoScraper(month=8, year=2025)
        assert (instance.month, instance.year) == (8, 2025)
        assert "2025_August.html" in instance.month_url

    def test_init_with_custom_output_path(self):
        """The ``output_path`` argument is kept as given."""
        custom_dir = "custom/path"
        instance = PhivolcsEarthquakeInfoScraper(
            month=8, year=2025, output_path=custom_dir
        )
        assert instance.output_path == "custom/path"

    def test_init_with_only_month_raises_error(self):
        """A month without a year is rejected."""
        with pytest.raises(ValueError, match="year must also be provided"):
            PhivolcsEarthquakeInfoScraper(month=8)

    def test_init_with_only_year_raises_error(self):
        """A year without a month is rejected."""
        with pytest.raises(ValueError, match="month must also be provided"):
            PhivolcsEarthquakeInfoScraper(year=2025)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TestPhivolcsEarthquakeInfoScraperValidation:
    """Test validation methods of the scraper."""

    def test_month_validation_rejects_less_than_one(self):
        """Test that month < 1 raises ValueError."""
        with pytest.raises(ValueError, match="Month must be between 1 and 12"):
            PhivolcsEarthquakeInfoScraper(month=0, year=2025)

    def test_month_validation_rejects_greater_than_twelve(self):
        """Test that month > 12 raises ValueError."""
        with pytest.raises(ValueError, match="Month must be between 1 and 12"):
            PhivolcsEarthquakeInfoScraper(month=13, year=2025)

    def test_year_validation_rejects_too_old(self):
        """Test that year < 1900 raises ValueError."""
        with pytest.raises(ValueError, match="Year must be greater than 1900"):
            PhivolcsEarthquakeInfoScraper(month=1, year=1899)

    def test_year_validation_rejects_future(self):
        """Test that year > current year raises ValueError."""
        future_year = datetime.now().year + 1
        with pytest.raises(ValueError, match="less than the current year"):
            PhivolcsEarthquakeInfoScraper(month=1, year=future_year)

    def test_future_date_validation_in_run(self, tmp_path):
        """Test that run() rejects a month that lies in the future."""
        now = datetime.now()
        if now.month < 12:
            future_month = now.month + 1
            future_year = now.year
        else:
            future_month = 1
            future_year = now.year + 1

        # Build the scraper with defaults and then point it at next month.
        # Passing the future values to the constructor would trip the year
        # validator every December (next month falls in next year), raising
        # before run() is reached and outside the pytest.raises block.
        scraper = PhivolcsEarthquakeInfoScraper(output_path=str(tmp_path))
        scraper.month = future_month
        scraper.year = future_year

        with pytest.raises(ValueError, match="is in the future"):
            scraper.run()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TestPhivolcsEarthquakeInfoScraperScraping:
    """Scraping paths of the scraper, with HTTP traffic mocked by ``responses``."""

    @responses.activate
    def test_extract_main_page_success(self):
        """The main page body is returned as raw bytes."""
        mock_html = "<html><body>Test content</body></html>"
        responses.add(
            responses.GET,
            "https://earthquake.phivolcs.dost.gov.ph",
            body=mock_html,
            status=200,
        )

        fetched = PhivolcsEarthquakeInfoScraper().extract_main_page()
        assert fetched == mock_html.encode()

    @responses.activate
    def test_extract_month_page_success(self):
        """The monthly archive page body is returned as raw bytes."""
        mock_html = "<html><body>Monthly data</body></html>"
        url = (
            "https://earthquake.phivolcs.dost.gov.ph/"
            "EQLatest-Monthly/2025/2025_August.html"
        )
        responses.add(
            responses.GET,
            url,
            body=mock_html,
            status=200,
        )

        august = PhivolcsEarthquakeInfoScraper(month=8, year=2025)
        fetched = august.extract_month_page()
        assert fetched == mock_html.encode()

    def test_extract_target_table(self):
        """The third table on the page is returned as a non-empty dataframe."""
        # Three tables are present because the scraper takes the third one.
        mock_html = """
        <html>
        <body>
        <table><tr><td>Table 1</td></tr></table>
        <table><tr><td>Table 2</td></tr></table>
        <table>
        <tr><th>Date</th><th>Magnitude</th></tr>
        <tr><td>2025-08-01</td><td>5.0</td></tr>
        </table>
        </body>
        </html>
        """

        page_bytes = mock_html.encode()
        frame = PhivolcsEarthquakeInfoScraper().extract_target_table(page_bytes)

        assert isinstance(frame, pd.DataFrame)
        assert len(frame) > 0

    @responses.activate
    def test_run_with_current_month(self, tmp_path):
        """``run()`` with defaults reads the main page and writes this month's CSV."""
        # Page layout mirrors the three-table structure the scraper expects.
        mock_html = """
        <html>
        <body>
        <table><tr><td>Table 1</td></tr></table>
        <table><tr><td>Table 2</td></tr></table>
        <table>
        <tr><th>Date</th><th>Magnitude</th></tr>
        <tr><td>2025-10-01</td><td>5.0</td></tr>
        <tr><td>2025-10-02</td><td>4.5</td></tr>
        </table>
        </body>
        </html>
        """

        responses.add(
            responses.GET,
            "https://earthquake.phivolcs.dost.gov.ph",
            body=mock_html,
            status=200,
        )

        PhivolcsEarthquakeInfoScraper(output_path=str(tmp_path)).run()

        # The file name carries the month and year of the run.
        now = datetime.now()
        csv_name = f"phivolcs_earthquake_data_{now.month}_{now.year}.csv"
        assert (tmp_path / csv_name).exists()

    @responses.activate
    def test_run_with_specific_month(self, tmp_path):
        """``run()`` for a past month reads the archive page and writes its CSV."""
        mock_html = """
        <html>
        <body>
        <table><tr><td>Table 1</td></tr></table>
        <table><tr><td>Table 2</td></tr></table>
        <table>
        <tr><th>Date</th><th>Magnitude</th></tr>
        <tr><td>2025-08-01</td><td>5.0</td></tr>
        </table>
        </body>
        </html>
        """

        url = (
            "https://earthquake.phivolcs.dost.gov.ph/"
            "EQLatest-Monthly/2025/2025_August.html"
        )
        responses.add(
            responses.GET,
            url,
            body=mock_html,
            status=200,
        )

        august = PhivolcsEarthquakeInfoScraper(
            month=8, year=2025, output_path=str(tmp_path)
        )
        august.run()

        # The CSV is named after the requested month and year.
        assert (tmp_path / "phivolcs_earthquake_data_8_2025.csv").exists()
|