gdelt-client 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdelt_client-0.1.0/.github/FUNDING.yml +3 -0
- gdelt_client-0.1.0/.github/workflows/lint.yml +32 -0
- gdelt_client-0.1.0/.github/workflows/publish.yml +37 -0
- gdelt_client-0.1.0/.github/workflows/test.yml +34 -0
- gdelt_client-0.1.0/.gitignore +68 -0
- gdelt_client-0.1.0/.python-version +1 -0
- gdelt_client-0.1.0/CHANGELOG.md +82 -0
- gdelt_client-0.1.0/LICENSE +21 -0
- gdelt_client-0.1.0/PKG-INFO +220 -0
- gdelt_client-0.1.0/README.md +204 -0
- gdelt_client-0.1.0/pyproject.toml +87 -0
- gdelt_client-0.1.0/src/gdelt_client/__init__.py +42 -0
- gdelt_client-0.1.0/src/gdelt_client/api_client.py +591 -0
- gdelt_client-0.1.0/src/gdelt_client/data/schemas/cameoCodes.json +848 -0
- gdelt_client-0.1.0/src/gdelt_client/data/schemas/eventsv2.json +388 -0
- gdelt_client-0.1.0/src/gdelt_client/data/schemas/gkgv2.json +157 -0
- gdelt_client-0.1.0/src/gdelt_client/data/schemas/mentions.json +103 -0
- gdelt_client-0.1.0/src/gdelt_client/enums.py +37 -0
- gdelt_client-0.1.0/src/gdelt_client/errors.py +57 -0
- gdelt_client-0.1.0/src/gdelt_client/filters.py +351 -0
- gdelt_client-0.1.0/src/gdelt_client/helpers.py +266 -0
- gdelt_client-0.1.0/src/gdelt_client/validation.py +92 -0
- gdelt_client-0.1.0/tests/__init__.py +0 -0
- gdelt_client-0.1.0/tests/test_client.py +1051 -0
- gdelt_client-0.1.0/tests/test_enums.py +47 -0
- gdelt_client-0.1.0/tests/test_errors.py +53 -0
- gdelt_client-0.1.0/tests/test_filters.py +182 -0
- gdelt_client-0.1.0/tests/test_helpers.py +144 -0
- gdelt_client-0.1.0/tests/test_validation.py +67 -0
- gdelt_client-0.1.0/uv.lock +1544 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: Run linting and type checking
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
unit-test:
|
|
13
|
+
name: python
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v6
|
|
18
|
+
|
|
19
|
+
- name: Install uv and set the Python version
|
|
20
|
+
uses: astral-sh/setup-uv@v7
|
|
21
|
+
|
|
22
|
+
- name: Install the project
|
|
23
|
+
run: uv sync --locked --dev
|
|
24
|
+
|
|
25
|
+
- name: Run ruff format check
|
|
26
|
+
run: uv run ruff format --check .
|
|
27
|
+
|
|
28
|
+
- name: Run ruff lint
|
|
29
|
+
run: uv run ruff check .
|
|
30
|
+
|
|
31
|
+
- name: Run mypy
|
|
32
|
+
run: uv run mypy ./src --cache-dir .mypy_cache
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: release-main
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v6
|
|
12
|
+
|
|
13
|
+
- name: Update project version
|
|
14
|
+
run: |
|
|
15
|
+
# Strip 'v' prefix from tag if present
|
|
16
|
+
VERSION="${TAG#v}"
|
|
17
|
+
|
|
18
|
+
# Validate semantic version format
|
|
19
|
+
if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?(\+[a-zA-Z0-9.]+)?$ ]]; then
|
|
20
|
+
echo "Error: Invalid semantic version format: $VERSION"
|
|
21
|
+
exit 1
|
|
22
|
+
fi
|
|
23
|
+
|
|
24
|
+
sed -i "s/^version = [\"'].*[\"']/version = \"$VERSION\"/" pyproject.toml
|
|
25
|
+
env:
|
|
26
|
+
TAG: ${{ github.event.release.tag_name }}
|
|
27
|
+
|
|
28
|
+
- name: Install uv and set the Python version
|
|
29
|
+
uses: astral-sh/setup-uv@v7
|
|
30
|
+
|
|
31
|
+
- name: Build package
|
|
32
|
+
run: uv build
|
|
33
|
+
|
|
34
|
+
- name: Publish package
|
|
35
|
+
run: uv publish
|
|
36
|
+
env:
|
|
37
|
+
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Run unit tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
unit-test:
|
|
13
|
+
name: python
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version:
|
|
18
|
+
- "3.11"
|
|
19
|
+
- "3.12"
|
|
20
|
+
- "3.13"
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v6
|
|
24
|
+
|
|
25
|
+
- name: Install uv and set the Python version
|
|
26
|
+
uses: astral-sh/setup-uv@v7
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
|
|
30
|
+
- name: Install the project
|
|
31
|
+
run: uv sync --locked --dev
|
|
32
|
+
|
|
33
|
+
- name: Run unit tests
|
|
34
|
+
run: uv run pytest tests --cov=src/gdelt_client --cov-report=xml --cov-report=term-missing -m "not integration"
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# Installer logs
|
|
30
|
+
pip-log.txt
|
|
31
|
+
pip-delete-this-directory.txt
|
|
32
|
+
|
|
33
|
+
# Unit tests / coverage reports
|
|
34
|
+
htmlcov/
|
|
35
|
+
.tox/
|
|
36
|
+
.nox/
|
|
37
|
+
.coverage
|
|
38
|
+
.coverage.*
|
|
39
|
+
.cache
|
|
40
|
+
nosetests.xml
|
|
41
|
+
coverage.xml
|
|
42
|
+
*.cover
|
|
43
|
+
*.py,cover
|
|
44
|
+
.hypothesis/
|
|
45
|
+
.pytest_cache/
|
|
46
|
+
cover/
|
|
47
|
+
|
|
48
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
49
|
+
__pypackages__/
|
|
50
|
+
|
|
51
|
+
# Environments
|
|
52
|
+
.env
|
|
53
|
+
.venv
|
|
54
|
+
env/
|
|
55
|
+
venv/
|
|
56
|
+
ENV/
|
|
57
|
+
env.bak/
|
|
58
|
+
venv.bak/
|
|
59
|
+
|
|
60
|
+
# mypy
|
|
61
|
+
.mypy_cache/
|
|
62
|
+
.dmypy.json
|
|
63
|
+
dmypy.json
|
|
64
|
+
.idea/
|
|
65
|
+
|
|
66
|
+
# IDE and local testing
|
|
67
|
+
.vscode/
|
|
68
|
+
example.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13.2
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 1.12.0
|
|
4
|
+
|
|
5
|
+
Raise custom errors when the GDELT API returns a non-200 status code (#66)
|
|
6
|
+
|
|
7
|
+
## 1.11.0
|
|
8
|
+
|
|
9
|
+
Allow datetimes to be passed to `start_date` and `end_date` (#63)
|
|
10
|
+
|
|
11
|
+
## 1.10.3
|
|
12
|
+
|
|
13
|
+
Handle empty API responses in timeline search (#62)
|
|
14
|
+
|
|
15
|
+
## 1.10.2
|
|
16
|
+
|
|
17
|
+
Fix `Unpack` type hint for older Python versions (#60)
|
|
18
|
+
|
|
19
|
+
## 1.10.1
|
|
20
|
+
|
|
21
|
+
Add workaround for domain bug to filter docstring (#54)
|
|
22
|
+
Handle 0 results in timeline search (#55)
|
|
23
|
+
|
|
24
|
+
## 1.10.0
|
|
25
|
+
|
|
26
|
+
Add support for `tone` and `tone_absolute` filters (#51)
|
|
27
|
+
Fix type hints in filters (#50)
|
|
28
|
+
|
|
29
|
+
## 1.9.0
|
|
30
|
+
|
|
31
|
+
Fix JSONDecodeError when loading bad responses from the API (#47)
|
|
32
|
+
|
|
33
|
+
## 1.8.0
|
|
34
|
+
|
|
35
|
+
Add multiple nears (#31)(#45)
|
|
36
|
+
|
|
37
|
+
## 1.7.0
|
|
38
|
+
|
|
39
|
+
Add the ability to filter based on 3 letter language (#38)
|
|
40
|
+
|
|
41
|
+
## 1.6.0
|
|
42
|
+
|
|
43
|
+
Only support Python 3.10 and above (#39)
|
|
44
|
+
Format all files with prettier (#40)
|
|
45
|
+
Update package dependencies (#41)
|
|
46
|
+
|
|
47
|
+
## 1.5.0
|
|
48
|
+
|
|
49
|
+
Provide user agent in requests to the API (#22)
|
|
50
|
+
|
|
51
|
+
## 1.4.0
|
|
52
|
+
|
|
53
|
+
Validate `timespan` filter parameter to make sure it's an allowed value
|
|
54
|
+
Catch API errors when a query string is invalid and return them to the user
|
|
55
|
+
|
|
56
|
+
## 1.3.3
|
|
57
|
+
|
|
58
|
+
Fix a bug in `multi_repeat` which meant any filter using `OR` would fail
|
|
59
|
+
|
|
60
|
+
## 1.3.2
|
|
61
|
+
|
|
62
|
+
Fix a bug in `multi_repeat` which caused a bad response from the API which could not be parsed
|
|
63
|
+
|
|
64
|
+
## 1.3.1
|
|
65
|
+
|
|
66
|
+
Fix bug when only the first of the filter conditions (eg. keyword, near, etc.) was used
|
|
67
|
+
|
|
68
|
+
## 1.3.0
|
|
69
|
+
|
|
70
|
+
Recursively load the JSON response to remove improper characters
|
|
71
|
+
|
|
72
|
+
## 1.2.0
|
|
73
|
+
|
|
74
|
+
Add support for filtering by timespan instead of start and end date
|
|
75
|
+
|
|
76
|
+
## 1.1.0
|
|
77
|
+
|
|
78
|
+
Adds support for multiple repeat filters
|
|
79
|
+
|
|
80
|
+
## 1.0.0
|
|
81
|
+
|
|
82
|
+
First version released
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bob Merkus
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gdelt-client
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A client for the GDELT 2.0 API
|
|
5
|
+
Author-email: Bob Merkus <bob.merkus@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Requires-Dist: aiohttp>=3.13.3
|
|
12
|
+
Requires-Dist: geopandas>=1.1.2
|
|
13
|
+
Requires-Dist: pandas>=3.0.0
|
|
14
|
+
Requires-Dist: requests>=2.32.5
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# GDELT 2.0 API Client
|
|
18
|
+
|
|
19
|
+
A Python client to fetch data from the [GDELT 2.0 API](https://gdeltproject.org/).
|
|
20
|
+
|
|
21
|
+
This client supports both the DOC API for article search and timelines, as well as direct access to GDELT's raw event data files (events, mentions, and GKG). This allows for simpler, small-scale analysis of news coverage and events data without having to deal with the complexities of downloading and managing the raw files from S3, or working with the BigQuery export.
|
|
22
|
+
|
|
23
|
+
The implementation has been forked from [gdeltdoc](https://github.com/alex9smith/gdelt-doc-api).
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
`gdelt-client` is on [PyPI](https://pypi.org/project/gdelt-client/) and can be installed with pip:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install gdelt-client
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Use
|
|
34
|
+
|
|
35
|
+
### DOC API - Article Search & Timelines
|
|
36
|
+
|
|
37
|
+
Search for news articles and get timeline data via the GDELT DOC API.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from gdelt_client import GdeltClient, Filters
|
|
41
|
+
|
|
42
|
+
f = Filters(
|
|
43
|
+
keyword="climate change",
|
|
44
|
+
start_date="2020-05-10",
|
|
45
|
+
end_date="2020-05-11"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
gd = GdeltClient()
|
|
49
|
+
|
|
50
|
+
# Search for articles matching the filters
|
|
51
|
+
articles = gd.article_search(f)
|
|
52
|
+
|
|
53
|
+
# Get a timeline of coverage volume
|
|
54
|
+
timeline = gd.timeline_search("timelinevol", f)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Async example:**
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import asyncio
|
|
61
|
+
from gdelt_client import GdeltClient, Filters
|
|
62
|
+
|
|
63
|
+
async def main():
|
|
64
|
+
f = Filters(keyword="climate change", start_date="2020-05-10", end_date="2020-05-11")
|
|
65
|
+
|
|
66
|
+
# Use async context manager to properly cleanup resources
|
|
67
|
+
async with GdeltClient() as gd:
|
|
68
|
+
# Async article search
|
|
69
|
+
articles = await gd.aarticle_search(f)
|
|
70
|
+
|
|
71
|
+
# Async timeline search
|
|
72
|
+
timeline = await gd.atimeline_search("timelinevol", f)
|
|
73
|
+
|
|
74
|
+
asyncio.run(main())
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Raw Data Downloads - Events, Mentions & GKG
|
|
78
|
+
|
|
79
|
+
Download and parse GDELT's raw data files directly. Returns data with CAMEO code descriptions for events.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from gdelt_client import GdeltClient, GdeltTable, OutputFormat
|
|
83
|
+
|
|
84
|
+
gd = GdeltClient()
|
|
85
|
+
|
|
86
|
+
# Download events for a single date
|
|
87
|
+
events = gd.search(
|
|
88
|
+
date="2020-05-10",
|
|
89
|
+
table=GdeltTable.EVENTS,
|
|
90
|
+
output=OutputFormat.DATAFRAME
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Download mentions for a date range with full 15-min coverage
|
|
94
|
+
mentions = gd.search(
|
|
95
|
+
date=["2020-05-10", "2020-05-11"],
|
|
96
|
+
table=GdeltTable.MENTIONS,
|
|
97
|
+
coverage=True # Download all 15-minute intervals
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Get GeoDataFrame with geometry for mapping
|
|
101
|
+
geo_events = gd.search(
|
|
102
|
+
date="2020-05-10",
|
|
103
|
+
table=GdeltTable.EVENTS,
|
|
104
|
+
output=OutputFormat.GEODATAFRAME
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# View table schema
|
|
108
|
+
schema = gd.schema(GdeltTable.EVENTS)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Async example** (downloads files concurrently for better performance):
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
import asyncio
|
|
115
|
+
from gdelt_client import GdeltClient, GdeltTable
|
|
116
|
+
|
|
117
|
+
async def main():
|
|
118
|
+
# Use async context manager to properly cleanup resources
|
|
119
|
+
async with GdeltClient() as gd:
|
|
120
|
+
# Async search with concurrent file downloads
|
|
121
|
+
events = await gd.asearch(
|
|
122
|
+
date=["2020-05-10", "2020-05-11"],
|
|
123
|
+
table=GdeltTable.EVENTS,
|
|
124
|
+
coverage=True
|
|
125
|
+
)
|
|
126
|
+
print(events[:5])
|
|
127
|
+
print(f"Total records {len(events)}")
|
|
128
|
+
asyncio.run(main())
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**Available tables:** `EVENTS`, `MENTIONS`, `GKG`
|
|
132
|
+
**Available output formats:** `DATAFRAME`, `JSON`, `CSV`, `GEODATAFRAME`
|
|
133
|
+
|
|
134
|
+
### Article List
|
|
135
|
+
|
|
136
|
+
The `article_search()` method (and async `aarticle_search()`) generates a list of news articles that match the filters. Returns a pandas DataFrame with columns: `url`, `url_mobile`, `title`, `seendate`, `socialimage`, `domain`, `language`, `sourcecountry`.
|
|
137
|
+
|
|
138
|
+
### Timeline Search
|
|
139
|
+
|
|
140
|
+
The `timeline_search()` method (and async `atimeline_search()`) supports 5 modes:
|
|
141
|
+
|
|
142
|
+
- `timelinevol` - Timeline of coverage volume as a percentage of all monitored articles
|
|
143
|
+
- `timelinevolraw` - Timeline with actual article counts instead of percentages
|
|
144
|
+
- `timelinelang` - Coverage broken down by language (each language as a column)
|
|
145
|
+
- `timelinesourcecountry` - Coverage broken down by source country (each country as a column)
|
|
146
|
+
- `timelinetone` - Average tone of articles over time (see [GDELT docs](https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/) for tone metric details)
|
|
147
|
+
|
|
148
|
+
All modes return a pandas DataFrame with a `datetime` column and data columns.
|
|
149
|
+
|
|
150
|
+
### Filters
|
|
151
|
+
|
|
152
|
+
The search query passed to the API is constructed from a `gdelt_client.Filters` object.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from gdelt_client import Filters, near, repeat
|
|
156
|
+
|
|
157
|
+
f = Filters(
|
|
158
|
+
start_date = "2020-05-01",
|
|
159
|
+
end_date = "2020-05-02",
|
|
160
|
+
num_records = 250,
|
|
161
|
+
keyword = "climate change",
|
|
162
|
+
domain = ["bbc.co.uk", "nytimes.com"],
|
|
163
|
+
country = ["UK", "US"],
|
|
164
|
+
theme = "GENERAL_HEALTH",
|
|
165
|
+
near = near(10, "airline", "carbon"),
|
|
166
|
+
repeat = repeat(5, "planet")
|
|
167
|
+
)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Filters for `keyword`, `domain`, `domain_exact`, `country`, `language` and `theme` can be passed either as a single string or as a list of strings. If a list is passed, the values in the list are wrapped in a boolean OR.
|
|
171
|
+
|
|
172
|
+
You must pass either both `start_date` and `end_date`, or `timespan`.
|
|
173
|
+
|
|
174
|
+
- `start_date` - The start date for the filter in YYYY-MM-DD format or as a datetime object in UTC time.
|
|
175
|
+
Passing a datetime allows you to specify a time down to seconds granularity. The API officially only supports the most recent 3 months of articles. Making a request for an earlier date range may still return data, but it's not guaranteed.
|
|
176
|
+
- `end_date` - The end date for the filter in YYYY-MM-DD format or as a datetime object in UTC time.
|
|
177
|
+
- `timespan` - A timespan to search for, relative to the time of the request. Must match one of the API's timespan formats - https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/
|
|
178
|
+
- `num_records` - The number of records to return. Only used in article list mode and can be up to 250.
|
|
179
|
+
- `keyword` - Return articles containing the exact phrase `keyword` within the article text.
|
|
180
|
+
- `domain` - Return articles from the specified domain. Does not require an exact match so passing "cnn.com" will match articles from `cnn.com`, `subdomain.cnn.com` and `notactuallycnn.com`.
|
|
181
|
+
- `domain_exact` - Similar to `domain`, but requires an exact match.
|
|
182
|
+
- `country` - Return articles published in a country or list of countries, formatted as the FIPS 2 letter country code.
|
|
183
|
+
- `language` - Return articles published in the given language, formatted as the ISO 639 language code.
|
|
184
|
+
- `theme` - Return articles that cover one of GDELT's GKG Themes. A full list of themes can be found [here](http://data.gdeltproject.org/api/v2/guides/LOOKUP-GKGTHEMES.TXT)
|
|
185
|
+
- `near` - Return articles containing words close to each other in the text. Use `near()` to construct. eg. `near = near(5, "airline", "climate")`, or `multi_near()` if you want to use multiple restrictions eg. `multi_near([(5, "airline", "crisis"), (10, "airline", "climate", "change")], method="AND")` finds "airline" and "crisis" within 5 words, and "airline", "climate", and "change" within 10 words
|
|
186
|
+
- `repeat` - Return articles containing a single word repeated at least a number of times. Use `repeat()` to construct. eg. `repeat = repeat(3, "environment")`, or `multi_repeat()` if you want to use multiple restrictions eg. `repeat = multi_repeat([(2, "airline"), (3, "airport")], "AND")`
|
|
187
|
+
- `tone` - Return articles above or below a particular tone score (ie more positive or more negative than a certain threshold). To use, specify either a greater than or less than sign and a positive or negative number (either an integer or floating point number). To find fairly positive articles, use `tone=">5"` or to search for fairly negative articles, use `tone="<-5"`
|
|
188
|
+
- `tone_absolute` - The same as `tone` but ignores the positive/negative sign and lets you search for high emotion or low emotion articles, regardless of whether they were happy or sad in tone.
|
|
189
|
+
|
|
190
|
+
## Attribution
|
|
191
|
+
|
|
192
|
+
The JSON schema data files in this package (`src/gdelt_client/data/schemas/`) are based on schemas from [gdeltPyR](https://github.com/linwoodc3/gdeltPyR), which is licensed under the GNU General Public License v3.0.
|
|
193
|
+
|
|
194
|
+
## Developing gdelt-client
|
|
195
|
+
|
|
196
|
+
PRs & issues are very welcome!
|
|
197
|
+
|
|
198
|
+
### Setup
|
|
199
|
+
|
|
200
|
+
It's recommended to use a virtual environment for development. Set one up with [uv](https://docs.astral.sh/uv/getting-started/installation/):
|
|
201
|
+
|
|
202
|
+
```
|
|
203
|
+
uv sync
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Tests for this package use `pytest`. Run them with:
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
uv run pytest tests --cov=src/gdelt_client --cov-report=xml --cov-report=term-missing
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
If your PR adds a new feature or helper, please also add some tests
|
|
213
|
+
|
|
214
|
+
### Publishing
|
|
215
|
+
|
|
216
|
+
There's a bit of automation set up to help publish a new version of the package to PyPI:
|
|
217
|
+
|
|
218
|
+
1. Make sure the version string has been updated since the last release. This package follows semantic versioning.
|
|
219
|
+
2. Create a new release in the GitHub UI, using the new version as the release tag name (a leading `v` is stripped by the publish workflow)
|
|
220
|
+
3. Watch as the `publish.yml` GitHub Action builds the package and pushes it to PyPI
|