gdelt-client 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. gdelt_client-0.1.0/.github/FUNDING.yml +3 -0
  2. gdelt_client-0.1.0/.github/workflows/lint.yml +32 -0
  3. gdelt_client-0.1.0/.github/workflows/publish.yml +37 -0
  4. gdelt_client-0.1.0/.github/workflows/test.yml +34 -0
  5. gdelt_client-0.1.0/.gitignore +68 -0
  6. gdelt_client-0.1.0/.python-version +1 -0
  7. gdelt_client-0.1.0/CHANGELOG.md +82 -0
  8. gdelt_client-0.1.0/LICENSE +21 -0
  9. gdelt_client-0.1.0/PKG-INFO +220 -0
  10. gdelt_client-0.1.0/README.md +204 -0
  11. gdelt_client-0.1.0/pyproject.toml +87 -0
  12. gdelt_client-0.1.0/src/gdelt_client/__init__.py +42 -0
  13. gdelt_client-0.1.0/src/gdelt_client/api_client.py +591 -0
  14. gdelt_client-0.1.0/src/gdelt_client/data/schemas/cameoCodes.json +848 -0
  15. gdelt_client-0.1.0/src/gdelt_client/data/schemas/eventsv2.json +388 -0
  16. gdelt_client-0.1.0/src/gdelt_client/data/schemas/gkgv2.json +157 -0
  17. gdelt_client-0.1.0/src/gdelt_client/data/schemas/mentions.json +103 -0
  18. gdelt_client-0.1.0/src/gdelt_client/enums.py +37 -0
  19. gdelt_client-0.1.0/src/gdelt_client/errors.py +57 -0
  20. gdelt_client-0.1.0/src/gdelt_client/filters.py +351 -0
  21. gdelt_client-0.1.0/src/gdelt_client/helpers.py +266 -0
  22. gdelt_client-0.1.0/src/gdelt_client/validation.py +92 -0
  23. gdelt_client-0.1.0/tests/__init__.py +0 -0
  24. gdelt_client-0.1.0/tests/test_client.py +1051 -0
  25. gdelt_client-0.1.0/tests/test_enums.py +47 -0
  26. gdelt_client-0.1.0/tests/test_errors.py +53 -0
  27. gdelt_client-0.1.0/tests/test_filters.py +182 -0
  28. gdelt_client-0.1.0/tests/test_helpers.py +144 -0
  29. gdelt_client-0.1.0/tests/test_validation.py +67 -0
  30. gdelt_client-0.1.0/uv.lock +1544 -0
@@ -0,0 +1,3 @@
1
+ # These are supported funding model platforms
2
+
3
+ github: BobMerkus
@@ -0,0 +1,32 @@
1
+ name: Run linting and type checking
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ unit-test:
13
+ name: python
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - uses: actions/checkout@v6
18
+
19
+ - name: Install uv and set the Python version
20
+ uses: astral-sh/setup-uv@v7
21
+
22
+ - name: Install the project
23
+ run: uv sync --locked --dev
24
+
25
+ - name: Run ruff format check
26
+ run: uv run ruff format --check .
27
+
28
+ - name: Run ruff lint
29
+ run: uv run ruff check .
30
+
31
+ - name: Run mypy
32
+ run: uv run mypy ./src --cache-dir .mypy_cache
@@ -0,0 +1,37 @@
1
+ name: release-main
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v6
12
+
13
+ - name: Update project version
14
+ run: |
15
+ # Strip 'v' prefix from tag if present
16
+ VERSION="${TAG#v}"
17
+
18
+ # Validate semantic version format
19
+ if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?(\+[a-zA-Z0-9.]+)?$ ]]; then
20
+ echo "Error: Invalid semantic version format: $VERSION"
21
+ exit 1
22
+ fi
23
+
24
+ sed -i "s/^version = [\"'].*[\"']/version = \"$VERSION\"/" pyproject.toml
25
+ env:
26
+ TAG: ${{ github.event.release.tag_name }}
27
+
28
+ - name: Install uv and set the Python version
29
+ uses: astral-sh/setup-uv@v7
30
+
31
+ - name: Build package
32
+ run: uv build
33
+
34
+ - name: Publish package
35
+ run: uv publish
36
+ env:
37
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -0,0 +1,34 @@
1
+ name: Run unit tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ unit-test:
13
+ name: python
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version:
18
+ - "3.11"
19
+ - "3.12"
20
+ - "3.13"
21
+
22
+ steps:
23
+ - uses: actions/checkout@v6
24
+
25
+ - name: Install uv and set the Python version
26
+ uses: astral-sh/setup-uv@v7
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install the project
31
+ run: uv sync --locked --dev
32
+
33
+ - name: Run unit tests
34
+ run: uv run pytest tests --cov=src/gdelt_client --cov-report=xml --cov-report=term-missing -m "not integration"
@@ -0,0 +1,68 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # Installer logs
30
+ pip-log.txt
31
+ pip-delete-this-directory.txt
32
+
33
+ # Unit tests / coverage reports
34
+ htmlcov/
35
+ .tox/
36
+ .nox/
37
+ .coverage
38
+ .coverage.*
39
+ .cache
40
+ nosetests.xml
41
+ coverage.xml
42
+ *.cover
43
+ *.py,cover
44
+ .hypothesis/
45
+ .pytest_cache/
46
+ cover/
47
+
48
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
49
+ __pypackages__/
50
+
51
+ # Environments
52
+ .env
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+
60
+ # mypy
61
+ .mypy_cache/
62
+ .dmypy.json
63
+ dmypy.json
64
+ .idea/
65
+
66
+ # IDE and local testing
67
+ .vscode/
68
+ example.py
@@ -0,0 +1 @@
1
+ 3.13.2
@@ -0,0 +1,82 @@
1
+ # Changelog
2
+
3
+ ## 1.12.0
4
+
5
+ Raise custom errors when the GDELT API returns a non-200 status code (#66)
6
+
7
+ ## 1.11.0
8
+
9
+ Allow datetimes to be passed to `start_date` and `end_date` (#63)
10
+
11
+ ## 1.10.3
12
+
13
+ Handle empty API responses in timeline search (#62)
14
+
15
+ ## 1.10.2
16
+
17
+ Fix `Unpack` type hint for older Python versions (#60)
18
+
19
+ ## 1.10.1
20
+
21
+ Add workaround for domain bug to filter docstring (#54)
22
+ Handle 0 results in timeline search (#55)
23
+
24
+ ## 1.10.0
25
+
26
+ Add support for `tone` and `tone_absolute` filters (#51)
27
+ Fix type hints in filters (#50)
28
+
29
+ ## 1.9.0
30
+
31
+ Fix JSONDecodeError when loading bad responses from the API (#47)
32
+
33
+ ## 1.8.0
34
+
35
+ Add multiple nears (#31)(#45)
36
+
37
+ ## 1.7.0
38
+
39
+ Add the ability to filter based on 3 letter language (#38)
40
+
41
+ ## 1.6.0
42
+
43
+ Only support Python 3.10 and above (#39)
44
+ Format all files with prettier (#40)
45
+ Update package dependencies (#41)
46
+
47
+ ## 1.5.0
48
+
49
+ Provide user agent in requests to the API (#22)
50
+
51
+ ## 1.4.0
52
+
53
+ Validate `timespan` filter parameter to make sure it's an allowed value
54
+ Catch API errors when a query string is invalid and return them to the user
55
+
56
+ ## 1.3.3
57
+
58
+ Fix a bug in `multi_repeat` which meant any filter using `OR` would fail
59
+
60
+ ## 1.3.2
61
+
62
+ Fix a bug in `multi_repeat` which caused a bad response from the API which could not be parsed
63
+
64
+ ## 1.3.1
65
+
66
+ Fix bug when only the first of the filter conditions (eg. keyword, near, etc.) was used
67
+
68
+ ## 1.3.0
69
+
70
+ Recursively load the JSON response to remove improper characters
71
+
72
+ ## 1.2.0
73
+
74
+ Add support for filtering by timespan instead of start and end date
75
+
76
+ ## 1.1.0
77
+
78
+ Adds support for multiple repeat filters
79
+
80
+ ## 1.0.0
81
+
82
+ First version released
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Bob Merkus
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: gdelt-client
3
+ Version: 0.1.0
4
+ Summary: A client for the GDELT 2.0 API
5
+ Author-email: Bob Merkus <bob.merkus@gmail.com>
6
+ License-File: LICENSE
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.11
11
+ Requires-Dist: aiohttp>=3.13.3
12
+ Requires-Dist: geopandas>=1.1.2
13
+ Requires-Dist: pandas>=3.0.0
14
+ Requires-Dist: requests>=2.32.5
15
+ Description-Content-Type: text/markdown
16
+
17
+ # GDELT 2.0 API Client
18
+
19
+ A Python client to fetch data from the [GDELT 2.0 API](https://gdeltproject.org/).
20
+
21
+ This client supports both the DOC API for article search and timelines, as well as direct access to GDELT's raw event data files (events, mentions, and GKG). This allows for simpler, small-scale analysis of news coverage and events data without having to deal with the complexities of downloading and managing the raw files from S3, or working with the BigQuery export.
22
+
23
+ The implementation has been forked from [gdeltdoc](https://github.com/alex9smith/gdelt-doc-api).
24
+
25
+ ## Installation
26
+
27
+ `gdelt-client` is on [PyPI](https://pypi.org/project/gdelt-client/) and is installed through pip:
28
+
29
+ ```bash
30
+ pip install gdelt-client
31
+ ```
32
+
33
+ ## Use
34
+
35
+ ### DOC API - Article Search & Timelines
36
+
37
+ Search for news articles and get timeline data via the GDELT DOC API.
38
+
39
+ ```python
40
+ from gdelt_client import GdeltClient, Filters
41
+
42
+ f = Filters(
43
+ keyword="climate change",
44
+ start_date="2020-05-10",
45
+ end_date="2020-05-11"
46
+ )
47
+
48
+ gd = GdeltClient()
49
+
50
+ # Search for articles matching the filters
51
+ articles = gd.article_search(f)
52
+
53
+ # Get a timeline of coverage volume
54
+ timeline = gd.timeline_search("timelinevol", f)
55
+ ```
56
+
57
+ **Async example:**
58
+
59
+ ```python
60
+ import asyncio
61
+ from gdelt_client import GdeltClient, Filters
62
+
63
+ async def main():
64
+ f = Filters(keyword="climate change", start_date="2020-05-10", end_date="2020-05-11")
65
+
66
+ # Use async context manager to properly cleanup resources
67
+ async with GdeltClient() as gd:
68
+ # Async article search
69
+ articles = await gd.aarticle_search(f)
70
+
71
+ # Async timeline search
72
+ timeline = await gd.atimeline_search("timelinevol", f)
73
+
74
+ asyncio.run(main())
75
+ ```
76
+
77
+ ### Raw Data Downloads - Events, Mentions & GKG
78
+
79
+ Download and parse GDELT's raw data files directly. Returns data with CAMEO code descriptions for events.
80
+
81
+ ```python
82
+ from gdelt_client import GdeltClient, GdeltTable, OutputFormat
83
+
84
+ gd = GdeltClient()
85
+
86
+ # Download events for a single date
87
+ events = gd.search(
88
+ date="2020-05-10",
89
+ table=GdeltTable.EVENTS,
90
+ output=OutputFormat.DATAFRAME
91
+ )
92
+
93
+ # Download mentions for a date range with full 15-min coverage
94
+ mentions = gd.search(
95
+ date=["2020-05-10", "2020-05-11"],
96
+ table=GdeltTable.MENTIONS,
97
+ coverage=True # Download all 15-minute intervals
98
+ )
99
+
100
+ # Get GeoDataFrame with geometry for mapping
101
+ geo_events = gd.search(
102
+ date="2020-05-10",
103
+ table=GdeltTable.EVENTS,
104
+ output=OutputFormat.GEODATAFRAME
105
+ )
106
+
107
+ # View table schema
108
+ schema = gd.schema(GdeltTable.EVENTS)
109
+ ```
110
+
111
+ **Async example** (downloads files concurrently for better performance):
112
+
113
+ ```python
114
+ import asyncio
115
+ from gdelt_client import GdeltClient, GdeltTable
116
+
117
+ async def main():
118
+ # Use async context manager to properly cleanup resources
119
+ async with GdeltClient() as gd:
120
+ # Async search with concurrent file downloads
121
+ events = await gd.asearch(
122
+ date=["2020-05-10", "2020-05-11"],
123
+ table=GdeltTable.EVENTS,
124
+ coverage=True
125
+ )
126
+ print(events[:5])
127
+ print(f"Total records {len(events)}")
128
+ asyncio.run(main())
129
+ ```
130
+
131
+ **Available tables:** `EVENTS`, `MENTIONS`, `GKG`
132
+ **Available output formats:** `DATAFRAME`, `JSON`, `CSV`, `GEODATAFRAME`
133
+
134
+ ### Article List
135
+
136
+ The `article_search()` method (and async `aarticle_search()`) generates a list of news articles that match the filters. Returns a pandas DataFrame with columns: `url`, `url_mobile`, `title`, `seendate`, `socialimage`, `domain`, `language`, `sourcecountry`.
137
+
138
+ ### Timeline Search
139
+
140
+ The `timeline_search()` method (and async `atimeline_search()`) supports 5 modes:
141
+
142
+ - `timelinevol` - Timeline of coverage volume as a percentage of all monitored articles
143
+ - `timelinevolraw` - Timeline with actual article counts instead of percentages
144
+ - `timelinelang` - Coverage broken down by language (each language as a column)
145
+ - `timelinesourcecountry` - Coverage broken down by source country (each country as a column)
146
+ - `timelinetone` - Average tone of articles over time (see [GDELT docs](https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/) for tone metric details)
147
+
148
+ All modes return a pandas DataFrame with a `datetime` column and data columns.
149
+
150
+ ### Filters
151
+
152
+ The search query passed to the API is constructed from a `gdelt_client.Filters` object.
153
+
154
+ ```python
155
+ from gdelt_client import Filters, near, repeat
156
+
157
+ f = Filters(
158
+ start_date = "2020-05-01",
159
+ end_date = "2020-05-02",
160
+ num_records = 250,
161
+ keyword = "climate change",
162
+ domain = ["bbc.co.uk", "nytimes.com"],
163
+ country = ["UK", "US"],
164
+ theme = "GENERAL_HEALTH",
165
+ near = near(10, "airline", "carbon"),
166
+ repeat = repeat(5, "planet")
167
+ )
168
+ ```
169
+
170
+ Filters for `keyword`, `domain`, `domain_exact`, `country`, `language` and `theme` can be passed either as a single string or as a list of strings. If a list is passed, the values in the list are wrapped in a boolean OR.
171
+
172
+ You must pass either `start_date` and `end_date`, or `timespan`.
173
+
174
+ - `start_date` - The start date for the filter in YYYY-MM-DD format or as a datetime object in UTC time.
175
+ Passing a datetime allows you to specify a time down to seconds granularity. The API officially only supports the most recent 3 months of articles. Making a request for an earlier date range may still return data, but it's not guaranteed.
176
+ - `end_date` - The end date for the filter in YYYY-MM-DD format or as a datetime object in UTC time.
177
+ - `timespan` - A timespan to search for, relative to the time of the request. Must match one of the API's timespan formats - https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/
178
+ - `num_records` - The number of records to return. Only used in article list mode and can be up to 250.
179
+ - `keyword` - Return articles containing the exact phrase `keyword` within the article text.
180
+ - `domain` - Return articles from the specified domain. Does not require an exact match so passing "cnn.com" will match articles from `cnn.com`, `subdomain.cnn.com` and `notactuallycnn.com`.
181
+ - `domain_exact` - Similar to `domain`, but requires an exact match.
182
+ - `country` - Return articles published in a country or list of countries, formatted as the FIPS 2 letter country code.
183
+ - `language` - Return articles published in the given language, formatted as the ISO 639 language code.
184
+ - `theme` - Return articles that cover one of GDELT's GKG Themes. A full list of themes can be found [here](http://data.gdeltproject.org/api/v2/guides/LOOKUP-GKGTHEMES.TXT)
185
+ - `near` - Return articles containing words close to each other in the text. Use `near()` to construct. eg. `near = near(5, "airline", "climate")`, or `multi_near()` if you want to use multiple restrictions eg. `multi_near([(5, "airline", "crisis"), (10, "airline", "climate", "change")], method="AND")` finds "airline" and "crisis" within 5 words, and "airline", "climate", and "change" within 10 words
186
+ - `repeat` - Return articles containing a single word repeated at least a number of times. Use `repeat()` to construct. eg. `repeat = repeat(3, "environment")`, or `multi_repeat()` if you want to use multiple restrictions eg. `repeat = multi_repeat([(2, "airline"), (3, "airport")], "AND")`
187
+ - `tone` - Return articles above or below a particular tone score (ie more positive or more negative than a certain threshold). To use, specify either a greater than or less than sign and a positive or negative number (either an integer or floating point number). To find fairly positive articles, use `tone=">5"` or to search for fairly negative articles, use `tone="<-5"`
188
+ - `tone_absolute` - The same as `tone` but ignores the positive/negative sign and lets you search for high emotion or low emotion articles, regardless of whether they were happy or sad in tone.
189
+
190
+ ## Attribution
191
+
192
+ The JSON schema data files in this package (`src/gdelt_client/data/schemas/`) are based on schemas from [gdeltPyR](https://github.com/linwoodc3/gdeltPyR), which is licensed under the GNU General Public License v3.0.
193
+
194
+ ## Developing gdelt-client
195
+
196
+ PRs & issues are very welcome!
197
+
198
+ ### Setup
199
+
200
+ It's recommended to use a virtual environment for development. Set one up with [uv](https://docs.astral.sh/uv/getting-started/installation/)
201
+
202
+ ```
203
+ uv sync
204
+ ```
205
+
206
+ Tests for this package use `pytest`. Run them with
207
+
208
+ ```
209
+ uv run pytest tests --cov=src/gdelt_client --cov-report=xml --cov-report=term-missing
210
+ ```
211
+
212
+ If your PR adds a new feature or helper, please also add some tests
213
+
214
+ ### Publishing
215
+
216
+ There's a bit of automation set up to help publish a new version of the package to PyPI:
217
+
218
+ 1. Make sure the version string has been updated since the last release. This package follows semantic versioning.
219
+ 2. Create a new release in the GitHub UI, using the new version as the release tag (e.g. `v0.1.0`; the `publish.yml` workflow reads the version from the tag and strips a leading `v`)
220
+ 3. Watch as the `publish.yml` GitHub action builds the package and pushes it to PyPI