pageviewapi 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pageviewapi/__init__.py +25 -0
- pageviewapi/client.py +206 -0
- pageviewapi/period.py +41 -0
- pageviewapi-0.5.0.dist-info/METADATA +65 -0
- pageviewapi-0.5.0.dist-info/RECORD +7 -0
- pageviewapi-0.5.0.dist-info/WHEEL +4 -0
- pageviewapi-0.5.0.dist-info/licenses/LICENSE +22 -0
pageviewapi/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Python client for the Wikimedia Pageview API."""
|
|
2
|
+
|
|
3
|
+
from pageviewapi.client import (
|
|
4
|
+
PageviewResponse,
|
|
5
|
+
ThrottlingException,
|
|
6
|
+
ZeroOrDataNotLoadedException,
|
|
7
|
+
__version__,
|
|
8
|
+
aggregate,
|
|
9
|
+
legacy_pagecounts,
|
|
10
|
+
per_article,
|
|
11
|
+
top,
|
|
12
|
+
unique_devices,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"__version__",
|
|
17
|
+
"aggregate",
|
|
18
|
+
"PageviewResponse",
|
|
19
|
+
"legacy_pagecounts",
|
|
20
|
+
"per_article",
|
|
21
|
+
"ThrottlingException",
|
|
22
|
+
"top",
|
|
23
|
+
"unique_devices",
|
|
24
|
+
"ZeroOrDataNotLoadedException",
|
|
25
|
+
]
|
pageviewapi/client.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Client to wikimedia pageview api.
|
|
2
|
+
|
|
3
|
+
API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
|
|
4
|
+
Supported endpoints:
|
|
5
|
+
- per-article
|
|
6
|
+
- top
|
|
7
|
+
- aggregate
|
|
8
|
+
- unique-devices
|
|
9
|
+
- legacy/pagecounts
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
13
|
+
from typing import Any
|
|
14
|
+
from urllib.parse import quote, unquote
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
try:
    __version__ = version("pageviewapi")
except PackageNotFoundError:
    # Package metadata is unavailable (e.g. running from a source checkout):
    # fall back to the version of this release so the User-Agent sent to the
    # API does not advertise a stale version number.
    __version__ = "0.5.0"

# Identification header sent with every API request.
PROJECT_URL = "https://github.com/Commonists/pageview-api"
USER_AGENT = {"User-Agent": f"Python pageview-api client v{__version__} <{PROJECT_URL}>"}

# Root of the metrics REST API; endpoint and argument paths are appended to it.
API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics"

# Each endpoint has a path prefix (*_ENDPOINT) and a template (*_ARGS) for the
# "/"-separated path segments that follow it.

# Per-article pageviews.
PA_ENDPOINT = "pageviews/per-article"
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"

# Most viewed articles for one day.
TOP_ENDPOINT = "pageviews/top"
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"

# Project-wide aggregated pageviews.
AG_ENDPOINT = "pageviews/aggregate"
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"

# Unique devices.
UD_ENDPOINT = "unique-devices"
UD_ARGS = "{project}/{access}/{granularity}/{start}/{end}"

# Legacy pagecounts.
PC_ENDPOINT = "legacy/pagecounts/aggregate"
PC_ARGS = "{project}/{access_site}/{granularity}/{start}/{end}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class PageviewResponse(dict):  # type: ignore[type-arg]
    """A Wikimedia Pageview API response with attribute-style read access.

    Recursively wraps nested dicts and lists so the full response tree is
    navigable via attributes. Dict data keys always take priority over built-in
    dict methods, so ``response.items`` returns the ``items`` data value rather
    than the ``dict.items`` method when that key is present.

    Use ``from_json`` rather than the constructor directly when the input may
    contain nested dicts or lists — the constructor only wraps the top level.
    """

    def __getattribute__(self, key: str) -> Any:
        """Return the data value stored under ``key``, else the normal attribute.

        Names starting with an underscore are never looked up in the data, so
        dunder and private attribute access keeps its usual behaviour.

        Raises:
            AttributeError: if ``key`` is neither a data key nor an attribute.
        """
        if not key.startswith("_"):
            try:
                return dict.__getitem__(self, key)
            except KeyError:
                # Not a data key: fall through to regular attribute lookup.
                pass
        return super().__getattribute__(key)

    @classmethod
    def from_json(cls, obj: Any) -> Any:
        """Recursively convert a JSON value into a ``PageviewResponse`` tree.

        Dicts become ``PageviewResponse`` instances, lists are rebuilt with
        every element converted, and any other value is returned unchanged.
        """
        if isinstance(obj, dict):
            # Call dict.items(obj) rather than obj.items(): when ``obj`` is
            # already a PageviewResponse holding an "items" key, attribute
            # access would return that data value instead of the method.
            return cls({k: cls.from_json(v) for k, v in dict.items(obj)})
        if isinstance(obj, list):
            return [cls.from_json(item) for item in obj]
        return obj
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ZeroOrDataNotLoadedException(Exception):
    """Signals an HTTP 404 answer: there is no data, or it is not filled yet.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ThrottlingException(Exception):
    """Signals an HTTP 429 answer: the client is sending too many requests.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def per_article(
    project: str,
    page: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch pageview counts for a single article.

    Every argument becomes one segment of the request path; ``page`` is
    percent-encoded first, the others are inserted verbatim.

    >>> import pageviewapi
    >>> pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120')

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    path_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "page": _quote_article_name(page),
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(PA_ENDPOINT, PA_ARGS.format(**path_fields))
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def top(
    project: str,
    year: int | str,
    month: int | str,
    day: int | str,
    access: str = "all-access",
) -> dict[str, Any]:
    """Fetch the 1000 most visited articles of a project for one date.

    ``year``, ``month`` and ``day`` are formatted into the request path as
    given, whether passed as ints or strings.

    >>> import pageviewapi
    >>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
    >>> views['items'][0]['articles'][0]
    {'article': 'Wikipédia:Accueil_principal', 'rank': 1, 'views': 1600547}

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    path_fields = {
        "project": project,
        "access": access,
        "year": year,
        "month": month,
        "day": day,
    }
    return _api(TOP_ENDPOINT, TOP_ARGS.format(**path_fields))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def aggregate(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch pageview counts aggregated over a whole project.

    Every argument is inserted verbatim as one segment of the request path.

    >>> import pageviewapi
    >>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    path_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(AG_ENDPOINT, AG_ARGS.format(**path_fields))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def unique_devices(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the unique-devices metric for a project.

    Every argument is inserted verbatim as one segment of the request path.

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    path_fields = {
        "project": project,
        "access": access,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(UD_ENDPOINT, UD_ARGS.format(**path_fields))
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def legacy_pagecounts(
    project: str,
    start: str,
    end: str,
    access_site: str = "all-sites",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch aggregated legacy pagecounts for a project.

    ``".org"`` is appended to ``project`` before it is placed in the request
    path, except for the special value ``"all-projects"`` which is sent as is.

    >>> import pageviewapi
    >>> pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100')

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    if project == "all-projects":
        project_segment = "all-projects"
    else:
        project_segment = f"{project}.org"

    path_fields = {
        "project": project_segment,
        "access_site": access_site,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(PC_ENDPOINT, PC_ARGS.format(**path_fields))
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _api(
    end_point: str,
    args: str,
    api_url: str = API_BASE_URL,
    timeout: float = 30.0,
) -> dict[str, Any]:
    """Send a GET request to one API endpoint and return the decoded response.

    Args:
        end_point: endpoint path prefix, e.g. ``"pageviews/top"``.
        args: already formatted, "/"-separated path arguments.
        api_url: base URL the endpoint and arguments are appended to.
        timeout: seconds to wait for the server before ``requests`` gives up;
            without a timeout a stalled connection would block forever.

    Returns:
        The JSON body converted with ``PageviewResponse.from_json`` when the
        status code is 200.

    Raises:
        ZeroOrDataNotLoadedException: the status code is 404.
        ThrottlingException: the status code is 429.
        requests.HTTPError: any other 4xx/5xx status code.
        requests.Timeout: no answer was received within ``timeout`` seconds.
    """
    url = "/".join([api_url, end_point, args])
    response = requests.get(url, headers=USER_AGENT, timeout=timeout)

    if response.status_code == 200:
        return PageviewResponse.from_json(response.json())
    if response.status_code == 404:
        raise ZeroOrDataNotLoadedException()
    if response.status_code == 429:
        raise ThrottlingException()

    response.raise_for_status()
    # raise_for_status() only raises for 4xx/5xx, so this line is reached for
    # any other non-200 status; an empty mapping is returned in that case.
    return {}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _quote_article_name(page: str) -> str:
    """Percent-encode an article title for use as a single URL path segment.

    The title is unquoted first so an already-encoded title is not encoded
    twice; no character is treated as safe, so ``/`` is escaped as well.
    """
    decoded_title = unquote(page)
    return quote(decoded_title, safe="")
|
pageviewapi/period.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Helper functions for aggregating pageviews over a time period."""
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
|
|
5
|
+
import pageviewapi.client
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def sum_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> int:
    """Return the summed pageviews of ``page`` from ``last`` days ago to today.

    The counts come from one ``per_article`` request whose range starts
    ``last`` days before today and ends today.
    """
    first_day = _days_ago(last)
    final_day = _today()
    response = pageviewapi.client.per_article(
        project,
        page,
        first_day,
        final_day,
        access=access,
        agent=agent,
    )
    return sum(entry["views"] for entry in response["items"])
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def avg_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> float:
    """Return the mean pageviews per returned item from ``last`` days ago to today.

    The counts come from one ``per_article`` request whose range starts
    ``last`` days before today and ends today.
    """
    first_day = _days_ago(last)
    final_day = _today()
    response = pageviewapi.client.per_article(
        project,
        page,
        first_day,
        final_day,
        access=access,
        agent=agent,
    )
    view_counts = [entry["views"] for entry in response["items"]]
    return _avg(view_counts)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _today() -> str:
    """Return the current local date formatted as ``YYYYMMDD``."""
    current_date = datetime.date.today()
    return f"{current_date:%Y%m%d}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _days_ago(days: int) -> str:
    """Return the local date ``days`` days before today as ``YYYYMMDD``."""
    reference = datetime.date.today()
    target = reference - datetime.timedelta(days=days)
    return target.strftime("%Y%m%d")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _avg(values: list[int | float]) -> float:
|
|
41
|
+
return sum(values) / len(values)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pageviewapi
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Wikimedia Pageview API client
|
|
5
|
+
Project-URL: Homepage, https://github.com/Commonists/pageview-api
|
|
6
|
+
Author-email: Commonists <ps.huard@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: requests>=2.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# pageview-api
|
|
14
|
+
[](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
|
|
15
|
+
[](https://codecov.io/gh/Commonists/pageview-api)
|
|
16
|
+
[](https://pypi.python.org/pypi/pageviewapi)
|
|
17
|
+
[](http://opensource.org/licenses/MIT)
|
|
18
|
+
|
|
19
|
+
Wikimedia Pageview API client for Python 3.10+
|
|
20
|
+
|
|
21
|
+
Installation
|
|
22
|
+
------------
|
|
23
|
+
To install system-wide (prefix the command with `sudo` if your system requires it), use:
|
|
24
|
+
```sh
|
|
25
|
+
pip install git+https://github.com/Commonists/pageview-api.git
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Examples
|
|
29
|
+
--------
|
|
30
|
+
|
|
31
|
+
Number of views of the article Paris on English Wikipedia from November 6th to November 20th, 2015
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import pageviewapi
|
|
35
|
+
pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
|
|
36
|
+
access='all-access', agent='all-agents', granularity='daily')
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Aggregation: get a daily pageview count time series for French Wikipedia for the month of October 2015
|
|
40
|
+
```python
|
|
41
|
+
import pageviewapi
|
|
42
|
+
pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
|
|
43
|
+
agent='all-agents', granularity='daily')
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Most viewed articles on French Wikipedia on November 14th, 2015
|
|
47
|
+
```python
|
|
48
|
+
import pageviewapi
|
|
49
|
+
pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Sum (resp. average) of views over the last 30 days
|
|
53
|
+
```python
|
|
54
|
+
import pageviewapi.period
|
|
55
|
+
pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
|
|
56
|
+
access='all-access', agent='all-agents')
|
|
57
|
+
|
|
58
|
+
pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Monthly legacy pagecounts (2008 until end of 2016) on a project
|
|
62
|
+
```python
|
|
63
|
+
import pageviewapi
|
|
64
|
+
pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
|
|
65
|
+
```
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pageviewapi/__init__.py,sha256=XQuW8nHWyAG_5DbG1w1xmBEWuciq-_hvYfllPAcSMAg,485
|
|
2
|
+
pageviewapi/client.py,sha256=bDB9jZn2J3zRb9aNcrrlKPuWkOUjxTwvDCq49MC3zVk,5998
|
|
3
|
+
pageviewapi/period.py,sha256=CTlTrUAGV-dS4oRFBzgEbcUxL__9aCN9d1VIMp2_xcQ,1153
|
|
4
|
+
pageviewapi-0.5.0.dist-info/METADATA,sha256=bwXjOIoxJoYcD0yeU_7z1j3ErzhbXtfnxawYpnpuntU,2309
|
|
5
|
+
pageviewapi-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
6
|
+
pageviewapi-0.5.0.dist-info/licenses/LICENSE,sha256=K_ASJjZ01tNhhxFx5LTnc8GvQzALj9rsa1KihGESI0c,1078
|
|
7
|
+
pageviewapi-0.5.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2015 Commonists
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|