pageviewapi 0.2.3__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv sync *)",
5
+ "Bash(uv run *)"
6
+ ]
7
+ }
8
+ }
@@ -0,0 +1,10 @@
1
+ [*]
2
+ end_of_line = lf
3
+ insert_final_newline = true
4
+ charset = utf-8
5
+
6
+ # 4 space indentation
7
+ [*.py]
8
+ indent_style = space
9
+ indent_size = 4
10
+ trim_trailing_whitespace = true
@@ -0,0 +1,26 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ lint:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v4
10
+ - uses: astral-sh/setup-uv@v5
11
+ - run: uv run ruff check .
12
+ - run: uv run ruff format --check .
13
+
14
+ test:
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ matrix:
18
+ python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: astral-sh/setup-uv@v5
22
+ - run: uv run --python ${{ matrix.python-version }} pytest tests/ --cov=pageviewapi --cov-report=term-missing --cov-report=xml
23
+ - uses: codecov/codecov-action@v5
24
+ if: matrix.python-version == '3.14'
25
+ with:
26
+ token: ${{ secrets.CODECOV_TOKEN }}
@@ -0,0 +1,54 @@
1
+ *~
2
+
3
+ *.py[cod]
4
+
5
+ # C extensions
6
+ *.so
7
+
8
+ # Packages
9
+ *.egg
10
+ *.egg-info
11
+ dist
12
+ build
13
+ eggs
14
+ parts
15
+ bin
16
+ var
17
+ sdist
18
+ develop-eggs
19
+ .installed.cfg
20
+ lib
21
+ lib64
22
+
23
+ # Installer logs
24
+ pip-log.txt
25
+
26
+ # Unit test / coverage reports
27
+ .coverage
28
+ .tox
29
+ nosetests.xml
30
+ cover/
31
+
32
+ # Translations
33
+ *.mo
34
+
35
+ # Mr Developer
36
+ .mr.developer.cfg
37
+ .project
38
+ .pydevproject
39
+
40
+ # Typical virtual environments
41
+ .venv
42
+ venv
43
+
44
+ # uv
45
+ uv.lock
46
+ .python-version
47
+
48
+ # pytest / coverage
49
+ .coverage
50
+ htmlcov/
51
+ .pytest_cache/
52
+
53
+ # ruff
54
+ .ruff_cache/
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Commonists
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,65 @@
1
+ Metadata-Version: 2.4
2
+ Name: pageviewapi
3
+ Version: 0.5.0
4
+ Summary: Wikimedia Pageview API client
5
+ Project-URL: Homepage, https://github.com/Commonists/pageview-api
6
+ Author-email: Commonists <ps.huard@gmail.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: requests>=2.0
11
+ Description-Content-Type: text/markdown
12
+
13
+ # pageview-api
14
+ [![CI](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml/badge.svg)](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
15
+ [![codecov](https://codecov.io/gh/Commonists/pageview-api/branch/master/graph/badge.svg)](https://codecov.io/gh/Commonists/pageview-api)
16
+ [![Pypi](https://img.shields.io/pypi/v/pageviewapi.svg?style=flat)](https://pypi.python.org/pypi/pageviewapi)
17
+ [![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)
18
+
19
+ Wikimedia Pageview API client for Python 3.10+
20
+
21
+ Installation
22
+ ------------
23
+ To install the package directly from the GitHub repository, use:
24
+ ```sh
25
+ pip install git+https://github.com/Commonists/pageview-api.git
26
+ ```
27
+
28
+ Examples
29
+ --------
30
+
31
+ Number of views of the article Paris on English Wikipedia from November 6th to November 20th, 2015
32
+
33
+ ```python
34
+ import pageviewapi
35
+ pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
36
+ access='all-access', agent='all-agents', granularity='daily')
37
+ ```
38
+
39
+ Aggregation: Get a daily pageview count timeseries of French Wikipedia for the month of October 2015
40
+ ```python
41
+ import pageviewapi
42
+ pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
43
+ agent='all-agents', granularity='daily')
44
+ ```
45
+
46
+ Most viewed articles on French Wikipedia on November 14th, 2015
47
+ ```python
48
+ import pageviewapi
49
+ pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
50
+ ```
51
+
52
+ Sum (resp. average) of views during the last 30 days
53
+ ```python
54
+ import pageviewapi.period
55
+ pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
56
+ access='all-access', agent='all-agents')
57
+
58
+ pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
59
+ ```
60
+
61
+ Monthly legacy pagecounts (2008 until end of 2016) on a project
62
+ ```python
63
+ import pageviewapi
64
+ pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
65
+ ```
@@ -1,9 +1,10 @@
1
1
  # pageview-api
2
- [![Build Status](https://travis-ci.org/Commonists/pageview-api.svg?branch=master)](https://travis-ci.org/Commonists/pageview-api)
3
- [![Code Health](https://landscape.io/github/Commonists/pageview-api/master/landscape.svg?style=flat)](https://landscape.io/github/Commonists/pageview-api/master)
2
+ [![CI](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml/badge.svg)](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
3
+ [![codecov](https://codecov.io/gh/Commonists/pageview-api/branch/master/graph/badge.svg)](https://codecov.io/gh/Commonists/pageview-api)
4
+ [![Pypi](https://img.shields.io/pypi/v/pageviewapi.svg?style=flat)](https://pypi.python.org/pypi/pageviewapi)
4
5
  [![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)
5
6
 
6
- Wikimedia Pageview API client
7
+ Wikimedia Pageview API client for Python 3.10+
7
8
 
8
9
  Installation
9
10
  ------------
@@ -44,3 +45,9 @@ pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
44
45
 
45
46
  pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
46
47
  ```
48
+
49
+ Monthly legacy pagecounts (2008 until end of 2016) on a project
50
+ ```python
51
+ import pageviewapi
52
+ pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
53
+ ```
@@ -0,0 +1,25 @@
1
+ """Python client for the Wikimedia Pageview API."""
2
+
3
+ from pageviewapi.client import (
4
+ PageviewResponse,
5
+ ThrottlingException,
6
+ ZeroOrDataNotLoadedException,
7
+ __version__,
8
+ aggregate,
9
+ legacy_pagecounts,
10
+ per_article,
11
+ top,
12
+ unique_devices,
13
+ )
14
+
15
+ __all__ = [
16
+ "__version__",
17
+ "aggregate",
18
+ "PageviewResponse",
19
+ "legacy_pagecounts",
20
+ "per_article",
21
+ "ThrottlingException",
22
+ "top",
23
+ "unique_devices",
24
+ "ZeroOrDataNotLoadedException",
25
+ ]
@@ -0,0 +1,206 @@
1
+ """Client to wikimedia pageview api.
2
+
3
+ API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
4
+ Supported endpoints:
5
+ - per-article
6
+ - top
7
+ - aggregate
8
+ - unique-devices
9
+ - legacy/pagecounts
10
+ """
11
+
12
+ from importlib.metadata import PackageNotFoundError, version
13
+ from typing import Any
14
+ from urllib.parse import quote, unquote
15
+
16
+ import requests
17
+
18
try:
    # Prefer the version recorded in the installed distribution's metadata.
    __version__ = version("pageviewapi")
except PackageNotFoundError:
    # Not installed (e.g. running from a source checkout): fall back to the
    # version declared in pyproject.toml. Keep both values in sync on release.
    __version__ = "0.5.0"

# Identifies this client in the User-Agent header sent with API requests.
PROJECT_URL = "https://github.com/Commonists/pageview-api"
USER_AGENT = {"User-Agent": f"Python pageview-api client v{__version__} <{PROJECT_URL}>"}

# Common prefix of every metrics endpoint. A request URL is built as
# "<API_BASE_URL>/<*_ENDPOINT>/<formatted *_ARGS>".
API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics"

# Per-article pageviews.
PA_ENDPOINT = "pageviews/per-article"
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"

# Most viewed articles of a project for a date.
TOP_ENDPOINT = "pageviews/top"
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"

# Aggregated pageviews of a project.
AG_ENDPOINT = "pageviews/aggregate"
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"

# Unique devices.
UD_ENDPOINT = "unique-devices"
UD_ARGS = "{project}/{access}/{granularity}/{start}/{end}"

# Legacy pagecounts.
PC_ENDPOINT = "legacy/pagecounts/aggregate"
PC_ARGS = "{project}/{access_site}/{granularity}/{start}/{end}"
42
+
43
+
44
class PageviewResponse(dict):  # type: ignore[type-arg]
    """Dictionary holding a Wikimedia Pageview API payload, readable through attributes.

    Every key of the mapping can be read as an attribute (``response.views`` is
    ``response["views"]``). A data key wins over a ``dict`` method of the same
    name: when an ``items`` key is present, ``response.items`` is that value and
    not the bound ``dict.items`` method, so ``response.items()`` would try to
    call the data value. Use ``dict.items(response)`` to iterate over pairs in
    that case. Names beginning with an underscore are never looked up in the
    data.

    The constructor wraps only the top level. Build instances with
    ``from_json`` when the payload may contain nested dicts or lists, so that
    the whole tree supports attribute access.
    """

    def __getattribute__(self, key: str) -> Any:
        # Private and dunder names always go through normal attribute lookup.
        if key.startswith("_"):
            return super().__getattribute__(key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Not a data key: fall through to regular attributes and methods.
            pass
        return super().__getattribute__(key)

    @classmethod
    def from_json(cls, obj: Any) -> Any:
        """Return *obj* with every dict, at any depth, replaced by a ``PageviewResponse``.

        Lists are rebuilt element by element; any other value is returned as is.
        """
        if isinstance(obj, list):
            return [cls.from_json(element) for element in obj]
        if not isinstance(obj, dict):
            return obj
        converted = {}
        for name, value in obj.items():
            converted[name] = cls.from_json(value)
        return cls(converted)
72
+
73
+
74
class ZeroOrDataNotLoadedException(Exception):
    """Raised when the API answers with HTTP 404.

    A 404 means that there is no data for the request, or that the data has
    not been filled in yet.

    https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
79
+
80
+
81
class ThrottlingException(Exception):
    """Raised when the API answers with HTTP 429.

    A 429 means that the client is sending too many requests.

    https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
86
+
87
+
88
def per_article(
    project: str,
    page: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the pageview counts of a single article.

    The page title is percent-encoded before it is placed in the URL; the
    other arguments are inserted as given.

    Example, views of the Paris article between 2015-11-06 and 2015-11-20:

    >>> import pageviewapi
    >>> pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120')
    """
    url_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "page": _quote_article_name(page),
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(PA_ENDPOINT, PA_ARGS.format(**url_fields))
112
+
113
+
114
def top(
    project: str,
    year: int | str,
    month: int | str,
    day: int | str,
    access: str = "all-access",
) -> dict[str, Any]:
    """Top 1000 most visited articles for a project on a given date.

    ``month`` and ``day`` are zero-padded to two characters before being put in
    the URL, so ``top('fr.wikipedia', 2015, 1, 5)`` requests ``2015/01/05``
    instead of ``2015/1/5``. Values that are already two or more characters
    long (``11``, ``'05'``, ``'all-days'``) are used unchanged.

    Args:
        project: Project name, e.g. ``'fr.wikipedia'``.
        year: Year of the date, as a number or a string.
        month: Month of the date, as a number or a string.
        day: Day of the date, as a number or a string.
        access: Access method to filter on; defaults to ``'all-access'``.

    Returns:
        The decoded API response.

    Raises:
        ZeroOrDataNotLoadedException: The API answered with HTTP 404.
        ThrottlingException: The API answered with HTTP 429.

    >>> import pageviewapi
    >>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
    >>> views['items'][0]['articles'][0]
    {'article': 'Wikipédia:Accueil_principal', 'rank': 1, 'views': 1600547}
    """
    args = TOP_ARGS.format(
        project=project,
        access=access,
        year=year,
        # str.zfill only pads values shorter than two characters.
        month=str(month).zfill(2),
        day=str(day).zfill(2),
    )
    return _api(TOP_ENDPOINT, args)
130
+
131
+
132
def aggregate(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the aggregated pageview counts of a whole project.

    Example, French Wikipedia over October 2015:

    >>> import pageviewapi
    >>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')
    """
    url_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(AG_ENDPOINT, AG_ARGS.format(**url_fields))
154
+
155
+
156
def unique_devices(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the number of unique devices that accessed a project."""
    url_fields = {
        "project": project,
        "access": access,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(UD_ENDPOINT, UD_ARGS.format(**url_fields))
166
+
167
+
168
def legacy_pagecounts(
    project: str,
    start: str,
    end: str,
    access_site: str = "all-sites",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Legacy pagecounts aggregate.

    The project is sent with a ``.org`` suffix. The suffix is appended
    automatically unless *project* is the special value ``'all-projects'`` or
    already ends in ``.org``, so both ``'fr.wikipedia'`` and
    ``'fr.wikipedia.org'`` query ``fr.wikipedia.org``.

    Args:
        project: Project name, with or without the ``.org`` suffix, or
            ``'all-projects'``.
        start: First timestamp of the range.
        end: Last timestamp of the range.
        access_site: Site to filter on; defaults to ``'all-sites'``.
        granularity: Time unit of the returned series; defaults to ``'daily'``.

    Returns:
        The decoded API response.

    Raises:
        ZeroOrDataNotLoadedException: The API answered with HTTP 404.
        ThrottlingException: The API answered with HTTP 429.

    >>> import pageviewapi
    >>> pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100')
    """
    if project == "all-projects" or project.endswith(".org"):
        # Already in the form the endpoint expects; do not double the suffix.
        project_arg = project
    else:
        project_arg = f"{project}.org"
    args = PC_ARGS.format(
        project=project_arg,
        start=start,
        end=end,
        access_site=access_site,
        granularity=granularity,
    )
    return _api(PC_ENDPOINT, args)
189
+
190
+
191
def _api(
    end_point: str,
    args: str,
    api_url: str = API_BASE_URL,
    timeout: float | tuple[float, float] | None = 30.0,
) -> dict[str, Any]:
    """Send a GET request to an endpoint and decode the JSON answer.

    Args:
        end_point: Endpoint path below *api_url*, e.g. ``"pageviews/top"``.
        args: Already formatted, slash-separated endpoint arguments.
        api_url: Base URL of the API.
        timeout: Timeout in seconds handed to ``requests.get``. Without one a
            stalled connection would block the caller indefinitely. Pass
            ``None`` to wait forever.

    Returns:
        The response body wrapped with ``PageviewResponse.from_json`` on HTTP
        200; an empty dict for any other status that is not an error.

    Raises:
        ZeroOrDataNotLoadedException: The API answered with HTTP 404.
        ThrottlingException: The API answered with HTTP 429.
        requests.HTTPError: Any other 4xx or 5xx answer.
    """
    url = "/".join([api_url, end_point, args])
    response = requests.get(url, headers=USER_AGENT, timeout=timeout)
    status = response.status_code
    if status == 200:
        return PageviewResponse.from_json(response.json())
    if status == 404:
        raise ZeroOrDataNotLoadedException(f"No data, or data not loaded yet (HTTP 404): {url}")
    if status == 429:
        raise ThrottlingException(f"Too many requests (HTTP 429): {url}")
    # Any remaining 4xx/5xx status is turned into requests.HTTPError here.
    response.raise_for_status()
    # Reached only for a non-200 status that raise_for_status() does not treat
    # as an error; no payload is decoded for it, so hand back an empty mapping.
    return {}
203
+
204
+
205
+ def _quote_article_name(page: str) -> str:
206
+ return quote(unquote(page), safe="")
@@ -0,0 +1,41 @@
1
+ """Helper functions for aggregating pageviews over a time period."""
2
+
3
+ import datetime
4
+
5
+ import pageviewapi.client
6
+
7
+
8
def sum_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> int:
    """Return the total number of views of a page over the last ``last`` days.

    The range runs from ``last`` days before today up to today.
    """
    range_start = _days_ago(last)
    range_end = _today()
    response = pageviewapi.client.per_article(project, page, range_start, range_end, access=access, agent=agent)
    total = 0
    for entry in response["items"]:
        total += entry["views"]
    return total
18
+
19
+
20
def avg_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> float:
    """Return the mean number of views per returned item over the last ``last`` days.

    The range runs from ``last`` days before today up to today.
    """
    range_start = _days_ago(last)
    range_end = _today()
    response = pageviewapi.client.per_article(project, page, range_start, range_end, access=access, agent=agent)
    view_counts = []
    for entry in response["items"]:
        view_counts.append(entry["views"])
    return _avg(view_counts)
30
+
31
+
32
+ def _today() -> str:
33
+ return datetime.date.today().strftime("%Y%m%d")
34
+
35
+
36
+ def _days_ago(days: int) -> str:
37
+ return (datetime.date.today() - datetime.timedelta(days=days)).strftime("%Y%m%d")
38
+
39
+
40
+ def _avg(values: list[int | float]) -> float:
41
+ return sum(values) / len(values)
@@ -0,0 +1,31 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pageviewapi"
7
+ version = "0.5.0"
8
+ description = "Wikimedia Pageview API client"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Commonists", email = "ps.huard@gmail.com" }]
12
+ requires-python = ">=3.10"
13
+ dependencies = ["requests>=2.0"]
14
+
15
+ [project.urls]
16
+ Homepage = "https://github.com/Commonists/pageview-api"
17
+
18
+ [dependency-groups]
19
+ dev = ["pytest>=8.0", "pytest-cov>=5.0", "ruff>=0.9", "pytest-mock>=3.0"]
20
+
21
+ [tool.ruff]
22
+ line-length = 120
23
+
24
+ [tool.ruff.lint]
25
+ select = ["E", "F", "I", "UP"]
26
+
27
+ [tool.pytest.ini_options]
28
+ testpaths = ["tests"]
29
+
30
+ [tool.coverage.run]
31
+ source = ["pageviewapi"]
File without changes
@@ -0,0 +1,235 @@
1
+ """Tests for pageviewapi.client."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+
7
+ from pageviewapi.client import (
8
+ API_BASE_URL,
9
+ USER_AGENT,
10
+ PageviewResponse,
11
+ ThrottlingException,
12
+ ZeroOrDataNotLoadedException,
13
+ _api,
14
+ _quote_article_name,
15
+ aggregate,
16
+ legacy_pagecounts,
17
+ per_article,
18
+ top,
19
+ unique_devices,
20
+ )
21
+
22
+ ITEMS_RESPONSE = {"items": [{"views": 1234}]}
23
+
24
+
25
+ @pytest.fixture
26
+ def ok_response() -> MagicMock:
27
+ r = MagicMock()
28
+ r.status_code = 200
29
+ r.json.return_value = ITEMS_RESPONSE
30
+ return r
31
+
32
+
33
+ @pytest.fixture
34
+ def not_found_response() -> MagicMock:
35
+ r = MagicMock()
36
+ r.status_code = 404
37
+ return r
38
+
39
+
40
+ @pytest.fixture
41
+ def throttled_response() -> MagicMock:
42
+ r = MagicMock()
43
+ r.status_code = 429
44
+ return r
45
+
46
+
47
+ def test_per_article_returns_json(ok_response):
48
+ with patch("requests.get", return_value=ok_response) as mock_get:
49
+ result = per_article("en.wikipedia", "Paris", "20151106", "20151120")
50
+ assert result == ITEMS_RESPONSE
51
+ url = mock_get.call_args[0][0]
52
+ assert "pageviews/per-article" in url
53
+ assert "en.wikipedia" in url
54
+ assert "Paris" in url
55
+ assert "20151106" in url
56
+ assert "20151120" in url
57
+
58
+
59
+ def test_per_article_default_args_in_url(ok_response):
60
+ with patch("requests.get", return_value=ok_response) as mock_get:
61
+ per_article("en.wikipedia", "Paris", "20151106", "20151120")
62
+ url = mock_get.call_args[0][0]
63
+ assert "all-access" in url
64
+ assert "all-agents" in url
65
+ assert "daily" in url
66
+
67
+
68
+ def test_per_article_custom_args(ok_response):
69
+ with patch("requests.get", return_value=ok_response) as mock_get:
70
+ per_article(
71
+ "en.wikipedia", "Paris", "20151106", "20151120", access="desktop", agent="user", granularity="monthly"
72
+ )
73
+ url = mock_get.call_args[0][0]
74
+ assert "desktop" in url
75
+ assert "user" in url
76
+ assert "monthly" in url
77
+
78
+
79
+ def test_per_article_404_raises(not_found_response):
80
+ with patch("requests.get", return_value=not_found_response):
81
+ with pytest.raises(ZeroOrDataNotLoadedException):
82
+ per_article("en.wikipedia", "Paris", "20151106", "20151120")
83
+
84
+
85
+ def test_per_article_429_raises(throttled_response):
86
+ with patch("requests.get", return_value=throttled_response):
87
+ with pytest.raises(ThrottlingException):
88
+ per_article("en.wikipedia", "Paris", "20151106", "20151120")
89
+
90
+
91
+ def test_top_returns_json(ok_response):
92
+ with patch("requests.get", return_value=ok_response) as mock_get:
93
+ result = top("fr.wikipedia", 2015, 11, 14)
94
+ assert result == ITEMS_RESPONSE
95
+ url = mock_get.call_args[0][0]
96
+ assert "pageviews/top" in url
97
+ assert "fr.wikipedia" in url
98
+
99
+
100
+ def test_top_url_contains_date(ok_response):
101
+ with patch("requests.get", return_value=ok_response) as mock_get:
102
+ top("fr.wikipedia", 2015, 11, 14)
103
+ url = mock_get.call_args[0][0]
104
+ assert "2015" in url
105
+ assert "11" in url
106
+ assert "14" in url
107
+
108
+
109
+ def test_aggregate_returns_json(ok_response):
110
+ with patch("requests.get", return_value=ok_response) as mock_get:
111
+ result = aggregate("fr.wikipedia", "2015100100", "2015103100")
112
+ assert result == ITEMS_RESPONSE
113
+ url = mock_get.call_args[0][0]
114
+ assert "pageviews/aggregate" in url
115
+
116
+
117
+ def test_unique_devices_returns_json(ok_response):
118
+ with patch("requests.get", return_value=ok_response) as mock_get:
119
+ result = unique_devices("en.wikipedia", "20200101", "20200131")
120
+ assert result == ITEMS_RESPONSE
121
+ url = mock_get.call_args[0][0]
122
+ assert "unique-devices" in url
123
+
124
+
125
+ def test_legacy_pagecounts_returns_json(ok_response):
126
+ with patch("requests.get", return_value=ok_response) as mock_get:
127
+ result = legacy_pagecounts("fr.wikipedia", "2010010100", "2011010100")
128
+ assert result == ITEMS_RESPONSE
129
+ url = mock_get.call_args[0][0]
130
+ assert "legacy/pagecounts" in url
131
+ assert "fr.wikipedia.org" in url
132
+
133
+
134
+ def test_legacy_pagecounts_all_projects(ok_response):
135
+ with patch("requests.get", return_value=ok_response) as mock_get:
136
+ legacy_pagecounts("all-projects", "2010010100", "2011010100")
137
+ url = mock_get.call_args[0][0]
138
+ assert "all-projects" in url
139
+ assert "all-projects.org" not in url
140
+
141
+
142
+ def test_api_sends_user_agent(ok_response):
143
+ with patch("requests.get", return_value=ok_response) as mock_get:
144
+ _api("pageviews/per-article", "en.wikipedia/all-access/all-agents/Paris/daily/20200101/20200131")
145
+ _, kwargs = mock_get.call_args
146
+ assert kwargs["headers"] == USER_AGENT
147
+
148
+
149
+ def test_api_other_error_raises(ok_response):
150
+ ok_response.status_code = 500
151
+ ok_response.raise_for_status.side_effect = Exception("server error")
152
+ with patch("requests.get", return_value=ok_response):
153
+ with pytest.raises(Exception, match="server error"):
154
+ _api("pageviews/per-article", "bad/args")
155
+
156
+
157
+ def test_api_constructs_url_correctly(ok_response):
158
+ with patch("requests.get", return_value=ok_response) as mock_get:
159
+ _api("my/endpoint", "arg1/arg2")
160
+ url = mock_get.call_args[0][0]
161
+ assert url == f"{API_BASE_URL}/my/endpoint/arg1/arg2"
162
+
163
+
164
+ def test_pageview_response_attribute_access():
165
+ d = PageviewResponse({"rank": 1, "views": 42})
166
+ assert d.rank == 1
167
+ assert d.views == 42
168
+
169
+
170
+ def test_pageview_response_dict_access_still_works():
171
+ d = PageviewResponse({"rank": 1})
172
+ assert d["rank"] == 1
173
+
174
+
175
+ def test_pageview_response_missing_key_raises_attribute_error():
176
+ d = PageviewResponse({"a": 1})
177
+ with pytest.raises(AttributeError):
178
+ _ = d.missing
179
+
180
+
181
+ def test_pageview_response_items_key_accessible_as_attribute():
182
+ d = PageviewResponse.from_json({"items": [{"views": 1}]})
183
+ assert isinstance(d.items, list)
184
+ assert d.items[0].views == 1
185
+
186
+
187
+ def test_pageview_response_keys_key_accessible_as_attribute():
188
+ d = PageviewResponse({"keys": ["a", "b"]})
189
+ assert d.keys == ["a", "b"]
190
+
191
+
192
+ def test_pageview_response_dict_method_accessible_when_key_absent():
193
+ d = PageviewResponse({"rank": 1})
194
+ assert callable(d.items)
195
+
196
+
197
+ def test_pageview_response_nested_dict_is_wrapped():
198
+ d = PageviewResponse.from_json({"outer": {"inner": 99}})
199
+ assert isinstance(d.outer, PageviewResponse)
200
+ assert d.outer.inner == 99
201
+
202
+
203
+ def test_pageview_response_nested_list_of_dicts_is_wrapped():
204
+ d = PageviewResponse.from_json({"articles": [{"title": "Paris", "views": 10}]})
205
+ assert isinstance(d["articles"][0], PageviewResponse)
206
+ assert d["articles"][0].title == "Paris"
207
+
208
+
209
+ def test_api_returns_pageview_response(ok_response):
210
+ with patch("requests.get", return_value=ok_response):
211
+ result = per_article("en.wikipedia", "Paris", "20151106", "20151120")
212
+ assert isinstance(result, PageviewResponse)
213
+
214
+
215
+ def test_quote_article_name_plain():
216
+ assert _quote_article_name("Paris") == "Paris"
217
+
218
+
219
+ def test_quote_article_name_slash():
220
+ assert _quote_article_name("AC/DC") == "AC%2FDC"
221
+
222
+
223
+ def test_per_article_encodes_space_in_page_name(ok_response):
224
+ with patch("requests.get", return_value=ok_response) as mock_get:
225
+ per_article("en.wikipedia", "New York", "20200101", "20200131")
226
+ url = mock_get.call_args[0][0]
227
+ assert "New%20York" in url
228
+ assert "New York" not in url
229
+
230
+
231
+ def test_per_article_encodes_slash_in_page_name(ok_response):
232
+ with patch("requests.get", return_value=ok_response) as mock_get:
233
+ per_article("en.wikipedia", "AC/DC", "20200101", "20200131")
234
+ url = mock_get.call_args[0][0]
235
+ assert "AC%2FDC" in url
@@ -0,0 +1,69 @@
1
+ """Tests for pageviewapi.period."""
2
+
3
+ from unittest.mock import patch
4
+
5
+ import pytest
6
+
7
+ from pageviewapi.period import _avg, _days_ago, _today, avg_last, sum_last
8
+
9
+ MOCK_VIEWS = {"items": [{"views": 100}, {"views": 200}, {"views": 300}]}
10
+
11
+
12
+ def test_today_format():
13
+ result = _today()
14
+ assert len(result) == 8
15
+ assert result.isdigit()
16
+
17
+
18
+ def test_days_ago_format():
19
+ result = _days_ago(30)
20
+ assert len(result) == 8
21
+ assert result.isdigit()
22
+
23
+
24
+ def test_days_ago_is_before_today():
25
+ assert _days_ago(1) < _today()
26
+
27
+
28
+ def test_days_ago_zero_equals_today():
29
+ assert _days_ago(0) == _today()
30
+
31
+
32
+ def test_avg_integers():
33
+ assert _avg([10, 20, 30]) == 20.0
34
+
35
+
36
+ def test_avg_single_value():
37
+ assert _avg([42]) == 42.0
38
+
39
+
40
+ def test_avg_floats():
41
+ assert _avg([1.5, 2.5]) == pytest.approx(2.0)
42
+
43
+
44
+ def test_sum_last_returns_total():
45
+ with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS):
46
+ result = sum_last("en.wikipedia", "Python_(programming_language)")
47
+ assert result == 600
48
+
49
+
50
+ def test_sum_last_passes_date_range():
51
+ with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS) as mock_pa:
52
+ sum_last("en.wikipedia", "Python_(programming_language)", last=7)
53
+ call_args = mock_pa.call_args[0]
54
+ start, end = call_args[2], call_args[3]
55
+ assert start < end
56
+
57
+
58
+ def test_avg_last_returns_average():
59
+ with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS):
60
+ result = avg_last("en.wikipedia", "Python_(programming_language)")
61
+ assert result == pytest.approx(200.0)
62
+
63
+
64
+ def test_avg_last_passes_access_and_agent():
65
+ with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS) as mock_pa:
66
+ avg_last("en.wikipedia", "Python", access="desktop", agent="user")
67
+ _, kwargs = mock_pa.call_args
68
+ assert kwargs["access"] == "desktop"
69
+ assert kwargs["agent"] == "user"
@@ -1 +0,0 @@
1
- include *.md
@@ -1,63 +0,0 @@
1
- Metadata-Version: 1.1
2
- Name: pageviewapi
3
- Version: 0.2.3
4
- Summary: Wikimedia Pageview API client
5
- Home-page: http://github.com/Commonists/pageviewapi
6
- Author: Commonists
7
- Author-email: ps.huard@gmail.com
8
- License: MIT
9
- Description: # pageview-api
10
- [![Build Status](https://travis-ci.org/Commonists/pageview-api.svg?branch=master)](https://travis-ci.org/Commonists/pageview-api)
11
- [![Code Health](https://landscape.io/github/Commonists/pageview-api/master/landscape.svg?style=flat)](https://landscape.io/github/Commonists/pageview-api/master)
12
- [![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)
13
-
14
- Wikimedia Pageview API client
15
-
16
- Installation
17
- ------------
18
- In order to install system wide on system using sudo you can use:
19
- ```sh
20
- pip install git+https://github.com/Commonists/pageview-api.git
21
- ```
22
-
23
- Examples
24
- --------
25
-
26
- Number of view on English Wikipedia of article Paris from November 6th to November 20th 2015
27
-
28
- ```python
29
- import pageviewapi
30
- pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
31
- access='all-access', agent='all-agents', granularity='daily')
32
- ```
33
-
34
- Aggregation: Get a daily pageview count timeseries of all projects for the month of October 2015
35
- ```python
36
- import pageviewapi
37
- pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
38
- agent='all-agents', granularity='daily')
39
- ```
40
-
41
- Most viewed articles on French Wikipedia on November 14th, 2015
42
- ```python
43
- import pageviewapi
44
- pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
45
- ```
46
-
47
- Sum (resp. average) of view during last 30 days
48
- ```python
49
- import pageviewapi.period
50
- pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
51
- access='all-access', agent='all-agents')
52
-
53
- pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
54
- ```
55
-
56
- Platform: UNKNOWN
57
- Classifier: Development Status :: 4 - Beta
58
- Classifier: Environment :: Console
59
- Classifier: Intended Audience :: Developers
60
- Classifier: License :: OSI Approved :: MIT License
61
- Classifier: Operating System :: OS Independent
62
- Classifier: Programming Language :: Python
63
- Classifier: Topic :: Utilities
@@ -1,9 +0,0 @@
1
- """Python client for wikimedia pageview api."""
2
-
3
- from pageviewapi.client import per_article
4
- from pageviewapi.client import top
5
- from pageviewapi.client import aggregate
6
- from pageviewapi.client import Access
7
- from pageviewapi.client import Agent
8
-
9
- from pageviewapi.client import __version__
@@ -1,132 +0,0 @@
1
- """Client to wikimedia pageview api.
2
-
3
- API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
4
- Supported endpoints:
5
- - per-article
6
- - top
7
- - aggregate
8
- """
9
-
10
- from attrdict import AttrDict
11
- import requests
12
-
13
- __version__ = "0.2.3"
14
-
15
- # User-agent
16
- PROJECT_URL = "https://github.com/Commonists/pageview-api"
17
- UA = "Python pageview-api client v{version} <{url}>"
18
- USER_AGENT = {
19
- 'User-Agent': UA.format(url=PROJECT_URL, version=__version__)
20
- }
21
-
22
- API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics/pageviews"
23
- # Per article
24
- PA_ENDPOINT = "per-article"
25
- PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"
26
-
27
- # Top
28
- TOP_ENDPOINT = "top"
29
- TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"
30
-
31
- # aggregate
32
- AG_ENDPOINT = "aggregate"
33
- AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"
34
-
35
-
36
- def per_article(project, page, start, end,
37
- access='all-access', agent='all-agents', granularity='daily'):
38
- """Per article API.
39
-
40
- >>> import pageviewapi
41
- >>> pageview.per_article('en.wikipedia', 'Paris', '20151106', '20151120')
42
- will requests views for Paris article between 2015-11-06 and 2015-11-20
43
- """
44
- args = PA_ARGS.format(project=project,
45
- page=page,
46
- start=start,
47
- end=end,
48
- access=access,
49
- agent=agent,
50
- granularity=granularity)
51
- return __api__(PA_ENDPOINT, args)
52
-
53
-
54
- def top(project, year, month, day, access='all-access'):
55
- """Top 1000 most visited articles from project on a given date.
56
-
57
- >>> import pageviewapi
58
- >>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
59
- >>> views['items'][0]['articles'][0]
60
- {u'article': u'Wikip\xe9dia:Accueil_principal', u'rank': 1,
61
- u'views': 1600547}
62
- """
63
- args = TOP_ARGS.format(project=project,
64
- access=access,
65
- year=year,
66
- month=month,
67
- day=day)
68
- return __api__(TOP_ENDPOINT, args)
69
-
70
-
71
- def aggregate(project, start, end,
72
- access='all-access', agent='all-agents', granularity='daily'):
73
- """Aggregate API.
74
-
75
- >>> import pageviewapi
76
- >>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')
77
- """
78
- args = AG_ARGS.format(project=project,
79
- start=start,
80
- end=end,
81
- access=access,
82
- agent=agent,
83
- granularity=granularity)
84
- return __api__(AG_ENDPOINT, args)
85
-
86
-
87
- def __api__(end_point, args, api_url=API_BASE_URL):
88
- """Calling API."""
89
- url = "/".join([api_url, end_point, args])
90
- return AttrDict(requests.get(url, headers=USER_AGENT).json())
91
-
92
-
93
- class APIValues(object):
94
-
95
- @classmethod
96
- def allvalues(cls):
97
- """All values in the enum class.
98
-
99
- Returns:
100
- All upper cased value from APIValues.
101
- """
102
- return sorted([vars(cls)[variable] for variable in dir(cls)
103
- if variable.isupper()])
104
-
105
-
106
- class Access(APIValues):
107
- """Access values allows to filter by access.
108
-
109
- If you want to filter by mobile use:
110
- Mobile application: Access.MOBILE_APP
111
- Mobile web: Access.MOBILE_WEB
112
- Desktop: Access.DESKTOP
113
- Default: Access.ALL_ACCESS
114
- """
115
- ALL_ACCESS = "all-access"
116
- DESKTOP = "desktop"
117
- MOBILE_APP = "mobile-app"
118
- MOBILE_WEB = "mobile-web"
119
-
120
-
121
- class Agent(APIValues):
122
- """Agent values allows to filter by kind of user-agent.
123
-
124
- All: Agent.ALL_AGENTS
125
- User: Agent.USER
126
- Bot: Agent.BOT
127
- Spider: Agent.SPIDER
128
- """
129
- ALL_AGENTS = "all-agents"
130
- USER = "user"
131
- BOT = "bot"
132
- SPIDER = "spider"
@@ -1,39 +0,0 @@
1
- """Helper functions on period."""
2
- import datetime
3
- import pageviewapi.client
4
-
5
-
6
- def sum_last(project, page, last=30, agent='all-agents', access='all-access'):
7
- """Sum of page views over the last `last` days."""
8
- views = pageviewapi.client.per_article(project, page,
9
- __days_ago__(last),
10
- __today__(),
11
- access=access, agent=agent)
12
- return sum([daily['views'] for daily in views['items']])
13
-
14
-
15
- def avg_last(project, page, last=30, agent='all-agents', access='all-access'):
16
- """Average of page views over the last `last` days."""
17
- views = pageviewapi.client.per_article(project, page,
18
- __days_ago__(last),
19
- __today__(),
20
- access=access, agent=agent)
21
- return __avg__([daily['views'] for daily in views['items']])
22
-
23
-
24
- def __today__():
25
- """Date of the day as YYYYmmdd format."""
26
- return datetime.date.today().strftime('%Y%m%d')
27
-
28
-
29
- def __days_ago__(days):
30
- """Days ago as YYYYmmdd format."""
31
- today = datetime.date.today()
32
- delta = datetime.timedelta(days=days)
33
- ago = today - delta
34
- return ago.strftime('%Y%m%d')
35
-
36
-
37
- def __avg__(numericlist):
38
- """Basic average function."""
39
- return sum(numericlist) / float(len(numericlist))
@@ -1,63 +0,0 @@
1
- Metadata-Version: 1.1
2
- Name: pageviewapi
3
- Version: 0.2.3
4
- Summary: Wikimedia Pageview API client
5
- Home-page: http://github.com/Commonists/pageviewapi
6
- Author: Commonists
7
- Author-email: ps.huard@gmail.com
8
- License: MIT
9
- Description: # pageview-api
10
- [![Build Status](https://travis-ci.org/Commonists/pageview-api.svg?branch=master)](https://travis-ci.org/Commonists/pageview-api)
11
- [![Code Health](https://landscape.io/github/Commonists/pageview-api/master/landscape.svg?style=flat)](https://landscape.io/github/Commonists/pageview-api/master)
12
- [![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)
13
-
14
- Wikimedia Pageview API client
15
-
16
- Installation
17
- ------------
18
- To install system-wide (using sudo where required), you can use:
19
- ```sh
20
- pip install git+https://github.com/Commonists/pageview-api.git
21
- ```
22
-
23
- Examples
24
- --------
25
-
26
- Number of views on English Wikipedia of the article Paris from November 6th to November 20th, 2015
27
-
28
- ```python
29
- import pageviewapi
30
- pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
31
- access='all-access', agent='all-agents', granularity='daily')
32
- ```
33
-
34
- Aggregation: get a daily pageview count time series for French Wikipedia for the month of October 2015
35
- ```python
36
- import pageviewapi
37
- pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
38
- agent='all-agents', granularity='daily')
39
- ```
40
-
41
- Most viewed articles on French Wikipedia on November 14th, 2015
42
- ```python
43
- import pageviewapi
44
- pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
45
- ```
46
-
47
- Sum (resp. average) of views during the last 30 days
48
- ```python
49
- import pageviewapi.period
50
- pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
51
- access='all-access', agent='all-agents')
52
-
53
- pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
54
- ```
55
-
56
- Platform: UNKNOWN
57
- Classifier: Development Status :: 4 - Beta
58
- Classifier: Environment :: Console
59
- Classifier: Intended Audience :: Developers
60
- Classifier: License :: OSI Approved :: MIT License
61
- Classifier: Operating System :: OS Independent
62
- Classifier: Programming Language :: Python
63
- Classifier: Topic :: Utilities
@@ -1,11 +0,0 @@
1
- MANIFEST.in
2
- README.md
3
- setup.py
4
- pageviewapi/__init__.py
5
- pageviewapi/client.py
6
- pageviewapi/period.py
7
- pageviewapi.egg-info/PKG-INFO
8
- pageviewapi.egg-info/SOURCES.txt
9
- pageviewapi.egg-info/dependency_links.txt
10
- pageviewapi.egg-info/requires.txt
11
- pageviewapi.egg-info/top_level.txt
@@ -1,2 +0,0 @@
1
- requests
2
- attrdict
@@ -1 +0,0 @@
1
- pageviewapi
@@ -1,5 +0,0 @@
1
- [egg_info]
2
- tag_build =
3
- tag_date = 0
4
- tag_svn_revision = 0
5
-
@@ -1,42 +0,0 @@
1
- #!/usr/bin/python
2
- # -*- coding: latin-1 -*-
3
-
4
- """Setup script."""
5
-
6
- try:
7
- from setuptools import setup
8
- except ImportError:
9
- from distutils.core import setup
10
-
11
- try:
12
- import pageviewapi
13
- version = pageviewapi.__version__
14
- except ImportError:
15
- version = 'Undefined'
16
-
17
-
18
- classifiers = [
19
- 'Development Status :: 4 - Beta',
20
- 'Environment :: Console',
21
- 'Intended Audience :: Developers',
22
- 'License :: OSI Approved :: MIT License',
23
- 'Operating System :: OS Independent',
24
- 'Programming Language :: Python',
25
- 'Topic :: Utilities'
26
- ]
27
- packages = ['pageviewapi']
28
- requires = ['requests', 'attrdict']
29
-
30
- setup(
31
- name='pageviewapi',
32
- version=version,
33
- author='Commonists',
34
- author_email='ps.huard@gmail.com',
35
- url='http://github.com/Commonists/pageviewapi',
36
- description='Wikimedia Pageview API client',
37
- long_description=open('README.md').read(),
38
- license='MIT',
39
- packages=packages,
40
- install_requires=requires,
41
- classifiers=classifiers
42
- )