edx-enterprise-data 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- {edx_enterprise_data-9.1.1.dist-info → edx_enterprise_data-9.2.0.dist-info}/METADATA +1 -1
- {edx_enterprise_data-9.1.1.dist-info → edx_enterprise_data-9.2.0.dist-info}/RECORD +21 -24
- enterprise_data/__init__.py +1 -1
- enterprise_data/admin_analytics/constants.py +3 -17
- enterprise_data/admin_analytics/data_loaders.py +0 -125
- enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py +85 -0
- enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py +50 -0
- enterprise_data/api/v1/serializers.py +1 -38
- enterprise_data/api/v1/urls.py +2 -2
- enterprise_data/api/v1/views/analytics_completions.py +0 -2
- enterprise_data/api/v1/views/analytics_engagements.py +0 -2
- enterprise_data/api/v1/views/analytics_enrollments.py +0 -2
- enterprise_data/api/v1/views/analytics_leaderboard.py +65 -102
- enterprise_data/renderers.py +2 -2
- enterprise_data/tests/admin_analytics/mock_analytics_data.py +15 -60
- enterprise_data/tests/admin_analytics/test_analytics_leaderboard.py +48 -81
- enterprise_data/tests/admin_analytics/test_data_loaders.py +1 -58
- enterprise_data/utils.py +0 -16
- enterprise_data/admin_analytics/utils.py +0 -180
- enterprise_data/api/v1/paginators.py +0 -121
- enterprise_data/tests/admin_analytics/test_utils.py +0 -102
- {edx_enterprise_data-9.1.1.dist-info → edx_enterprise_data-9.2.0.dist-info}/LICENSE +0 -0
- {edx_enterprise_data-9.1.1.dist-info → edx_enterprise_data-9.2.0.dist-info}/WHEEL +0 -0
- {edx_enterprise_data-9.1.1.dist-info → edx_enterprise_data-9.2.0.dist-info}/top_level.txt +0 -0
enterprise_data/admin_analytics/utils.py (file removed in 9.2.0)
@@ -1,180 +0,0 @@
```python
"""
Utility functions for fetching data from the database.
"""
from datetime import datetime, timedelta
from enum import Enum
from logging import getLogger

from edx_django_utils.cache import TieredCache, get_cache_key

from enterprise_data.admin_analytics.constants import Calculation, Granularity
from enterprise_data.admin_analytics.data_loaders import (
    fetch_engagement_data,
    fetch_enrollment_data,
    fetch_max_enrollment_datetime,
)

LOGGER = getLogger(__name__)


class ChartType(Enum):
    """
    Chart types.
    """
    COMPLETIONS_OVER_TIME = 'completions_over_time'
    TOP_COURSES_BY_COMPLETIONS = 'top_courses_by_completions'
    TOP_SUBJECTS_BY_COMPLETIONS = 'top_subjects_by_completions'


def fetch_enrollments_cache_expiry_timestamp():
    """Calculate cache expiry timestamp"""
    # TODO: Implement correct cache expiry logic for `enrollments` data.
    # Current cache expiry logic is based on `enterprise_learner_enrollment` table,
    # Which has nothing to do with the `enrollments` data. Instead cache expiry should
    # be based on `fact_enrollment_admin_dash` table. Currently we have no timestamp in
    # `fact_enrollment_admin_dash` table that can be used for cache expiry. Add a new
    # column in the table for this purpose and then use that column for cache expiry.
    last_updated_at = fetch_max_enrollment_datetime()
    cache_expiry = (
        last_updated_at + timedelta(days=1) if last_updated_at else datetime.now()
    )
    return cache_expiry


def fetch_engagements_cache_expiry_timestamp():
    """Calculate cache expiry timestamp"""
    # TODO: Implement correct cache expiry logic for `engagements` data.
    # Current cache expiry logic is based on `enterprise_learner_enrollment` table,
    # Which has nothing to do with the `engagements` data. Instead cache expiry should
    # be based on `fact_enrollment_engagement_day_admin_dash` table. Currently we have
    # no timestamp in `fact_enrollment_engagement_day_admin_dash` table that can be used
    # for cache expiry. Add a new column in the table for this purpose and then use that
    # column for cache expiry.
    last_updated_at = fetch_max_enrollment_datetime()
    cache_expiry = (
        last_updated_at + timedelta(days=1) if last_updated_at else datetime.now()
    )
    return cache_expiry


def granularity_aggregation(level, group, date, data_frame, aggregation_type="count"):
    """Aggregate data based on granularity"""
    df = data_frame

    period_mapping = {
        Granularity.WEEKLY.value: "W",
        Granularity.MONTHLY.value: "M",
        Granularity.QUARTERLY.value: "Q"
    }

    if level in period_mapping:
        df[date] = df[date].dt.to_period(period_mapping[level]).dt.start_time

    agg_column_name = "count"
    if aggregation_type == "count":
        df = df.groupby(group).size().reset_index()
    elif aggregation_type == "sum":
        df = df.groupby(group).sum().reset_index()
        agg_column_name = "sum"

    df.columns = group + [agg_column_name]
    return df


def calculation_aggregation(calc, data_frame, aggregation_type="count"):
    """Aggregate data based on calculation"""
    df = data_frame

    window_mapping = {
        Calculation.MOVING_AVERAGE_3_PERIOD.value: 3,
        Calculation.MOVING_AVERAGE_7_PERIOD.value: 7,
    }

    aggregation_column = "count" if aggregation_type == "count" else "sum"

    if calc == Calculation.RUNNING_TOTAL.value:
        df[aggregation_column] = df.groupby("enroll_type")[aggregation_column].cumsum()
    elif calc in [Calculation.MOVING_AVERAGE_3_PERIOD.value, Calculation.MOVING_AVERAGE_7_PERIOD.value]:
        df[aggregation_column] = (
            df.groupby("enroll_type")[aggregation_column]
            .rolling(window_mapping[calc])
            .mean()
            .droplevel(level=[0])
        )

    return df


def get_cache_timeout(cache_expiry):
    """
    Helper method to calculate cache timeout in seconds.

    Arguments:
        cache_expiry (datetime): Datetime object denoting the cache expiry.

    Returns:
        (int): Cache timeout in seconds.
    """
    now = datetime.now()
    cache_timeout = 0
    if cache_expiry > now:
        # Calculate cache expiry in seconds from now.
        cache_timeout = (cache_expiry - now).seconds

    return cache_timeout


def fetch_and_cache_enrollments_data(enterprise_id, cache_expiry):
    """
    Helper method to fetch and cache enrollments data.

    Arguments:
        enterprise_id (str): UUID of the enterprise customer in string format.
        cache_expiry (datetime): Datetime object denoting the cache expiry.

    Returns:
        (pandas.DataFrame): The enrollments data.
    """
    cache_key = get_cache_key(
        resource='enterprise-admin-analytics-aggregates-enrollments',
        enterprise_customer=enterprise_id,
    )
    cached_response = TieredCache.get_cached_response(cache_key)

    if cached_response.is_found:
        LOGGER.info(f"Enrollments data found in cache for Enterprise [{enterprise_id}]")
        return cached_response.value
    else:
        enrollments = fetch_enrollment_data(enterprise_id)
        TieredCache.set_all_tiers(
            cache_key, enrollments, get_cache_timeout(cache_expiry)
        )
        return enrollments


def fetch_and_cache_engagements_data(enterprise_id, cache_expiry):
    """
    Helper method to fetch and cache engagements data.

    Arguments:
        enterprise_id (str): UUID of the enterprise customer in string format.
        cache_expiry (datetime): Datetime object denoting the cache expiry.

    Returns:
        (pandas.DataFrame): The engagements data.
    """
    cache_key = get_cache_key(
        resource='enterprise-admin-analytics-aggregates-engagements',
        enterprise_customer=enterprise_id,
    )
    cached_response = TieredCache.get_cached_response(cache_key)

    if cached_response.is_found:
        LOGGER.info(f"Engagements data found in cache for Enterprise [{enterprise_id}]")
        return cached_response.value
    else:
        engagements = fetch_engagement_data(enterprise_id)
        TieredCache.set_all_tiers(
            cache_key, engagements, get_cache_timeout(cache_expiry)
        )
        return engagements
```
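For context, a minimal sketch of how the removed aggregation helpers were typically composed against 9.1.1. The helper and enum names come from the removed module itself; the dataframe column names and sample values below are illustrative assumptions, not taken from this diff.

```python
# Illustrative usage of the removed helpers (valid against 9.1.1 only).
import pandas as pd

from enterprise_data.admin_analytics.constants import Calculation, Granularity
from enterprise_data.admin_analytics.utils import (
    calculation_aggregation,
    granularity_aggregation,
)

# Small stand-in for the enrollments dataframe that
# fetch_and_cache_enrollments_data(enterprise_id, cache_expiry) would return;
# the column names here are assumed for the example.
enrollments = pd.DataFrame({
    'enterprise_enrollment_date': pd.to_datetime(
        ['2024-01-01', '2024-01-03', '2024-01-10', '2024-01-11']
    ),
    'enroll_type': ['certificate', 'audit', 'certificate', 'certificate'],
})

# Bucket rows into weekly periods and count enrollments per (week, enroll_type).
weekly_counts = granularity_aggregation(
    level=Granularity.WEEKLY.value,
    group=['enterprise_enrollment_date', 'enroll_type'],
    date='enterprise_enrollment_date',
    data_frame=enrollments,
)

# Convert the per-period counts into a running total per enroll_type.
running_totals = calculation_aggregation(Calculation.RUNNING_TOTAL.value, weekly_counts)
print(running_totals)
```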
enterprise_data/api/v1/paginators.py (file removed in 9.2.0)
@@ -1,121 +0,0 @@
```python
"""Custom paginator for the Advance Analytics API."""

import math
from dataclasses import dataclass
from typing import Any

from rest_framework.exceptions import NotFound
from rest_framework.pagination import PageNumberPagination
from rest_framework.response import Response


@dataclass
class Page:
    """
    A class representing a single page of paginated data.

    Attributes:
        data (Any): The data contained in the current page.
        count (int): The total number of items across all pages.
        num_pages (int): The total number of pages.
        current_page (int): The current page number.
    """
    data: Any
    count: int
    num_pages: int
    current_page: int

    def has_next(self):
        """
        Check if there is a next page.

        Returns:
            bool: True if there is a next page, False otherwise.
        """
        return self.current_page < self.num_pages

    def has_previous(self):
        """
        Check if there is a previous page.

        Returns:
            bool: True if there is a previous page, False otherwise.
        """
        return self.current_page > 1

    def next_page_number(self):
        """
        Get the next page number.

        Returns:
            int: The next page number.
        """
        return self.current_page + 1

    def previous_page_number(self):
        """
        Get the previous page number.

        Returns:
            int: The previous page number.
        """
        return self.current_page - 1


class AdvanceAnalyticsPagination(PageNumberPagination):
    """
    Custom pagination class for advanced analytics.

    Attributes:
        page_size_query_param (str): The query parameter for the page size.
        page_size (int): The default page size.
        max_page_size (int): The maximum allowed page size.
    """
    page_size_query_param = "page_size"
    page_size = 50
    max_page_size = 100

    def paginate_queryset(self, queryset, request, view=None):
        """
        Paginate a given dataframe based on the request parameters.

        Args:
            queryset (pd.DataFrame): The dataframe to paginate.
            request (Request): The request object containing query parameters.
            view (View, optional): The view that is calling the paginator.

        Returns:
            Page: A Page object. `data` attribute of the object will contain the paginated data.
        """
        dataframe = queryset

        self.request = request  # pylint: disable=attribute-defined-outside-init
        page_size = self.get_page_size(request)
        if not page_size:
            return None

        total_rows = dataframe.shape[0]
        num_pages = math.ceil(total_rows / page_size)

        page_number = int(request.query_params.get(self.page_query_param) or 1)
        if page_number <= 0 or page_number > num_pages:
            raise NotFound('Invalid page.')

        start_index = (page_number - 1) * page_size
        end_index = min(start_index + page_size, total_rows)
        data_frame_page = dataframe.iloc[start_index:end_index]

        # pylint: disable=attribute-defined-outside-init
        self.page = Page(data_frame_page, total_rows, num_pages, page_number)

        return self.page

    def get_paginated_response(self, data):
        return Response({
            'next': self.get_next_link(),
            'previous': self.get_previous_link(),
            'count': self.page.count,
            'num_pages': self.page.num_pages,
            'current_page': self.page.current_page,
            'results': data
        })
```
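A hedged sketch of how this removed paginator could be driven from a DRF view in 9.1.1: the view class, dataframe columns, and sample rows below are hypothetical, shown only to illustrate the paginate_queryset / get_paginated_response flow.

```python
# Hypothetical view illustrating the removed paginator (valid against 9.1.1 only).
import pandas as pd
from rest_framework.views import APIView

from enterprise_data.api.v1.paginators import AdvanceAnalyticsPagination


class ExampleLeaderboardView(APIView):
    """Hypothetical view that paginates an in-memory dataframe."""

    pagination_class = AdvanceAnalyticsPagination

    def get(self, request, **kwargs):
        # Stand-in data for the example; the column names are assumptions.
        dataframe = pd.DataFrame({
            'email': ['a@example.com', 'b@example.com', 'c@example.com'],
            'learning_time_hours': [4.5, 2.0, 1.25],
        })

        paginator = self.pagination_class()
        # Returns a Page whose `data` attribute holds the rows for the requested
        # page (?page=N&page_size=M); out-of-range page numbers raise NotFound.
        page = paginator.paginate_queryset(dataframe, request)

        # Serialize the slice however the API expects; records orient keeps column names.
        return paginator.get_paginated_response(page.data.to_dict(orient='records'))
```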
enterprise_data/tests/admin_analytics/test_utils.py (file removed in 9.2.0)
@@ -1,102 +0,0 @@
```python
"""
Test the utility functions in the admin_analytics app.
"""
from datetime import datetime, timedelta

from mock import patch

from django.test import TestCase

from enterprise_data.admin_analytics.utils import (
    fetch_and_cache_engagements_data,
    fetch_and_cache_enrollments_data,
    get_cache_timeout,
)


class TestUtils(TestCase):
    """
    Test suite for the utility functions in the admin_analytics package.
    """

    def test_get_cache_timeout(self):
        """
        Validate the get_cache_timeout function.
        """
        now = datetime.now().replace(microsecond=0)
        with patch('enterprise_data.admin_analytics.utils.datetime') as mock_datetime:
            mock_datetime.now.return_value = now
            cache_expiry = now
            self.assertEqual(get_cache_timeout(cache_expiry), 0)

            cache_expiry = now + timedelta(seconds=10)
            self.assertEqual(get_cache_timeout(cache_expiry), 10)

            cache_expiry = now + timedelta(seconds=100)
            self.assertEqual(get_cache_timeout(cache_expiry), 100)

            # Validate the case where cache_expiry is in the past.
            cache_expiry = now - timedelta(seconds=10)
            self.assertEqual(get_cache_timeout(cache_expiry), 0)

    def test_fetch_and_cache_enrollments_data(self):
        """
        Validate the fetch_and_cache_enrollments_data function.
        """
        with patch('enterprise_data.admin_analytics.utils.fetch_enrollment_data') as mock_fetch_enrollment_data:
            with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
                # Simulate the scenario where the data is not found in the cache.
                mock_tiered_cache.get_cached_response.return_value.is_found = False
                mock_fetch_enrollment_data.return_value = 'enrollments'

                enrollments = fetch_and_cache_enrollments_data('enterprise_id', datetime.now() + timedelta(seconds=10))
                self.assertEqual(enrollments, 'enrollments')
                self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
                self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 1)

    def test_fetch_and_cache_enrollments_data_with_data_cache_found(self):
        """
        Validate the fetch_and_cache_enrollments_data function.
        """
        with patch('enterprise_data.admin_analytics.utils.fetch_enrollment_data') as mock_fetch_enrollment_data:
            with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
                # Simulate the scenario where the data is found in the cache.
                mock_tiered_cache.get_cached_response.return_value.is_found = True
                mock_tiered_cache.get_cached_response.return_value.value = 'cached-enrollments'
                mock_fetch_enrollment_data.return_value = 'enrollments'

                enrollments = fetch_and_cache_enrollments_data('enterprise_id', datetime.now() + timedelta(seconds=10))
                self.assertEqual(enrollments, 'cached-enrollments')
                self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
                self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 0)

    def test_fetch_and_cache_engagements_data(self):
        """
        Validate the fetch_and_cache_engagements_data function.
        """
        with patch('enterprise_data.admin_analytics.utils.fetch_engagement_data') as mock_fetch_engagement_data:
            with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
                # Simulate the scenario where the data is not found in the cache.
                mock_tiered_cache.get_cached_response.return_value.is_found = False
                mock_fetch_engagement_data.return_value = 'engagements'

                enrollments = fetch_and_cache_engagements_data('enterprise_id', datetime.now() + timedelta(seconds=10))
                self.assertEqual(enrollments, 'engagements')
                self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
                self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 1)

    def test_fetch_and_cache_engagements_data_with_data_cache_found(self):
        """
        Validate the fetch_and_cache_engagements_data function.
        """
        with patch('enterprise_data.admin_analytics.utils.fetch_engagement_data') as mock_fetch_engagement_data:
            with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
                # Simulate the scenario where the data is found in the cache.
                mock_tiered_cache.get_cached_response.return_value.is_found = True
                mock_tiered_cache.get_cached_response.return_value.value = 'cached-engagements'
                mock_fetch_engagement_data.return_value = 'engagements'

                enrollments = fetch_and_cache_engagements_data('enterprise_id', datetime.now() + timedelta(seconds=10))
                self.assertEqual(enrollments, 'cached-engagements')
                self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
                self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 0)
```
LICENSE, WHEEL, and top_level.txt are unchanged between the two versions.