edx-enterprise-data 9.1.0__py3-none-any.whl → 9.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {edx_enterprise_data-9.1.0.dist-info → edx_enterprise_data-9.2.0.dist-info}/METADATA +1 -1
  2. {edx_enterprise_data-9.1.0.dist-info → edx_enterprise_data-9.2.0.dist-info}/RECORD +22 -25
  3. enterprise_data/__init__.py +1 -1
  4. enterprise_data/admin_analytics/constants.py +3 -17
  5. enterprise_data/admin_analytics/data_loaders.py +0 -125
  6. enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py +85 -0
  7. enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py +50 -0
  8. enterprise_data/api/v1/serializers.py +1 -38
  9. enterprise_data/api/v1/urls.py +2 -2
  10. enterprise_data/api/v1/views/analytics_completions.py +0 -2
  11. enterprise_data/api/v1/views/analytics_engagements.py +0 -2
  12. enterprise_data/api/v1/views/analytics_enrollments.py +0 -2
  13. enterprise_data/api/v1/views/analytics_leaderboard.py +65 -102
  14. enterprise_data/api/v1/views/enterprise_learner.py +17 -14
  15. enterprise_data/renderers.py +2 -2
  16. enterprise_data/tests/admin_analytics/mock_analytics_data.py +15 -60
  17. enterprise_data/tests/admin_analytics/test_analytics_leaderboard.py +48 -81
  18. enterprise_data/tests/admin_analytics/test_data_loaders.py +1 -58
  19. enterprise_data/utils.py +0 -16
  20. enterprise_data/admin_analytics/utils.py +0 -180
  21. enterprise_data/api/v1/paginators.py +0 -121
  22. enterprise_data/tests/admin_analytics/test_utils.py +0 -102
  23. {edx_enterprise_data-9.1.0.dist-info → edx_enterprise_data-9.2.0.dist-info}/LICENSE +0 -0
  24. {edx_enterprise_data-9.1.0.dist-info → edx_enterprise_data-9.2.0.dist-info}/WHEEL +0 -0
  25. {edx_enterprise_data-9.1.0.dist-info → edx_enterprise_data-9.2.0.dist-info}/top_level.txt +0 -0
enterprise_data/tests/admin_analytics/test_data_loaders.py CHANGED
@@ -1,20 +1,11 @@
 """
 Test the utility functions in the admin_analytics app for data loading operations.
 """
-from uuid import uuid4
-
-import pytest
 from mock import patch
 
-from django.http import Http404
 from django.test import TestCase
 
-from enterprise_data.admin_analytics.data_loaders import (
-    fetch_engagement_data,
-    fetch_enrollment_data,
-    fetch_max_enrollment_datetime,
-)
-from enterprise_data.tests.test_utils import get_dummy_engagements_data, get_dummy_enrollments_data
+from enterprise_data.admin_analytics.data_loaders import fetch_max_enrollment_datetime
 
 
 class TestDataLoaders(TestCase):
@@ -36,51 +27,3 @@ class TestDataLoaders(TestCase):
             mock_run_query.return_value = []
             max_enrollment_date = fetch_max_enrollment_datetime()
             self.assertIsNone(max_enrollment_date)
-
-    def test_fetch_engagement_data(self):
-        """
-        Validate the fetch_engagement_data function.
-        """
-        with patch('enterprise_data.admin_analytics.data_loaders.run_query') as mock_run_query:
-            enterprise_uuid = str(uuid4())
-            mock_run_query.return_value = [
-                list(item.values()) for item in get_dummy_engagements_data(enterprise_uuid, 10)
-            ]
-
-            engagement_data = fetch_engagement_data(enterprise_uuid)
-            self.assertEqual(engagement_data.shape, (10, 14))
-
-    def test_fetch_engagement_data_empty_data(self):
-        """
-        Validate the fetch_engagement_data function behavior when no data is returned from the query.
-        """
-        with patch('enterprise_data.admin_analytics.data_loaders.run_query') as mock_run_query:
-            mock_run_query.return_value = []
-            enterprise_uuid = str(uuid4())
-            with pytest.raises(Http404) as error:
-                fetch_engagement_data(enterprise_uuid)
-                error.value.message = f'No engagement data found for enterprise {enterprise_uuid}'
-
-    def test_fetch_enrollment_data(self):
-        """
-        Validate the fetch_enrollment_data function.
-        """
-        with patch('enterprise_data.admin_analytics.data_loaders.run_query') as mock_run_query:
-            enterprise_uuid = str(uuid4())
-            mock_run_query.return_value = [
-                list(item.values()) for item in get_dummy_enrollments_data(enterprise_uuid)
-            ]
-
-            enrollment_data = fetch_enrollment_data(enterprise_uuid)
-            self.assertEqual(enrollment_data.shape, (10, 21))
-
-    def test_fetch_enrollment_data_empty_data(self):
-        """
-        Validate the fetch_enrollment_data function behavior when no data is returned from the query.
-        """
-        with patch('enterprise_data.admin_analytics.data_loaders.run_query') as mock_run_query:
-            mock_run_query.return_value = []
-            enterprise_uuid = str(uuid4())
-            with pytest.raises(Http404) as error:
-                fetch_enrollment_data(enterprise_uuid)
-                error.value.message = f'No enrollment data found for enterprise {enterprise_uuid}'
enterprise_data/utils.py CHANGED
@@ -83,22 +83,6 @@ def timer(prefix):
     LOGGER.info(f"TIMER:: {prefix} took {difference:.20f} seconds")
 
 
-def date_filter(start, end, data_frame, date_column):
-    """
-    Filter a pandas DataFrame by date range.
-
-    Arguments:
-        start (DatetimeScalar | NaTType | None): The start date.
-        end (DatetimeScalar | NaTType | None): The end date.
-        data_frame (pandas.DataFrame): The DataFrame to filter.
-        date_column (str): The name of the date column.
-
-    Returns:
-        (pandas.DataFrame): The filtered DataFrame.
-    """
-    return data_frame[(start <= data_frame[date_column]) & (data_frame[date_column] <= end)]
-
-
 def primary_subject_truncate(x):
     """
     Truncate primary subject to a few categories.
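For reference, the removed date_filter helper reduced to a single inclusive boolean mask over a datetime column. A minimal standalone sketch of that logic (the sample frame and column name below are illustrative, not taken from the package):

    import pandas as pd

    # Illustrative frame; the column name is an assumption, not taken from the package.
    df = pd.DataFrame({
        "enrollment_date": pd.to_datetime(["2024-01-05", "2024-02-10", "2024-03-15"]),
        "course_key": ["course-a", "course-b", "course-c"],
    })

    start = pd.Timestamp("2024-01-01")
    end = pd.Timestamp("2024-02-28")

    # Same boolean-mask logic as the removed date_filter(): inclusive on both ends.
    filtered = df[(start <= df["enrollment_date"]) & (df["enrollment_date"] <= end)]
    print(filtered)  # keeps the January and February rows only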
enterprise_data/admin_analytics/utils.py DELETED
@@ -1,180 +0,0 @@
-"""
-Utility functions for fetching data from the database.
-"""
-from datetime import datetime, timedelta
-from enum import Enum
-from logging import getLogger
-
-from edx_django_utils.cache import TieredCache, get_cache_key
-
-from enterprise_data.admin_analytics.constants import Calculation, Granularity
-from enterprise_data.admin_analytics.data_loaders import (
-    fetch_engagement_data,
-    fetch_enrollment_data,
-    fetch_max_enrollment_datetime,
-)
-
-LOGGER = getLogger(__name__)
-
-
-class ChartType(Enum):
-    """
-    Chart types.
-    """
-    COMPLETIONS_OVER_TIME = 'completions_over_time'
-    TOP_COURSES_BY_COMPLETIONS = 'top_courses_by_completions'
-    TOP_SUBJECTS_BY_COMPLETIONS = 'top_subjects_by_completions'
-
-
-def fetch_enrollments_cache_expiry_timestamp():
-    """Calculate cache expiry timestamp"""
-    # TODO: Implement correct cache expiry logic for `enrollments` data.
-    # Current cache expiry logic is based on `enterprise_learner_enrollment` table,
-    # Which has nothing to do with the `enrollments` data. Instead cache expiry should
-    # be based on `fact_enrollment_admin_dash` table. Currently we have no timestamp in
-    # `fact_enrollment_admin_dash` table that can be used for cache expiry. Add a new
-    # column in the table for this purpose and then use that column for cache expiry.
-    last_updated_at = fetch_max_enrollment_datetime()
-    cache_expiry = (
-        last_updated_at + timedelta(days=1) if last_updated_at else datetime.now()
-    )
-    return cache_expiry
-
-
-def fetch_engagements_cache_expiry_timestamp():
-    """Calculate cache expiry timestamp"""
-    # TODO: Implement correct cache expiry logic for `engagements` data.
-    # Current cache expiry logic is based on `enterprise_learner_enrollment` table,
-    # Which has nothing to do with the `engagements` data. Instead cache expiry should
-    # be based on `fact_enrollment_engagement_day_admin_dash` table. Currently we have
-    # no timestamp in `fact_enrollment_engagement_day_admin_dash` table that can be used
-    # for cache expiry. Add a new column in the table for this purpose and then use that
-    # column for cache expiry.
-    last_updated_at = fetch_max_enrollment_datetime()
-    cache_expiry = (
-        last_updated_at + timedelta(days=1) if last_updated_at else datetime.now()
-    )
-    return cache_expiry
-
-
-def granularity_aggregation(level, group, date, data_frame, aggregation_type="count"):
-    """Aggregate data based on granularity"""
-    df = data_frame
-
-    period_mapping = {
-        Granularity.WEEKLY.value: "W",
-        Granularity.MONTHLY.value: "M",
-        Granularity.QUARTERLY.value: "Q"
-    }
-
-    if level in period_mapping:
-        df[date] = df[date].dt.to_period(period_mapping[level]).dt.start_time
-
-    agg_column_name = "count"
-    if aggregation_type == "count":
-        df = df.groupby(group).size().reset_index()
-    elif aggregation_type == "sum":
-        df = df.groupby(group).sum().reset_index()
-        agg_column_name = "sum"
-
-    df.columns = group + [agg_column_name]
-    return df
-
-
-def calculation_aggregation(calc, data_frame, aggregation_type="count"):
-    """Aggregate data based on calculation"""
-    df = data_frame
-
-    window_mapping = {
-        Calculation.MOVING_AVERAGE_3_PERIOD.value: 3,
-        Calculation.MOVING_AVERAGE_7_PERIOD.value: 7,
-    }
-
-    aggregation_column = "count" if aggregation_type == "count" else "sum"
-
-    if calc == Calculation.RUNNING_TOTAL.value:
-        df[aggregation_column] = df.groupby("enroll_type")[aggregation_column].cumsum()
-    elif calc in [Calculation.MOVING_AVERAGE_3_PERIOD.value, Calculation.MOVING_AVERAGE_7_PERIOD.value]:
-        df[aggregation_column] = (
-            df.groupby("enroll_type")[aggregation_column]
-            .rolling(window_mapping[calc])
-            .mean()
-            .droplevel(level=[0])
-        )
-
-    return df
-
-
-def get_cache_timeout(cache_expiry):
-    """
-    Helper method to calculate cache timeout in seconds.
-
-    Arguments:
-        cache_expiry (datetime): Datetime object denoting the cache expiry.
-
-    Returns:
-        (int): Cache timeout in seconds.
-    """
-    now = datetime.now()
-    cache_timeout = 0
-    if cache_expiry > now:
-        # Calculate cache expiry in seconds from now.
-        cache_timeout = (cache_expiry - now).seconds
-
-    return cache_timeout
-
-
-def fetch_and_cache_enrollments_data(enterprise_id, cache_expiry):
-    """
-    Helper method to fetch and cache enrollments data.
-
-    Arguments:
-        enterprise_id (str): UUID of the enterprise customer in string format.
-        cache_expiry (datetime): Datetime object denoting the cache expiry.
-
-    Returns:
-        (pandas.DataFrame): The enrollments data.
-    """
-    cache_key = get_cache_key(
-        resource='enterprise-admin-analytics-aggregates-enrollments',
-        enterprise_customer=enterprise_id,
-    )
-    cached_response = TieredCache.get_cached_response(cache_key)
-
-    if cached_response.is_found:
-        LOGGER.info(f"Enrollments data found in cache for Enterprise [{enterprise_id}]")
-        return cached_response.value
-    else:
-        enrollments = fetch_enrollment_data(enterprise_id)
-        TieredCache.set_all_tiers(
-            cache_key, enrollments, get_cache_timeout(cache_expiry)
-        )
-        return enrollments
-
-
-def fetch_and_cache_engagements_data(enterprise_id, cache_expiry):
-    """
-    Helper method to fetch and cache engagements data.
-
-    Arguments:
-        enterprise_id (str): UUID of the enterprise customer in string format.
-        cache_expiry (datetime): Datetime object denoting the cache expiry.
-
-    Returns:
-        (pandas.DataFrame): The engagements data.
-    """
-    cache_key = get_cache_key(
-        resource='enterprise-admin-analytics-aggregates-engagements',
-        enterprise_customer=enterprise_id,
-    )
-    cached_response = TieredCache.get_cached_response(cache_key)
-
-    if cached_response.is_found:
-        LOGGER.info(f"Engagements data found in cache for Enterprise [{enterprise_id}]")
-        return cached_response.value
-    else:
-        engagements = fetch_engagement_data(enterprise_id)
-        TieredCache.set_all_tiers(
-            cache_key, engagements, get_cache_timeout(cache_expiry)
-        )
-        return engagements
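The removed module's caching behaviour hinges on turning an absolute expiry timestamp into a TTL. A standalone sketch of that conversion, with the logic copied from the removed get_cache_timeout (the example timestamps below are illustrative):

    from datetime import datetime, timedelta

    def get_cache_timeout(cache_expiry):
        """Convert an absolute expiry datetime into a cache TTL in seconds (0 if already expired)."""
        now = datetime.now()
        cache_timeout = 0
        if cache_expiry > now:
            # Matches the removed code's use of .seconds, which only counts the sub-day
            # remainder of the delta, so an expiry more than a day away yields a shorter
            # timeout than the full distance to the expiry.
            cache_timeout = (cache_expiry - now).seconds
        return cache_timeout

    # Example: enrollment data last refreshed an hour ago, expiry set one day after that.
    last_updated_at = datetime.now() - timedelta(hours=1)
    cache_expiry = last_updated_at + timedelta(days=1)
    print(get_cache_timeout(cache_expiry))  # roughly 82800 seconds (23 hours)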
enterprise_data/api/v1/paginators.py DELETED
@@ -1,121 +0,0 @@
-"""Custom paginator for the Advance Analytics API."""
-
-import math
-from dataclasses import dataclass
-from typing import Any
-
-from rest_framework.exceptions import NotFound
-from rest_framework.pagination import PageNumberPagination
-from rest_framework.response import Response
-
-
-@dataclass
-class Page:
-    """
-    A class representing a single page of paginated data.
-
-    Attributes:
-        data (Any): The data contained in the current page.
-        count (int): The total number of items across all pages.
-        num_pages (int): The total number of pages.
-        current_page (int): The current page number.
-    """
-    data: Any
-    count: int
-    num_pages: int
-    current_page: int
-
-    def has_next(self):
-        """
-        Check if there is a next page.
-
-        Returns:
-            bool: True if there is a next page, False otherwise.
-        """
-        return self.current_page < self.num_pages
-
-    def has_previous(self):
-        """
-        Check if there is a previous page.
-
-        Returns:
-            bool: True if there is a previous page, False otherwise.
-        """
-        return self.current_page > 1
-
-    def next_page_number(self):
-        """
-        Get the next page number.
-
-        Returns:
-            int: The next page number.
-        """
-        return self.current_page + 1
-
-    def previous_page_number(self):
-        """
-        Get the previous page number.
-
-        Returns:
-            int: The previous page number.
-        """
-        return self.current_page - 1
-
-
-class AdvanceAnalyticsPagination(PageNumberPagination):
-    """
-    Custom pagination class for advanced analytics.
-
-    Attributes:
-        page_size_query_param (str): The query parameter for the page size.
-        page_size (int): The default page size.
-        max_page_size (int): The maximum allowed page size.
-    """
-    page_size_query_param = "page_size"
-    page_size = 50
-    max_page_size = 100
-
-    def paginate_queryset(self, queryset, request, view=None):
-        """
-        Paginate a given dataframe based on the request parameters.
-
-        Args:
-            queryset (pd.DataFrame): The dataframe to paginate.
-            request (Request): The request object containing query parameters.
-            view (View, optional): The view that is calling the paginator.
-
-        Returns:
-            Page: A Page object. `data` attribute of the object will contain the paginated data.
-        """
-        dataframe = queryset
-
-        self.request = request  # pylint: disable=attribute-defined-outside-init
-        page_size = self.get_page_size(request)
-        if not page_size:
-            return None
-
-        total_rows = dataframe.shape[0]
-        num_pages = math.ceil(total_rows / page_size)
-
-        page_number = int(request.query_params.get(self.page_query_param) or 1)
-        if page_number <= 0 or page_number > num_pages:
-            raise NotFound('Invalid page.')
-
-        start_index = (page_number - 1) * page_size
-        end_index = min(start_index + page_size, total_rows)
-        data_frame_page = dataframe.iloc[start_index:end_index]
-
-        # pylint: disable=attribute-defined-outside-init
-        self.page = Page(data_frame_page, total_rows, num_pages, page_number)
-
-        return self.page
-
-    def get_paginated_response(self, data):
-        return Response({
-            'next': self.get_next_link(),
-            'previous': self.get_previous_link(),
-            'count': self.page.count,
-            'num_pages': self.page.num_pages,
-            'current_page': self.page.current_page,
-            'results': data
-        })
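The core of the removed AdvanceAnalyticsPagination class is plain index arithmetic over a DataFrame. A minimal sketch of that slicing with the DRF request plumbing stripped out (the page numbers and data below are illustrative):

    import math

    import pandas as pd

    def paginate_dataframe(df, page_number, page_size):
        """Slice one page out of a DataFrame, mirroring the removed paginate_queryset logic."""
        total_rows = df.shape[0]
        num_pages = math.ceil(total_rows / page_size)
        if page_number <= 0 or page_number > num_pages:
            # The removed paginator raised DRF's NotFound here; ValueError keeps the sketch dependency-free.
            raise ValueError("Invalid page.")
        start_index = (page_number - 1) * page_size
        end_index = min(start_index + page_size, total_rows)
        return df.iloc[start_index:end_index], total_rows, num_pages

    # Example: 7 rows with page_size=3 split into pages of 3, 3, and 1 rows.
    df = pd.DataFrame({"value": range(7)})
    page, count, num_pages = paginate_dataframe(df, page_number=3, page_size=3)
    print(len(page), count, num_pages)  # 1 7 3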
@@ -1,102 +0,0 @@
1
- """
2
- Test the utility functions in the admin_analytics app.
3
- """
4
- from datetime import datetime, timedelta
5
-
6
- from mock import patch
7
-
8
- from django.test import TestCase
9
-
10
- from enterprise_data.admin_analytics.utils import (
11
- fetch_and_cache_engagements_data,
12
- fetch_and_cache_enrollments_data,
13
- get_cache_timeout,
14
- )
15
-
16
-
17
- class TestUtils(TestCase):
18
- """
19
- Test suite for the utility functions in the admin_analytics package.
20
- """
21
-
22
- def test_get_cache_timeout(self):
23
- """
24
- Validate the get_cache_timeout function.
25
- """
26
- now = datetime.now().replace(microsecond=0)
27
- with patch('enterprise_data.admin_analytics.utils.datetime') as mock_datetime:
28
- mock_datetime.now.return_value = now
29
- cache_expiry = now
30
- self.assertEqual(get_cache_timeout(cache_expiry), 0)
31
-
32
- cache_expiry = now + timedelta(seconds=10)
33
- self.assertEqual(get_cache_timeout(cache_expiry), 10)
34
-
35
- cache_expiry = now + timedelta(seconds=100)
36
- self.assertEqual(get_cache_timeout(cache_expiry), 100)
37
-
38
- # Validate the case where cache_expiry is in the past.
39
- cache_expiry = now - timedelta(seconds=10)
40
- self.assertEqual(get_cache_timeout(cache_expiry), 0)
41
-
42
- def test_fetch_and_cache_enrollments_data(self):
43
- """
44
- Validate the fetch_and_cache_enrollments_data function.
45
- """
46
- with patch('enterprise_data.admin_analytics.utils.fetch_enrollment_data') as mock_fetch_enrollment_data:
47
- with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
48
- # Simulate the scenario where the data is not found in the cache.
49
- mock_tiered_cache.get_cached_response.return_value.is_found = False
50
- mock_fetch_enrollment_data.return_value = 'enrollments'
51
-
52
- enrollments = fetch_and_cache_enrollments_data('enterprise_id', datetime.now() + timedelta(seconds=10))
53
- self.assertEqual(enrollments, 'enrollments')
54
- self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
55
- self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 1)
56
-
57
- def test_fetch_and_cache_enrollments_data_with_data_cache_found(self):
58
- """
59
- Validate the fetch_and_cache_enrollments_data function.
60
- """
61
- with patch('enterprise_data.admin_analytics.utils.fetch_enrollment_data') as mock_fetch_enrollment_data:
62
- with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
63
- # Simulate the scenario where the data is found in the cache.
64
- mock_tiered_cache.get_cached_response.return_value.is_found = True
65
- mock_tiered_cache.get_cached_response.return_value.value = 'cached-enrollments'
66
- mock_fetch_enrollment_data.return_value = 'enrollments'
67
-
68
- enrollments = fetch_and_cache_enrollments_data('enterprise_id', datetime.now() + timedelta(seconds=10))
69
- self.assertEqual(enrollments, 'cached-enrollments')
70
- self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
71
- self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 0)
72
-
73
- def test_fetch_and_cache_engagements_data(self):
74
- """
75
- Validate the fetch_and_cache_engagements_data function.
76
- """
77
- with patch('enterprise_data.admin_analytics.utils.fetch_engagement_data') as mock_fetch_engagement_data:
78
- with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
79
- # Simulate the scenario where the data is not found in the cache.
80
- mock_tiered_cache.get_cached_response.return_value.is_found = False
81
- mock_fetch_engagement_data.return_value = 'engagements'
82
-
83
- enrollments = fetch_and_cache_engagements_data('enterprise_id', datetime.now() + timedelta(seconds=10))
84
- self.assertEqual(enrollments, 'engagements')
85
- self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
86
- self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 1)
87
-
88
- def test_fetch_and_cache_engagements_data_with_data_cache_found(self):
89
- """
90
- Validate the fetch_and_cache_engagements_data function.
91
- """
92
- with patch('enterprise_data.admin_analytics.utils.fetch_engagement_data') as mock_fetch_engagement_data:
93
- with patch('enterprise_data.admin_analytics.utils.TieredCache') as mock_tiered_cache:
94
- # Simulate the scenario where the data is found in the cache.
95
- mock_tiered_cache.get_cached_response.return_value.is_found = True
96
- mock_tiered_cache.get_cached_response.return_value.value = 'cached-engagements'
97
- mock_fetch_engagement_data.return_value = 'engagements'
98
-
99
- enrollments = fetch_and_cache_engagements_data('enterprise_id', datetime.now() + timedelta(seconds=10))
100
- self.assertEqual(enrollments, 'cached-engagements')
101
- self.assertEqual(mock_tiered_cache.get_cached_response.call_count, 1)
102
- self.assertEqual(mock_tiered_cache.set_all_tiers.call_count, 0)
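These removed tests exercised a simple cache-hit / cache-miss flow: return the cached value when present, otherwise fetch and store it. A dependency-free sketch of that flow (a plain dict stands in for TieredCache; all names and values below are illustrative):

    _FAKE_CACHE = {}

    def fetch_and_cache(cache_key, fetch_fn):
        """Return cached data when present; otherwise fetch it, cache it, and return it."""
        if cache_key in _FAKE_CACHE:
            # Cache hit: the fetch function is never called.
            return _FAKE_CACHE[cache_key]
        # Cache miss: fetch fresh data and store it for subsequent calls.
        data = fetch_fn()
        _FAKE_CACHE[cache_key] = data
        return data

    print(fetch_and_cache("enrollments:some-uuid", lambda: "enrollments"))  # fetches -> 'enrollments'
    print(fetch_and_cache("enrollments:some-uuid", lambda: "fresh"))        # cached  -> 'enrollments'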