mozdetect 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mozdetect might be problematic. Click here for more details.

mozdetect/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+
5
+ from mozdetect.data import TelemetryTimeSeries, TimeSeries
6
+ from mozdetect.detectors import get_detectors
7
+ from mozdetect.timeseries_detectors import get_timeseries_detectors
8
+
9
+ __all__ = [
10
+ "get_detectors",
11
+ "get_timeseries_detectors",
12
+ "TelemetryTimeSeries",
13
+ "TimeSeries",
14
+ ]
mozdetect/data.py ADDED
@@ -0,0 +1,371 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+ import json
5
+ import numpy as np
6
+ import pandas
7
+ import traceback
8
+
9
+ from collections import Counter
10
+ from datetime import datetime, timedelta
11
+
12
+
13
class InvalidNumberError(Exception):
    """Signals that a non-positive count of data points was requested."""
17
+
18
+
19
class UnknownDataTypeError(Exception):
    """Signals that an unrecognized data type was requested."""
23
+
24
+
25
class TimeSeries:
    """Represents a time series composed of Datum objects.

    Primarily a wrapper around `pandas.DataFrame`. It provides some additional
    helper functions to make it simpler to iterate over the data, and provide
    a common interface for all detectors to use.

    The pandas.DataFrame was chosen for this so that strings could be included
    in the dataset alongside numerical input. It also easily allows multidimensional
    data to be used.
    """

    def __init__(self, data, data_type="all", **kwargs):
        """Initializes the time series.

        :param list data: A list of tuples representing the time series.
        :param str/list data_type: The data type that should be iterated over. See
            `set_data_type` for more information.
        :param kwargs: A set of options that can be used to finetune the
            options passed into the pandas DataFrame creation.
        """
        self._original_data = self._prepare_data(data, **kwargs)
        self._iteration_data = self._original_data
        self._data_type = "all"

        # Cached type-filtered views of the original data, computed once so
        # set_data_type can switch between them without re-filtering.
        self.numerical_data = self.get_numerical_columns()
        self.nonnumerical_data = self.get_nonnumerical_columns()

        # Only set data type after getting the numerical, and
        # non-numerical columns from the original data
        self.set_data_type(data_type)

        # Iteration cursor (row index) and a cached copy of the current row.
        self._currind = 0
        self._curr = None

    def __iter__(self):
        """Helper method to iterate over the timeseries.

        NOTE(review): `_currind` is set to the DataFrame index *label*, while
        the `get_next_n`/`get_previous_n` helpers use it positionally with
        `iloc` — this only agrees when the data has a default RangeIndex;
        confirm callers never supply a custom index.

        :return DataFrame: A row in the DataFrame.
        """
        for index, row in self.data.iterrows():
            self._currind = index
            # Transpose the row Series back into a one-row DataFrame so the
            # yielded value has the same shape as the rest of the API.
            self._curr = pandas.DataFrame(row).T

            yield self._curr

    @property
    def data(self):
        # The view currently selected by set_data_type (all/numerical/etc.).
        return self._iteration_data

    def _prepare_data(self, data, **kwargs):
        """Formats the data into a pandas DataFrame for detectors.

        Note that since the DataFrame doesn't have restrictions on types,
        multiple types can be combined into a single tuple, e.g. (1, 2, "h")
        is valid. As is the following when data is missing: [(1, 2), (3, 4, 5)]

        :param list data: A list of tuples representing the time series.
        :param kwargs: A set of options that can be used to finetune the
            options passed into the pandas DataFrame creation.

        :return DataFrame: The data converted to a pandas DataFrame.
        """
        return pandas.DataFrame(data=data, **kwargs)

    def set_data_type(self, data_type):
        """Used to set the data type to iterate over.

        By default, all the data will be iterated over. If "numerical" is passed
        here, then only the numerical data will be returned in all methods. If
        "non-numerical" is passed, then only the non-numerical data will be returned.
        This can be reset to all data by passing "all". A custom type may also
        be passed for special returns.

        :param str/list data_type: Either "numerical", "non-numerical", or "all" to
            denote the type of data. Alternatively, pass a list of custom types to get
            alternative data.
        :raises UnknownDataTypeError: If the type string is unrecognized, the
            list contains invalid dtypes, or the argument is neither str nor list.
        """
        self._data_type = data_type
        if isinstance(self._data_type, str):
            if self._data_type == "numerical":
                self._iteration_data = self.numerical_data
            elif self._data_type == "non-numerical":
                self._iteration_data = self.nonnumerical_data
            elif self._data_type == "all":
                self._iteration_data = self._original_data
            else:
                raise UnknownDataTypeError(
                    f"Unknown data type requested for iteration: {self._data_type}"
                )
        elif isinstance(self._data_type, list):
            try:
                # A list is treated as pandas dtypes to select, e.g. ["int64"].
                self._iteration_data = self._original_data.select_dtypes(include=self._data_type)
            except Exception:
                raise UnknownDataTypeError(
                    f"Failed to get custom data types for {str(self._data_type)}:"
                    f" {traceback.format_exc()}"
                )
        else:
            raise UnknownDataTypeError("Expecting list or str as type for data type.")

    def get_current(self):
        """Returns the current datum being analyzed.

        :return DataFrame: The row at the current position in the DataFrame.
        """
        if self._curr is None:
            # Double-bracket iloc keeps the result a one-row DataFrame
            # rather than a Series.
            self._curr = self.data.iloc[[self._currind]]
        return self._curr

    def get_next_n(self, n, inclusive=False):
        """Returns the next `n` data points in the time series.

        This methods is exclusive, and doesn't include the current data point
        that is being analyzed.

        :param int n: The number of data points to get.
        :param bool inclusive: If True, start the window at the current data
            point instead of the one after it.
        :raises InvalidNumberError: If `n` is not greater than 0.

        :return DataFrame: The number of requested rows if they exist. If the
            current position is at the end of the timeseries, then nothing will
            be returned.
        """
        if n <= 0:
            raise InvalidNumberError("Number of data points must be greater than 0.")

        start_ind = self._currind + 1
        end_ind = self._currind + n + 1
        if inclusive:
            # Shift the whole window back one so the current row is included.
            start_ind = start_ind - 1
            end_ind = end_ind - 1

        return self.data.iloc[start_ind:end_ind]

    def get_previous_n(self, n, inclusive=False):
        """Returns the previous `n` data points in the time series.

        This methods is exclusive, and doesn't include the current data point
        that is being analyzed.

        :param int n: The number of data points to get.
        :param bool inclusive: If True, include the current data point at the
            end of the returned window.
        :raises InvalidNumberError: If `n` is not greater than 0.

        :return DataFrame: The number of requested rows if they exist. If the
            current position is at the beginning of the timeseries, then nothing will
            be returned.
        """
        if n <= 0:
            raise InvalidNumberError("Number of data points must be greater than 0.")

        # Clamp at the start of the series so we never index negatively.
        start_ind = max(self._currind - n, 0)
        end_ind = self._currind
        if inclusive:
            if self._currind == 0:
                start_ind = 0
            else:
                start_ind = start_ind + 1
            end_ind = end_ind + 1

        return self.data.iloc[start_ind:end_ind]

    def get_numerical_columns(self):
        """Returns the data but with only numerical columns.

        :return DataFrame: Returns the data with non-numerical columns removed.
        """
        return self.data.select_dtypes(include=[np.number])

    def get_nonnumerical_columns(self):
        """Returns the data but with only non-numerical columns.

        :return DataFrame: Returns the data with numerical columns removed.
        """
        return self.data.select_dtypes(exclude=[np.number])
197
+
198
+
199
class TelemetryTimeSeries:
    """Provides additional telemetry-specific methods, and exposes the
    raw data in the `raw_data` attribute to provide more customization.
    """

    def __init__(self, data, *args, **kwargs):
        """Initializes the telemetry time series.

        :param pandas.DataFrame data: Raw telemetry rows. The methods below
            read the `build_id` and `non_norm_histogram` columns.
        """
        self.raw_data = data
        # Filled by get_cumulative_by_day / get_multiday_average respectively.
        self.cumulative_by_day_histograms = pandas.DataFrame()
        self.cumulative_multiday_histograms = pandas.DataFrame()

    def _build_id_to_date(self, build_id):
        """Returns a build ID converted into a datetime.date.

        :param str build_id: The build ID to convert.

        :return datetime.date: The date that was parsed from the build ID.
        """
        # Build IDs come in two precisions: YYYYMMDDHH (10 chars) or
        # YYYYMMDDHHMMSS; only the date portion is kept either way.
        if len(build_id) == 10:
            return datetime.strptime(build_id, "%Y%m%d%H").date()
        else:
            return datetime.strptime(build_id, "%Y%m%d%H%M%S").date()

    def get_percentiles_over_time(self, histograms=None):
        """Returns the percentiles over time from the telemetry data.

        :param pandas.DataFrame histograms: Optional histograms with
            "date", "bin", and "cdf" columns. When omitted, falls back to the
            per-day cumulative histograms (if built) or the raw data.

        :return pandas.DataFrame: One row per date with p50/p75/p95 columns,
            each the first bin whose CDF exceeds the given threshold.
        """

        def _extract_percentiles(group):
            # First bin whose CDF passes each threshold acts as that percentile.
            filtered = group[group["cdf"] > 0.5]
            retval = filtered.iloc[0][["date"]]
            retval["p50"] = filtered.iloc[0]["bin"]
            retval["p75"] = group[group["cdf"] > 0.75].iloc[0]["bin"]
            retval["p95"] = group[group["cdf"] > 0.95].iloc[0]["bin"]
            return retval

        if histograms is None:
            if not self.cumulative_by_day_histograms.empty:
                histograms = self.cumulative_by_day_histograms
            else:
                histograms = self.raw_data

        return (
            histograms.groupby("date", as_index=False)[["date", "bin", "cdf"]]
            .apply(_extract_percentiles)
            .dropna()
            .reset_index(drop=True)
        )

    def get_cumulative_by_day(self, start_date=None, end_date=None):
        """Returns the data downsampled into a per-day granularity.

        NOTE(review): assumes `raw_data` is sorted by build_id (ascending) —
        the start-row scan below only moves forward; confirm against callers.

        :param datetime.date start_date: The start date to downsample from.
        :param datetime.date end_date: The end date to downsample to.

        :return pandas.DataFrame: A pandas.DateFrame of the cumulative histograms at a per-day
            granularity. Can also be obtained from `self.cumulative_histograms`.
        """
        last_start_row = 0
        current_date = start_date or self._build_id_to_date(self.raw_data.iloc[0].build_id)
        end_date = end_date or self._build_id_to_date(self.raw_data.iloc[-1].build_id)

        while current_date <= end_date:
            # Find the start date location
            # TODO: get rid of while True loops
            while True:
                current_row = self.raw_data.iloc[last_start_row]
                current_row_date = self._build_id_to_date(current_row.build_id)
                if current_row_date < current_date:
                    last_start_row += 1
                else:
                    break

            # Gather the data from builds in the current day
            summed_histograms_raw = Counter()
            current_row_iloc = last_start_row
            while current_row_iloc < len(self.raw_data):
                current_row = self.raw_data.iloc[current_row_iloc]
                current_row_date = self._build_id_to_date(current_row.build_id)
                if current_row_date > current_date:
                    break
                # Skip builds with no histogram payload.
                if not current_row.non_norm_histogram:
                    current_row_iloc += 1
                    continue

                # Counter.update sums per-bin counts across builds; the
                # payload is a JSON object of bin -> count.
                summed_histograms_raw.update(json.loads(current_row.non_norm_histogram))
                current_row_iloc += 1

            summed_histogram = pandas.DataFrame(
                list(summed_histograms_raw.items()), columns=["bin", "count"]
            )
            if summed_histogram.empty:
                # No data for this day; move on without adding a row group.
                current_date += timedelta(days=1)
                continue

            # Produce the cumulative histogram for this day
            summed_histogram["bin"] = summed_histogram["bin"].astype(int)
            summed_histogram["date"] = current_date

            summed_histogram = summed_histogram.sort_values(by="bin").reset_index(drop=True)
            summed_histogram["cumulative"] = summed_histogram["count"].cumsum()
            summed_histogram["cdf"] = (
                summed_histogram["cumulative"] / summed_histogram["count"].sum()
            )

            # New days are prepended, so the accumulated frame ends up in
            # reverse chronological order.
            self.cumulative_by_day_histograms = pandas.concat(
                [summed_histogram.fillna(0), self.cumulative_by_day_histograms], ignore_index=True
            )
            current_date += timedelta(days=1)

        return self.cumulative_by_day_histograms

    def get_multiday_average(self, days=7):
        """Produces a multiday average of the cumulative per-day histograms using
        a rolling window.

        NOTE(review): the `+=`/`-=` on "count" aligns rows by position after
        reset_index — this assumes every day has the same set of bins in the
        same order; confirm with real data.

        :param int days: The number of days to average together.

        :return pandas.DataFrame: The data with each point being a multiday average instead
            of only a single day.
        """
        if self.cumulative_by_day_histograms is None:
            raise Exception(
                "get_multiday_average expects get_cumulative_by_day to be called first."
            )

        start_date = self.cumulative_by_day_histograms["date"].min()
        end_date = self.cumulative_by_day_histograms["date"].max()

        # Initialize the current sum with the start date
        current_date = start_date
        current_sum = self.cumulative_by_day_histograms[
            self.cumulative_by_day_histograms["date"] == current_date
        ][["bin", "count"]].reset_index(drop=True)

        # Gather the first set of days-1 into a sum
        # TODO use walrus here
        current_date += timedelta(days=1)
        while current_date < start_date + timedelta(days=days - 1):
            current_sum["count"] += self.cumulative_by_day_histograms[
                self.cumulative_by_day_histograms["date"] == current_date
            ][["bin", "count"]].reset_index(drop=True)["count"]
            current_date += timedelta(days=1)

        # Add one day at a time to the current_sum, and remove the last day from it
        # after adding the current_sum to the multiday_window data
        # TODO use walrus here
        current_date = start_date
        date_to_add = current_date + timedelta(days=days - 1)
        while date_to_add <= end_date:
            # Add new date data if it has data
            hist_to_add = self.cumulative_by_day_histograms[
                self.cumulative_by_day_histograms["date"] == date_to_add
            ][["bin", "count"]].reset_index(drop=True)["count"]
            if not hist_to_add.empty:
                current_sum["count"] += hist_to_add

            # Calculate new CDF, and add to multiday_window
            current_sum["date"] = current_date
            current_sum["cumulative"] = current_sum["count"].cumsum()
            current_sum["cdf"] = current_sum["cumulative"] / current_sum["count"].sum()
            self.cumulative_multiday_histograms = pandas.concat(
                [current_sum.copy(), self.cumulative_multiday_histograms], ignore_index=True
            )

            # Remove current date from current_sum if it had data
            hist_to_remove = self.cumulative_by_day_histograms[
                self.cumulative_by_day_histograms["date"] == current_date
            ][["bin", "count"]].reset_index(drop=True)["count"]
            if not hist_to_remove.empty:
                current_sum["count"] -= hist_to_remove

            current_date += timedelta(days=1)
            date_to_add += timedelta(days=1)

        return self.cumulative_multiday_histograms
@@ -0,0 +1,13 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+
5
+ from mozdetect.detectors.base import BaseDetector
6
+ from mozdetect.detectors.cdf import CDFDetector
7
+ from mozdetect.detectors.cdf_squared import CDFSquaredDetector
8
+
9
# Registry mapping detector names to their implementing classes.
DETECTORS = {
    "base": BaseDetector,
    "cdf": CDFDetector,
    "cdf_squared": CDFSquaredDetector,
}


def get_detectors():
    """Return the registry of available group detectors."""
    return DETECTORS
@@ -0,0 +1,39 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+
5
+
6
class BaseDetector:
    """Base class for all group detectors."""

    def __init__(self, groups=None, **kwargs):
        """Initialize the detector.

        :param list groups: A list of DataFrame objects to compare between.
            Generally expected for there to be TWO groups to compare, but it's
            possible to have multiple to do a cross-comparison (assuming the
            detector supports this).
        """
        self.groups = groups

    def _coalesce_groups(self, groups):
        """Pick which groups a detection run should operate on.

        Groups passed to the call take priority over groups supplied at
        construction time.

        :param list groups: A list of groups to compare or None.
        :return list: The groups that should be compared.
        :raises ValueError: If no groups are available from either source.
        """
        if groups:
            return groups
        if self.groups:
            return self.groups
        raise ValueError("Groups to compare have not been specified.")

    def detect_changes(self, groups=None, **kwargs):
        """Detect changes between two groups of data points.

        Subclasses override this; the base implementation does nothing.

        :param list groups: A list of the groups to compare.
        """
        pass
@@ -0,0 +1,60 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+ import numpy as np
5
+
6
+
7
+ from mozdetect.detectors.base import BaseDetector
8
+
9
+
10
class CDFDetector(BaseDetector):
    """Uses the CDF of two groups of histograms to detect changes."""

    def _calculate_cdf(self, group):
        """Build a CDF (scaled to 0-100) from a single-row histogram.

        :param DataFrame group: A one-row DataFrame whose columns are the
            histogram bins and whose first row holds the per-bin counts.
        :return list: Cumulative percentage per bin, ending at 100.
        """
        cdf = []
        currsum = 0
        for col, values in group.items():
            # Bug fix: use positional access for the first row. `values[0]`
            # is label-based on a Series and raises a KeyError when the
            # DataFrame's index does not contain the label 0 (e.g. a row
            # sliced out of a larger frame).
            currsum += values.iloc[0]
            cdf.append(currsum)
        # Normalize so the final value is 100.
        cdf = list((np.asarray(cdf) / cdf[-1]) * 100)
        return cdf

    def _calculate_cdf_difference(self, cdf_a, cdf_b):
        """Return the summed absolute pointwise difference of two CDFs."""
        diff = 0
        for val_a, val_b in zip(cdf_a, cdf_b):
            diff += abs(val_a - val_b)
        return diff

    def _calculate_cdf_area_difference(self, cdf_a, cdf_b):
        """Return the total unsigned area between two CDF curves.

        Splits segments at CDF intersection points so the area on both sides
        of a crossing is counted positively.
        """
        z = np.asarray(cdf_a) - np.asarray(cdf_b)
        x = np.arange(len(cdf_a))

        # Determine intersection points of the CDFs
        dx = x[1:] - x[:-1]
        cross_test = np.sign(z[:-1] * z[1:])
        dx_intersect = -dx / (z[1:] - z[:-1]) * z[:-1]

        # Find areas of positive, and negative areas
        areas_pos = abs(z[:-1] + z[1:]) * 0.5 * dx
        areas_neg = 0.5 * dx_intersect * abs(z[:-1]) + 0.5 * (dx - dx_intersect) * abs(z[1:])

        # Find the total area between the curves
        areas = np.where(cross_test < 0, areas_neg, areas_pos)
        total_area = np.sum(areas)

        return total_area

    def detect_changes(self, groups=None, **kwargs):
        """Detect changes between two histograms via their CDF difference.

        :param list groups: Two one-row histogram DataFrames to compare.
        :return tuple: (diff, diff_pct, total_diff) where diff is the summed
            absolute CDF difference, total_diff the maximum possible
            difference, and diff_pct = diff / total_diff * 100.
        """
        groups = self._coalesce_groups(groups)
        group_a = groups[0]
        group_b = groups[1]

        cdf_a = self._calculate_cdf(group_a)
        cdf_b = self._calculate_cdf(group_b)

        diff = self._calculate_cdf_difference(cdf_a, cdf_b)

        # Upper bound: every bin differing by the full 100 percentage points.
        total_diff = len(cdf_a) * 100
        diff_pct = (diff / total_diff) * 100

        return diff, diff_pct, total_diff
@@ -0,0 +1,33 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+ import pandas
5
+
6
+ from mozdetect.detectors.base import BaseDetector
7
+
8
+
9
class CDFSquaredDetector(BaseDetector):
    """Uses the CDF of two groups of histograms to detect changes."""

    def detect_changes(self, groups=None, **kwargs):
        """Obtain the difference between two CDFs using a squared diff method.

        Two data points are expected in the groups, with a CDF defined in both
        of them (each a DataFrame with "bin" and "cdf" columns).

        :param list groups: Two DataFrames to compare; rows are joined on "bin".
        :return dict: "diff" (signed summed difference), "sq_diff" (summed
            squared difference carrying the sign of "diff"), and "maxmin"
            (spread between the extreme pointwise differences).
        """
        # Consistency fix: honour groups supplied at construction time, the
        # same way the other detectors do via the base class helper.
        groups = self._coalesce_groups(groups)
        current_date_hist = groups[0]
        next_date_hist = groups[1]

        merged_hist = pandas.merge(
            current_date_hist, next_date_hist, on="bin", suffixes=("_current", "_next")
        )
        merged_hist["diff"] = merged_hist["cdf_current"] - merged_hist["cdf_next"]
        merged_hist["sq_diff"] = (merged_hist["cdf_next"] - merged_hist["cdf_current"]) ** 2

        diff_total = merged_hist["diff"].sum()
        # Bug fix: diff_total / abs(diff_total) produced NaN (0/0) when the
        # two CDFs were identical. Compute the sign explicitly, using 0 when
        # there is no net difference.
        sign = 1.0 if diff_total > 0 else (-1.0 if diff_total < 0 else 0.0)

        return {
            "diff": [diff_total],
            "sq_diff": [merged_hist["sq_diff"].sum() * sign],
            "maxmin": (abs(merged_hist["diff"].min()) + abs(merged_hist["diff"].max())),
        }
@@ -0,0 +1,111 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+
5
+ from datetime import datetime
6
+ from google.cloud import bigquery
7
+
8
+
9
class BigQueryClient:
    """Holds a single shared bigquery.Client for all queries.

    The client is stored on the class so every instance (and the
    module-level query helpers) reuses the same connection.
    """

    # Shared bigquery.Client; created lazily on first instantiation.
    client = None

    def __init__(self, project="moz-fx-data-bq-performance"):
        """Ensure the shared BigQuery client exists.

        :param str project: The GCP project to create the client for.
        """
        if BigQueryClient.client is None:
            # Bug fix: `project` was previously accepted but silently
            # ignored — the client was always built with the credentials'
            # default project. Pass it through.
            BigQueryClient.initialize_bq_client(project)

    @classmethod
    def initialize_bq_client(cls, project="moz-fx-data-bq-performance"):
        """Create (or recreate) the shared client for the given project."""
        cls.client = bigquery.Client(project=project)
19
+
20
+
21
def get_years_to_query():
    """Return the calendar years surrounding today.

    Used to limit build_id LIKE filters to a plausible window.

    :return tuple: (previous_year, current_year, next_year) as ints.
    """
    current_year = datetime.now().year
    return current_year - 1, current_year, current_year + 1
26
+
27
+
28
def _get_android_metric_table(probe):
    """Fetch the Fenix (android) nightly GLAM aggregates for a probe.

    NOTE(review): `probe` is interpolated directly into the SQL text —
    callers must never pass untrusted input; confirm whether query
    parameters should be used instead.

    :param str probe: The metric name to filter on.
    :return pandas.DataFrame: The query results.
    """
    previous_year, current_year, next_year = get_years_to_query()

    # Restrict build_ids to a three-year window around today to keep the
    # scan bounded.
    job = BigQueryClient.client.query(
        f"""
        SELECT *
        FROM
            moz-fx-data-shared-prod.glam_etl.glam_fenix_nightly_aggregates
        WHERE
            metric = '{probe}'
            AND ping_type = 'metrics'
            AND os = 'Android'
            AND build_id != '*'
            AND (
                build_id like '{previous_year}%'
                OR build_id like '{current_year}%'
                OR build_id like '{next_year}%'
            )
        """
    )
    return job.to_dataframe()
49
+
50
+
51
def _get_non_fog_desktop_metric_table(probe, process, os):
    """Fetch desktop (legacy telemetry) nightly GLAM aggregates for a probe.

    NOTE(review): `probe`, `process`, and `os` are interpolated directly
    into the SQL text — callers must never pass untrusted input.

    :param str probe: The metric name to filter on.
    :param str process: The process the probe is recorded in.
    :param str os: The operating system to filter on.
    :return pandas.DataFrame: The query results, ordered by build_id.
    """
    previous_year, current_year, next_year = get_years_to_query()

    # Restrict build_ids to a three-year window around today to keep the
    # scan bounded.
    job = BigQueryClient.client.query(
        f"""
        SELECT *
        FROM
            moz-fx-data-shared-prod.glam_etl.glam_desktop_nightly_aggregates
        WHERE
            metric ='{probe}'
            AND process = "{process}"
            AND os = '{os}'
            AND build_id != "*"
            AND (
                build_id like '{previous_year}%'
                OR build_id like '{current_year}%'
                OR build_id like '{next_year}%'
            )
        ORDER BY build_id
        """
    )

    return job.to_dataframe()
74
+
75
+
76
def _get_fog_desktop_metric_table(probe, os):
    """Fetch desktop FOG (Glean) nightly GLAM aggregates for a probe.

    NOTE(review): `probe` and `os` are interpolated directly into the SQL
    text — callers must never pass untrusted input.

    :param str probe: The metric name to filter on.
    :param str os: The operating system to filter on.
    :return pandas.DataFrame: The query results, ordered by build_id.
    """
    previous_year, current_year, next_year = get_years_to_query()

    # Restrict build_ids to a three-year window around today to keep the
    # scan bounded.
    job = BigQueryClient.client.query(
        f"""
        SELECT *
        FROM
            moz-fx-data-shared-prod.glam_etl.glam_fog_nightly_aggregates
        WHERE
            metric ='{probe}'
            AND ping_type = "*"
            AND os = '{os}'
            AND build_id != "*"
            AND (
                build_id like '{previous_year}%'
                OR build_id like '{current_year}%'
                OR build_id like '{next_year}%'
            )
        ORDER BY build_id
        """
    )
    return job.to_dataframe()
98
+
99
+
100
def get_metric_table(probe, os, process=None, android=False, use_fog=False):
    """Query the GLAM aggregates for a probe and return them as a DataFrame.

    :param str probe: The metric name to fetch.
    :param str os: Operating system to filter on. Ignored on the android
        path, which always queries with os = 'Android'.
    :param str process: Process name; required for non-FOG desktop probes.
    :param bool android: Query the Fenix (android) aggregates.
    :param bool use_fog: Query the FOG (Glean) desktop aggregates.

    :return pandas.DataFrame: The query results.
    :raises ValueError: If `process` is missing for a non-FOG desktop probe.
    """
    # Ensure the shared BigQuery client has been created.
    BigQueryClient()

    print("Running query...")
    if android:
        # Bug fix: _get_android_metric_table accepts only the probe; the
        # previous call passed os and process as well, raising a TypeError.
        return _get_android_metric_table(probe)
    elif not use_fog:
        if process is None:
            raise ValueError("Missing process argument for non-fog telemetry probes.")
        return _get_non_fog_desktop_metric_table(probe, process, os)
    else:
        return _get_fog_desktop_metric_table(probe, os)
@@ -0,0 +1,17 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
+
5
+ from mozdetect.timeseries_detectors.base import BaseTimeSeriesDetector
6
+ from mozdetect.timeseries_detectors.cdf import CDFTimeSeriesDetector
7
+ from mozdetect.timeseries_detectors.cdf_squared import CDFSquaredTimeSeriesDetector
8
+
9
# Registry mapping time-series detector names to their implementing classes.
TIMESERIES_DETECTORS = dict(
    base=BaseTimeSeriesDetector,
    cdf=CDFTimeSeriesDetector,
    cdf_squared=CDFSquaredTimeSeriesDetector,
)


def get_timeseries_detectors():
    """Return the registry of available time-series detectors."""
    return TIMESERIES_DETECTORS