oracle-ads 2.11.6__py3-none-any.whl → 2.11.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +24 -14
- ads/aqua/base.py +0 -2
- ads/aqua/cli.py +50 -2
- ads/aqua/decorator.py +8 -0
- ads/aqua/deployment.py +37 -34
- ads/aqua/evaluation.py +106 -49
- ads/aqua/extension/base_handler.py +18 -10
- ads/aqua/extension/common_handler.py +21 -2
- ads/aqua/extension/deployment_handler.py +1 -4
- ads/aqua/extension/evaluation_handler.py +1 -2
- ads/aqua/extension/finetune_handler.py +0 -1
- ads/aqua/extension/ui_handler.py +1 -12
- ads/aqua/extension/utils.py +4 -4
- ads/aqua/finetune.py +24 -11
- ads/aqua/model.py +2 -4
- ads/aqua/utils.py +39 -23
- ads/catalog/model.py +3 -3
- ads/catalog/notebook.py +3 -3
- ads/catalog/project.py +2 -2
- ads/catalog/summary.py +2 -4
- ads/cli.py +21 -2
- ads/common/serializer.py +5 -4
- ads/common/utils.py +6 -2
- ads/config.py +1 -0
- ads/data_labeling/metadata.py +2 -2
- ads/dataset/dataset.py +3 -5
- ads/dataset/factory.py +2 -3
- ads/dataset/label_encoder.py +1 -1
- ads/dataset/sampled_dataset.py +3 -5
- ads/jobs/ads_job.py +26 -2
- ads/jobs/builders/infrastructure/dsc_job.py +20 -7
- ads/llm/serializers/runnable_parallel.py +7 -1
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +1 -1
- ads/opctl/operator/lowcode/anomaly/README.md +1 -1
- ads/opctl/operator/lowcode/anomaly/environment.yaml +1 -1
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +8 -15
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +16 -10
- ads/opctl/operator/lowcode/anomaly/model/autots.py +9 -10
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +36 -39
- ads/opctl/operator/lowcode/anomaly/model/tods.py +4 -4
- ads/opctl/operator/lowcode/anomaly/operator_config.py +18 -1
- ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -4
- ads/opctl/operator/lowcode/common/data.py +16 -2
- ads/opctl/operator/lowcode/common/transformations.py +48 -14
- ads/opctl/operator/lowcode/forecast/README.md +1 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +5 -4
- ads/opctl/operator/lowcode/forecast/model/arima.py +36 -29
- ads/opctl/operator/lowcode/forecast/model/automlx.py +91 -90
- ads/opctl/operator/lowcode/forecast/model/autots.py +200 -166
- ads/opctl/operator/lowcode/forecast/model/base_model.py +144 -140
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +86 -80
- ads/opctl/operator/lowcode/forecast/model/prophet.py +68 -63
- ads/opctl/operator/lowcode/forecast/operator_config.py +18 -2
- ads/opctl/operator/lowcode/forecast/schema.yaml +20 -4
- ads/opctl/operator/lowcode/forecast/utils.py +8 -4
- ads/opctl/operator/lowcode/pii/README.md +1 -1
- ads/opctl/operator/lowcode/pii/environment.yaml +1 -1
- ads/opctl/operator/lowcode/pii/model/report.py +71 -70
- ads/pipeline/ads_pipeline_step.py +11 -12
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/METADATA +8 -7
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/RECORD +64 -64
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.6.dist-info → oracle_ads-2.11.8.dist-info}/entry_points.txt +0 -0
@@ -82,7 +82,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
82
82
|
|
83
83
|
data = self.preprocess(df, series_id)
|
84
84
|
data_i = self.drop_horizon(data)
|
85
|
-
if self.loaded_models is not None:
|
85
|
+
if self.loaded_models is not None and series_id in self.loaded_models:
|
86
86
|
model = self.loaded_models[series_id]
|
87
87
|
else:
|
88
88
|
if self.perform_tuning:
|
@@ -133,9 +133,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
133
133
|
}
|
134
134
|
|
135
135
|
def _build_model(self) -> pd.DataFrame:
|
136
|
-
from prophet import Prophet
|
137
|
-
from prophet.diagnostics import cross_validation, performance_metrics
|
138
|
-
|
139
136
|
full_data_dict = self.datasets.get_data_by_series()
|
140
137
|
self.models = dict()
|
141
138
|
self.outputs = dict()
|
@@ -160,6 +157,9 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
160
157
|
return self.forecast_output.get_forecast_long()
|
161
158
|
|
162
159
|
def run_tuning(self, data_i, model_kwargs_i):
|
160
|
+
from prophet import Prophet
|
161
|
+
from prophet.diagnostics import cross_validation, performance_metrics
|
162
|
+
|
163
163
|
def objective(trial):
|
164
164
|
params = {
|
165
165
|
"seasonality_mode": trial.suggest_categorical(
|
@@ -243,68 +243,70 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
243
243
|
return model_kwargs_i
|
244
244
|
|
245
245
|
def _generate_report(self):
|
246
|
-
import
|
246
|
+
import report_creator as rc
|
247
247
|
from prophet.plot import add_changepoints_to_plot
|
248
248
|
|
249
|
-
series_ids = self.
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
series_ids=series_ids,
|
266
|
-
)
|
249
|
+
series_ids = self.models.keys()
|
250
|
+
all_sections = []
|
251
|
+
if len(series_ids) > 0:
|
252
|
+
sec1 = _select_plot_list(
|
253
|
+
lambda s_id: self.models[s_id].plot(
|
254
|
+
self.outputs[s_id], include_legend=True
|
255
|
+
),
|
256
|
+
series_ids=series_ids,
|
257
|
+
)
|
258
|
+
section_1 = rc.Block(
|
259
|
+
rc.Heading("Forecast Overview", level=2),
|
260
|
+
rc.Text(
|
261
|
+
"These plots show your forecast in the context of historical data."
|
262
|
+
),
|
263
|
+
sec1,
|
264
|
+
)
|
267
265
|
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
sec3_figs[s_id].gca(), self.models[s_id], self.outputs[s_id]
|
266
|
+
sec2 = _select_plot_list(
|
267
|
+
lambda s_id: self.models[s_id].plot_components(self.outputs[s_id]),
|
268
|
+
series_ids=series_ids,
|
269
|
+
)
|
270
|
+
section_2 = rc.Block(
|
271
|
+
rc.Heading("Forecast Broken Down by Trend Component", level=2), sec2
|
275
272
|
)
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
m = self.models[s_id]
|
284
|
-
model_states.append(
|
285
|
-
pd.Series(
|
286
|
-
m.seasonalities,
|
287
|
-
index=pd.Index(m.seasonalities.keys(), dtype="object"),
|
288
|
-
name=s_id,
|
289
|
-
dtype="object",
|
273
|
+
|
274
|
+
sec3_figs = {
|
275
|
+
s_id: self.models[s_id].plot(self.outputs[s_id]) for s_id in series_ids
|
276
|
+
}
|
277
|
+
for s_id in series_ids:
|
278
|
+
add_changepoints_to_plot(
|
279
|
+
sec3_figs[s_id].gca(), self.models[s_id], self.outputs[s_id]
|
290
280
|
)
|
281
|
+
sec3 = _select_plot_list(
|
282
|
+
lambda s_id: sec3_figs[s_id], series_ids=series_ids
|
291
283
|
)
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
284
|
+
section_3 = rc.Block(rc.Heading("Forecast Changepoints", level=2), sec3)
|
285
|
+
|
286
|
+
all_sections = [section_1, section_2, section_3]
|
287
|
+
|
288
|
+
sec5_text = rc.Heading("Prophet Model Seasonality Components", level=2)
|
289
|
+
model_states = []
|
290
|
+
for s_id in series_ids:
|
291
|
+
m = self.models[s_id]
|
292
|
+
model_states.append(
|
293
|
+
pd.Series(
|
294
|
+
m.seasonalities,
|
295
|
+
index=pd.Index(m.seasonalities.keys(), dtype="object"),
|
296
|
+
name=s_id,
|
297
|
+
dtype="object",
|
298
|
+
)
|
299
|
+
)
|
300
|
+
all_model_states = pd.concat(model_states, axis=1)
|
301
|
+
if not all_model_states.empty:
|
302
|
+
sec5 = rc.DataTable(all_model_states, index=True)
|
303
|
+
all_sections = all_sections + [sec5_text, sec5]
|
296
304
|
|
297
305
|
if self.spec.generate_explanations:
|
298
306
|
try:
|
299
307
|
# If the key is present, call the "explain_model" method
|
300
308
|
self.explain_model()
|
301
309
|
|
302
|
-
# Create a markdown text block for the global explanation section
|
303
|
-
global_explanation_text = dp.Text(
|
304
|
-
f"## Global Explanation of Models \n "
|
305
|
-
"The following tables provide the feature attribution for the global explainability."
|
306
|
-
)
|
307
|
-
|
308
310
|
# Convert the global explanation data to a DataFrame
|
309
311
|
global_explanation_df = pd.DataFrame(self.global_explanation)
|
310
312
|
|
@@ -313,9 +315,12 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
313
315
|
)
|
314
316
|
|
315
317
|
# Create a markdown section for the global explainability
|
316
|
-
global_explanation_section =
|
317
|
-
"
|
318
|
-
|
318
|
+
global_explanation_section = rc.Block(
|
319
|
+
rc.Heading("Global Explanation of Models", level=2),
|
320
|
+
rc.Text(
|
321
|
+
"The following tables provide the feature attribution for the global explainability."
|
322
|
+
),
|
323
|
+
rc.DataTable(self.formatted_global_explanation, index=True),
|
319
324
|
)
|
320
325
|
|
321
326
|
aggregate_local_explanations = pd.DataFrame()
|
@@ -327,21 +332,21 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
327
332
|
)
|
328
333
|
self.formatted_local_explanation = aggregate_local_explanations
|
329
334
|
|
330
|
-
local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
|
331
335
|
blocks = [
|
332
|
-
|
336
|
+
rc.DataTable(
|
333
337
|
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
|
334
338
|
label=s_id,
|
339
|
+
index=True,
|
335
340
|
)
|
336
341
|
for s_id, local_ex_df in self.local_explanation.items()
|
337
342
|
]
|
338
|
-
local_explanation_section = (
|
339
|
-
|
343
|
+
local_explanation_section = rc.Block(
|
344
|
+
rc.Heading("Local Explanation of Models", level=2),
|
345
|
+
rc.Select(blocks=blocks),
|
340
346
|
)
|
341
347
|
|
342
348
|
# Append the global explanation text and section to the "all_sections" list
|
343
349
|
all_sections = all_sections + [
|
344
|
-
global_explanation_text,
|
345
350
|
global_explanation_section,
|
346
351
|
local_explanation_text,
|
347
352
|
local_explanation_section,
|
@@ -351,7 +356,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
351
356
|
logger.warn(f"Failed to generate Explanations with error: {e}.")
|
352
357
|
logger.debug(f"Full Traceback: {traceback.format_exc()}")
|
353
358
|
|
354
|
-
model_description =
|
359
|
+
model_description = (
|
355
360
|
"Prophet is a procedure for forecasting time series data based on an additive "
|
356
361
|
"model where non-linear trends are fit with yearly, weekly, and daily seasonality, "
|
357
362
|
"plus holiday effects. It works best with time series that have strong seasonal "
|
@@ -29,6 +29,22 @@ class DateTimeColumn(DataClassSerializable):
|
|
29
29
|
format: str = None
|
30
30
|
|
31
31
|
|
32
|
+
@dataclass(repr=True)
|
33
|
+
class PreprocessingSteps(DataClassSerializable):
|
34
|
+
"""Class representing preprocessing steps for operator."""
|
35
|
+
|
36
|
+
missing_value_imputation: bool = True
|
37
|
+
outlier_treatment: bool = True
|
38
|
+
|
39
|
+
|
40
|
+
@dataclass(repr=True)
|
41
|
+
class DataPreprocessor(DataClassSerializable):
|
42
|
+
"""Class representing operator specification preprocessing details."""
|
43
|
+
|
44
|
+
enabled: bool = True
|
45
|
+
steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
|
46
|
+
|
47
|
+
|
32
48
|
@dataclass(repr=True)
|
33
49
|
class Tuning(DataClassSerializable):
|
34
50
|
"""Class representing operator specification tuning details."""
|
@@ -54,7 +70,7 @@ class ForecastOperatorSpec(DataClassSerializable):
|
|
54
70
|
global_explanation_filename: str = None
|
55
71
|
local_explanation_filename: str = None
|
56
72
|
target_column: str = None
|
57
|
-
preprocessing:
|
73
|
+
preprocessing: DataPreprocessor = field(default_factory=DataPreprocessor)
|
58
74
|
datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
|
59
75
|
target_category_columns: List[str] = field(default_factory=list)
|
60
76
|
generate_report: bool = None
|
@@ -79,7 +95,7 @@ class ForecastOperatorSpec(DataClassSerializable):
|
|
79
95
|
self.confidence_interval_width = self.confidence_interval_width or 0.80
|
80
96
|
self.report_filename = self.report_filename or "report.html"
|
81
97
|
self.preprocessing = (
|
82
|
-
self.preprocessing if self.preprocessing is not None else True
|
98
|
+
self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
|
83
99
|
)
|
84
100
|
# For Report Generation. When user doesn't specify defaults to True
|
85
101
|
self.generate_report = (
|
@@ -286,11 +286,27 @@ spec:
|
|
286
286
|
default: target
|
287
287
|
|
288
288
|
preprocessing:
|
289
|
-
type:
|
289
|
+
type: dict
|
290
290
|
required: false
|
291
|
-
|
292
|
-
|
293
|
-
|
291
|
+
schema:
|
292
|
+
enabled:
|
293
|
+
type: boolean
|
294
|
+
required: false
|
295
|
+
default: true
|
296
|
+
meta:
|
297
|
+
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
|
298
|
+
steps:
|
299
|
+
type: dict
|
300
|
+
required: false
|
301
|
+
schema:
|
302
|
+
missing_value_imputation:
|
303
|
+
type: boolean
|
304
|
+
required: false
|
305
|
+
default: false
|
306
|
+
outlier_treatment:
|
307
|
+
type: boolean
|
308
|
+
required: false
|
309
|
+
default: false
|
294
310
|
|
295
311
|
generate_explanations:
|
296
312
|
type: boolean
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*--
|
3
3
|
|
4
|
-
# Copyright (c) 2023 Oracle and/or its affiliates.
|
4
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
import os
|
@@ -51,7 +51,11 @@ def _inverse_transform_dataframe(le, df):
|
|
51
51
|
def smape(actual, predicted) -> float:
|
52
52
|
if not all([isinstance(actual, np.ndarray), isinstance(predicted, np.ndarray)]):
|
53
53
|
actual, predicted = (np.array(actual), np.array(predicted))
|
54
|
+
zero_mask = np.logical_and(actual == 0, predicted == 0)
|
55
|
+
|
54
56
|
denominator = np.abs(actual) + np.abs(predicted)
|
57
|
+
denominator[zero_mask] = 1
|
58
|
+
|
55
59
|
numerator = np.abs(actual - predicted)
|
56
60
|
default_output = np.ones_like(numerator) * np.inf
|
57
61
|
|
@@ -254,10 +258,10 @@ def evaluate_train_metrics(output, metrics_col_name=None):
|
|
254
258
|
|
255
259
|
|
256
260
|
def _select_plot_list(fn, series_ids):
|
257
|
-
import
|
261
|
+
import report_creator as rc
|
258
262
|
|
259
|
-
blocks = [
|
260
|
-
return
|
263
|
+
blocks = [rc.Widget(fn(s_id=s_id), label=s_id) for s_id in series_ids]
|
264
|
+
return rc.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
|
261
265
|
|
262
266
|
|
263
267
|
def _add_unit(num, unit):
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*--
|
3
3
|
|
4
|
-
# Copyright (c) 2023 Oracle and/or its affiliates.
|
4
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
|
7
7
|
|
@@ -39,10 +39,10 @@ from ads.opctl.operator.lowcode.pii.operator_config import PiiOperatorConfig
|
|
39
39
|
from ads.opctl.operator.lowcode.pii.utils import compute_rate
|
40
40
|
|
41
41
|
try:
|
42
|
-
import
|
42
|
+
import report_creator as rc
|
43
43
|
except ImportError:
|
44
44
|
raise ModuleNotFoundError(
|
45
|
-
f"`
|
45
|
+
f"`report-creator` module was not found. Please run "
|
46
46
|
f"`pip install {OptionalDependency.PII}`."
|
47
47
|
)
|
48
48
|
|
@@ -119,8 +119,8 @@ def make_model_card(model_name="", readme_path=""):
|
|
119
119
|
logger.warning(
|
120
120
|
"You don't have internet connection. Therefore, we are not able to generate model card."
|
121
121
|
)
|
122
|
-
return
|
123
|
-
|
122
|
+
return rc.Group(
|
123
|
+
rc.Text("-"),
|
124
124
|
columns=1,
|
125
125
|
)
|
126
126
|
|
@@ -144,15 +144,15 @@ def make_model_card(model_name="", readme_path=""):
|
|
144
144
|
)
|
145
145
|
]
|
146
146
|
)
|
147
|
-
eval_res_tb =
|
147
|
+
eval_res_tb = rc.Widget(data=fig, caption="Evaluation Results")
|
148
148
|
except:
|
149
|
-
eval_res_tb =
|
149
|
+
eval_res_tb = rc.Text("-")
|
150
150
|
logger.warning(
|
151
151
|
"The given readme.md doesn't have correct template for Evaluation Results."
|
152
152
|
)
|
153
153
|
|
154
|
-
return
|
155
|
-
|
154
|
+
return rc.Group(
|
155
|
+
rc.Text(text),
|
156
156
|
eval_res_tb,
|
157
157
|
columns=2,
|
158
158
|
)
|
@@ -172,7 +172,7 @@ def map_label_to_color(labels):
|
|
172
172
|
|
173
173
|
|
174
174
|
@runtime_dependency(module="plotly", install_from=OptionalDependency.PII)
|
175
|
-
def plot_pie(count_map) ->
|
175
|
+
def plot_pie(count_map) -> rc.Widget:
|
176
176
|
import plotly.express as px
|
177
177
|
|
178
178
|
cols = count_map.keys()
|
@@ -190,7 +190,7 @@ def plot_pie(count_map) -> dp.Plot:
|
|
190
190
|
color_discrete_map=map_label_to_color(cols),
|
191
191
|
)
|
192
192
|
fig.update_traces(textposition="inside", textinfo="percent+label")
|
193
|
-
return
|
193
|
+
return rc.Widget(fig)
|
194
194
|
|
195
195
|
|
196
196
|
def build_entity_df(entites, id) -> pd.DataFrame:
|
@@ -223,37 +223,38 @@ class RowReportFields:
|
|
223
223
|
self.spec = row_spec
|
224
224
|
self.show_sensitive_info = show_sensitive_info
|
225
225
|
|
226
|
-
def build_report(self) ->
|
227
|
-
return
|
228
|
-
|
226
|
+
def build_report(self) -> rc.Group:
|
227
|
+
return rc.Group(
|
228
|
+
rc.Select(
|
229
229
|
blocks=[
|
230
230
|
self._make_stats_card(),
|
231
231
|
self._make_text_card(),
|
232
232
|
],
|
233
|
-
type=
|
233
|
+
type=rc.SelectType.TABS,
|
234
234
|
),
|
235
235
|
label="Row Id: " + str(self.spec.id),
|
236
236
|
)
|
237
237
|
|
238
238
|
def _make_stats_card(self):
|
239
239
|
stats = [
|
240
|
-
|
241
|
-
|
240
|
+
rc.Heading("Row Summary Statistics", level=2),
|
241
|
+
rc.Metric(
|
242
242
|
heading="Total No. Of Entites Proceed",
|
243
243
|
value=self.spec.total_tokens or 0,
|
244
244
|
),
|
245
|
-
|
245
|
+
rc.Heading("Entities Distribution", level=3),
|
246
246
|
plot_pie(self.spec.statics),
|
247
247
|
]
|
248
248
|
if self.show_sensitive_info:
|
249
|
-
stats.append(
|
249
|
+
stats.append(rc.Heading("Resolved Entities", level=3))
|
250
250
|
stats.append(
|
251
|
-
|
251
|
+
rc.DataTable(
|
252
252
|
build_entity_df(self.spec.entities, id=self.spec.id),
|
253
253
|
label="Resolved Entities",
|
254
|
+
index=True,
|
254
255
|
)
|
255
256
|
)
|
256
|
-
return
|
257
|
+
return rc.Group(stats, label="STATS")
|
257
258
|
|
258
259
|
def _make_text_card(self):
|
259
260
|
annotations = []
|
@@ -274,7 +275,7 @@ class RowReportFields:
|
|
274
275
|
},
|
275
276
|
return_html=True,
|
276
277
|
)
|
277
|
-
return
|
278
|
+
return rc.Group(rc.HTML(render_html), label="TEXT")
|
278
279
|
|
279
280
|
|
280
281
|
class PIIOperatorReport:
|
@@ -293,26 +294,28 @@ class PIIOperatorReport:
|
|
293
294
|
self.report_uri = report_uri
|
294
295
|
|
295
296
|
def make_view(self):
|
296
|
-
title_text =
|
297
|
-
|
297
|
+
title_text = rc.Heading(
|
298
|
+
"Personally Identifiable Information Operator Report", level=1
|
299
|
+
)
|
300
|
+
time_proceed = rc.Metric(
|
298
301
|
heading="Ran at",
|
299
302
|
value=self.report_spec.run_summary.timestamp or "today",
|
300
303
|
)
|
301
|
-
report_description =
|
304
|
+
report_description = rc.Text(PII_REPORT_DESCRIPTION)
|
302
305
|
|
303
|
-
structure =
|
304
|
-
|
306
|
+
structure = rc.Block(
|
307
|
+
rc.Select(
|
305
308
|
blocks=[
|
306
|
-
|
309
|
+
rc.Group(
|
307
310
|
self._build_summary_page(),
|
308
311
|
label="Summary",
|
309
312
|
),
|
310
|
-
|
313
|
+
rc.Group(
|
311
314
|
self._build_details_page(),
|
312
315
|
label="Details",
|
313
316
|
),
|
314
317
|
],
|
315
|
-
type=
|
318
|
+
type=rc.SelectType.TABS,
|
316
319
|
)
|
317
320
|
)
|
318
321
|
self.report_sections = [title_text, report_description, time_proceed, structure]
|
@@ -322,11 +325,10 @@ class PIIOperatorReport:
|
|
322
325
|
with tempfile.TemporaryDirectory() as temp_dir:
|
323
326
|
report_local_path = os.path.join(temp_dir, "___report.html")
|
324
327
|
disable_print()
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
)
|
328
|
+
with rc.ReportCreator("My Report") as report:
|
329
|
+
report.save(
|
330
|
+
rc.Block(report_sections or self.report_sections), report_local_path
|
331
|
+
)
|
330
332
|
enable_print()
|
331
333
|
|
332
334
|
report_uri = report_uri or self.report_uri
|
@@ -339,36 +341,36 @@ class PIIOperatorReport:
|
|
339
341
|
f2.write(f1.read())
|
340
342
|
|
341
343
|
def _build_summary_page(self):
|
342
|
-
summary =
|
343
|
-
|
344
|
-
|
345
|
-
|
344
|
+
summary = rc.Block(
|
345
|
+
rc.Heading("PII Summary", level=1),
|
346
|
+
rc.Text(self._get_summary_desc()),
|
347
|
+
rc.Select(
|
346
348
|
blocks=[
|
347
349
|
self._make_summary_stats_card(),
|
348
350
|
self._make_yaml_card(),
|
349
351
|
self._make_model_card(),
|
350
352
|
],
|
351
|
-
type=
|
353
|
+
type=rc.SelectType.TABS,
|
352
354
|
),
|
353
355
|
)
|
354
356
|
|
355
357
|
return summary
|
356
358
|
|
357
359
|
def _build_details_page(self):
|
358
|
-
details =
|
359
|
-
|
360
|
-
|
360
|
+
details = rc.Block(
|
361
|
+
rc.Text(DETAILS_REPORT_DESCRIPTION),
|
362
|
+
rc.Select(
|
361
363
|
blocks=[
|
362
364
|
row.build_report() for row in self.rows_details
|
363
365
|
], # RowReportFields
|
364
|
-
type=
|
366
|
+
type=rc.SelectType.DROPDOWN,
|
365
367
|
label="Details",
|
366
368
|
),
|
367
369
|
)
|
368
370
|
|
369
371
|
return details
|
370
372
|
|
371
|
-
def _make_summary_stats_card(self) ->
|
373
|
+
def _make_summary_stats_card(self) -> rc.Group:
|
372
374
|
"""
|
373
375
|
Shows summary statics
|
374
376
|
1. total rows
|
@@ -388,21 +390,21 @@ class PIIOperatorReport:
|
|
388
390
|
process_rate = "-"
|
389
391
|
|
390
392
|
summary_stats = [
|
391
|
-
|
392
|
-
|
393
|
-
|
393
|
+
rc.Heading("Summary Statistics", level=2),
|
394
|
+
rc.Group(
|
395
|
+
rc.Metric(
|
394
396
|
heading="Total No. Of Rows",
|
395
397
|
value=self.report_spec.run_summary.total_rows or "unknown",
|
396
398
|
),
|
397
|
-
|
399
|
+
rc.Metric(
|
398
400
|
heading="Total No. Of Entites Proceed",
|
399
401
|
value=self.report_spec.run_summary.total_tokens,
|
400
402
|
),
|
401
|
-
|
403
|
+
rc.Metric(
|
402
404
|
heading="Rows per second processed",
|
403
405
|
value=process_rate,
|
404
406
|
),
|
405
|
-
|
407
|
+
rc.Metric(
|
406
408
|
heading="Total Time Spent",
|
407
409
|
value=human_time_friendly(
|
408
410
|
self.report_spec.run_summary.elapsed_time
|
@@ -410,32 +412,31 @@ class PIIOperatorReport:
|
|
410
412
|
),
|
411
413
|
columns=2,
|
412
414
|
),
|
413
|
-
|
415
|
+
rc.Heading("Entities Distribution", level=3),
|
414
416
|
plot_pie(self.report_spec.run_summary.statics),
|
415
417
|
]
|
416
418
|
if self.report_spec.run_summary.show_sensitive_info:
|
417
419
|
entites_df = self._build_total_entity_df()
|
418
|
-
summary_stats.append(
|
419
|
-
summary_stats.append(
|
420
|
-
return
|
420
|
+
summary_stats.append(rc.Heading("Resolved Entities", level=3))
|
421
|
+
summary_stats.append(rc.DataTable(entites_df, index=True))
|
422
|
+
return rc.Group(summary_stats, label="STATS")
|
421
423
|
|
422
|
-
def _make_yaml_card(self) ->
|
424
|
+
def _make_yaml_card(self) -> rc.Group:
|
423
425
|
"""Shows the full pii config yaml."""
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
return dp.Group(blocks=[yaml_appendix_title, yaml_appendix], label="YAML")
|
426
|
+
yaml_appendix_title = rc.Heading("Reference: YAML File", level=2)
|
427
|
+
yaml_appendix = rc.Yaml(self.report_spec.run_summary.config.to_dict())
|
428
|
+
return rc.Group(yaml_appendix_title, yaml_appendix, label="YAML")
|
428
429
|
|
429
|
-
def _make_model_card(self) ->
|
430
|
+
def _make_model_card(self) -> rc.Group:
|
430
431
|
"""Generates model card."""
|
431
432
|
if len(self.report_spec.run_summary.selected_spacy_model) == 0:
|
432
|
-
return
|
433
|
-
|
433
|
+
return rc.Group(
|
434
|
+
rc.Text("No model used."),
|
434
435
|
label="MODEL CARD",
|
435
436
|
)
|
436
437
|
|
437
438
|
model_cards = [
|
438
|
-
|
439
|
+
rc.Group(
|
439
440
|
make_model_card(model_name=x.get("model")),
|
440
441
|
label=x.get("model"),
|
441
442
|
)
|
@@ -443,14 +444,14 @@ class PIIOperatorReport:
|
|
443
444
|
]
|
444
445
|
|
445
446
|
if len(model_cards) <= 1:
|
446
|
-
return
|
447
|
-
|
447
|
+
return rc.Group(
|
448
|
+
model_cards,
|
448
449
|
label="MODEL CARD",
|
449
450
|
)
|
450
|
-
return
|
451
|
-
|
452
|
-
|
453
|
-
type=
|
451
|
+
return rc.Group(
|
452
|
+
rc.Select(
|
453
|
+
model_cards,
|
454
|
+
type=rc.SelectType.TABS,
|
454
455
|
),
|
455
456
|
label="MODEL CARD",
|
456
457
|
)
|