acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/METADATA +2511 -2484
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/RECORD +223 -189
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/pydantic_migration_helpers.py +7 -5
- datahub/emitter/rest_emitter.py +70 -12
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1522 -569
- datahub/metadata/_urns/urn_defs.py +1826 -1658
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +17758 -17097
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +116 -0
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +82 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/sdk/search_filters.py +95 -27
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +56 -14
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/api/source.py
CHANGED
|
@@ -2,7 +2,6 @@ import contextlib
|
|
|
2
2
|
import datetime
|
|
3
3
|
import logging
|
|
4
4
|
from abc import ABCMeta, abstractmethod
|
|
5
|
-
from collections import defaultdict
|
|
6
5
|
from dataclasses import dataclass, field
|
|
7
6
|
from enum import Enum
|
|
8
7
|
from functools import partial
|
|
@@ -15,7 +14,6 @@ from typing import (
|
|
|
15
14
|
List,
|
|
16
15
|
Optional,
|
|
17
16
|
Sequence,
|
|
18
|
-
Set,
|
|
19
17
|
Type,
|
|
20
18
|
TypeVar,
|
|
21
19
|
Union,
|
|
@@ -28,7 +26,6 @@ from typing_extensions import LiteralString, Self
|
|
|
28
26
|
from datahub.configuration.common import ConfigModel
|
|
29
27
|
from datahub.configuration.source_common import PlatformInstanceConfigMixin
|
|
30
28
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
31
|
-
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
32
29
|
from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import (
|
|
33
30
|
auto_patch_last_modified,
|
|
34
31
|
)
|
|
@@ -37,7 +34,7 @@ from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import (
|
|
|
37
34
|
)
|
|
38
35
|
from datahub.ingestion.api.closeable import Closeable
|
|
39
36
|
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit
|
|
40
|
-
from datahub.ingestion.api.report import Report
|
|
37
|
+
from datahub.ingestion.api.report import ExamplesReport, Report
|
|
41
38
|
from datahub.ingestion.api.source_helpers import (
|
|
42
39
|
AutoSystemMetadata,
|
|
43
40
|
auto_browse_path_v2,
|
|
@@ -50,9 +47,8 @@ from datahub.ingestion.api.source_helpers import (
|
|
|
50
47
|
auto_workunit_reporter,
|
|
51
48
|
)
|
|
52
49
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
53
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
54
|
-
from datahub.metadata.schema_classes import UpstreamLineageClass
|
|
55
50
|
from datahub.sdk.entity import Entity
|
|
51
|
+
from datahub.telemetry import stats
|
|
56
52
|
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
57
53
|
from datahub.utilities.type_annotations import get_class_from_annotation
|
|
58
54
|
|
|
@@ -76,6 +72,7 @@ class SourceCapability(Enum):
|
|
|
76
72
|
SCHEMA_METADATA = "Schema Metadata"
|
|
77
73
|
CONTAINERS = "Asset Containers"
|
|
78
74
|
CLASSIFICATION = "Classification"
|
|
75
|
+
TEST_CONNECTION = "Test Connection"
|
|
79
76
|
|
|
80
77
|
|
|
81
78
|
class StructuredLogLevel(Enum):
|
|
@@ -190,20 +187,11 @@ class StructuredLogs(Report):
|
|
|
190
187
|
|
|
191
188
|
|
|
192
189
|
@dataclass
|
|
193
|
-
class SourceReport(Report):
|
|
190
|
+
class SourceReport(ExamplesReport):
|
|
194
191
|
event_not_produced_warn: bool = True
|
|
195
192
|
events_produced: int = 0
|
|
196
193
|
events_produced_per_sec: int = 0
|
|
197
194
|
|
|
198
|
-
_urns_seen: Set[str] = field(default_factory=set)
|
|
199
|
-
entities: Dict[str, list] = field(default_factory=lambda: defaultdict(LossyList))
|
|
200
|
-
aspects: Dict[str, Dict[str, int]] = field(
|
|
201
|
-
default_factory=lambda: defaultdict(lambda: defaultdict(int))
|
|
202
|
-
)
|
|
203
|
-
aspect_urn_samples: Dict[str, Dict[str, LossyList[str]]] = field(
|
|
204
|
-
default_factory=lambda: defaultdict(lambda: defaultdict(LossyList))
|
|
205
|
-
)
|
|
206
|
-
|
|
207
195
|
_structured_logs: StructuredLogs = field(default_factory=StructuredLogs)
|
|
208
196
|
|
|
209
197
|
@property
|
|
@@ -220,33 +208,10 @@ class SourceReport(Report):
|
|
|
220
208
|
|
|
221
209
|
def report_workunit(self, wu: WorkUnit) -> None:
|
|
222
210
|
self.events_produced += 1
|
|
211
|
+
if not isinstance(wu, MetadataWorkUnit):
|
|
212
|
+
return
|
|
223
213
|
|
|
224
|
-
|
|
225
|
-
urn = wu.get_urn()
|
|
226
|
-
|
|
227
|
-
# Specialized entity reporting.
|
|
228
|
-
if not isinstance(wu.metadata, MetadataChangeEvent):
|
|
229
|
-
mcps = [wu.metadata]
|
|
230
|
-
else:
|
|
231
|
-
mcps = list(mcps_from_mce(wu.metadata))
|
|
232
|
-
|
|
233
|
-
for mcp in mcps:
|
|
234
|
-
entityType = mcp.entityType
|
|
235
|
-
aspectName = mcp.aspectName
|
|
236
|
-
|
|
237
|
-
if urn not in self._urns_seen:
|
|
238
|
-
self._urns_seen.add(urn)
|
|
239
|
-
self.entities[entityType].append(urn)
|
|
240
|
-
|
|
241
|
-
if aspectName is not None: # usually true
|
|
242
|
-
self.aspects[entityType][aspectName] += 1
|
|
243
|
-
self.aspect_urn_samples[entityType][aspectName].append(urn)
|
|
244
|
-
if isinstance(mcp.aspect, UpstreamLineageClass):
|
|
245
|
-
upstream_lineage = cast(UpstreamLineageClass, mcp.aspect)
|
|
246
|
-
if upstream_lineage.fineGrainedLineages:
|
|
247
|
-
self.aspect_urn_samples[entityType][
|
|
248
|
-
"fineGrainedLineages"
|
|
249
|
-
].append(urn)
|
|
214
|
+
super()._store_workunit_data(wu)
|
|
250
215
|
|
|
251
216
|
def report_warning(
|
|
252
217
|
self,
|
|
@@ -265,9 +230,10 @@ class SourceReport(Report):
|
|
|
265
230
|
context: Optional[str] = None,
|
|
266
231
|
title: Optional[LiteralString] = None,
|
|
267
232
|
exc: Optional[BaseException] = None,
|
|
233
|
+
log: bool = True,
|
|
268
234
|
) -> None:
|
|
269
235
|
self._structured_logs.report_log(
|
|
270
|
-
StructuredLogLevel.WARN, message, title, context, exc, log=
|
|
236
|
+
StructuredLogLevel.WARN, message, title, context, exc, log=log
|
|
271
237
|
)
|
|
272
238
|
|
|
273
239
|
def report_failure(
|
|
@@ -325,6 +291,7 @@ class SourceReport(Report):
|
|
|
325
291
|
)
|
|
326
292
|
|
|
327
293
|
def __post_init__(self) -> None:
|
|
294
|
+
super().__post_init__()
|
|
328
295
|
self.start_time = datetime.datetime.now()
|
|
329
296
|
self.running_time: datetime.timedelta = datetime.timedelta(seconds=0)
|
|
330
297
|
|
|
@@ -337,6 +304,43 @@ class SourceReport(Report):
|
|
|
337
304
|
"infos": Report.to_pure_python_obj(self.infos),
|
|
338
305
|
}
|
|
339
306
|
|
|
307
|
+
@staticmethod
|
|
308
|
+
def _discretize_dict_values(
|
|
309
|
+
nested_dict: Dict[str, Dict[str, int]],
|
|
310
|
+
) -> Dict[str, Dict[str, int]]:
|
|
311
|
+
"""Helper method to discretize values in a nested dictionary structure."""
|
|
312
|
+
result = {}
|
|
313
|
+
for outer_key, inner_dict in nested_dict.items():
|
|
314
|
+
discretized_dict: Dict[str, int] = {}
|
|
315
|
+
for inner_key, count in inner_dict.items():
|
|
316
|
+
discretized_dict[inner_key] = stats.discretize(count)
|
|
317
|
+
result[outer_key] = discretized_dict
|
|
318
|
+
return result
|
|
319
|
+
|
|
320
|
+
def get_aspects_dict(self) -> Dict[str, Dict[str, int]]:
|
|
321
|
+
"""Convert the nested defaultdict aspects to a regular dict for serialization."""
|
|
322
|
+
return self._discretize_dict_values(self.aspects)
|
|
323
|
+
|
|
324
|
+
def get_aspects_by_subtypes_dict(self) -> Dict[str, Dict[str, Dict[str, int]]]:
|
|
325
|
+
"""Get aspect counts grouped by entity type and subtype."""
|
|
326
|
+
return self._discretize_dict_values_nested(self.aspects_by_subtypes)
|
|
327
|
+
|
|
328
|
+
@staticmethod
|
|
329
|
+
def _discretize_dict_values_nested(
|
|
330
|
+
nested_dict: Dict[str, Dict[str, Dict[str, int]]],
|
|
331
|
+
) -> Dict[str, Dict[str, Dict[str, int]]]:
|
|
332
|
+
"""Helper method to discretize values in a nested dictionary structure with three levels."""
|
|
333
|
+
result = {}
|
|
334
|
+
for outer_key, middle_dict in nested_dict.items():
|
|
335
|
+
discretized_middle_dict: Dict[str, Dict[str, int]] = {}
|
|
336
|
+
for middle_key, inner_dict in middle_dict.items():
|
|
337
|
+
discretized_inner_dict: Dict[str, int] = {}
|
|
338
|
+
for inner_key, count in inner_dict.items():
|
|
339
|
+
discretized_inner_dict[inner_key] = stats.discretize(count)
|
|
340
|
+
discretized_middle_dict[middle_key] = discretized_inner_dict
|
|
341
|
+
result[outer_key] = discretized_middle_dict
|
|
342
|
+
return result
|
|
343
|
+
|
|
340
344
|
def compute_stats(self) -> None:
|
|
341
345
|
super().compute_stats()
|
|
342
346
|
|
|
@@ -503,7 +507,7 @@ class Source(Closeable, metaclass=ABCMeta):
|
|
|
503
507
|
pass
|
|
504
508
|
|
|
505
509
|
def close(self) -> None:
|
|
506
|
-
|
|
510
|
+
self.get_report().close()
|
|
507
511
|
|
|
508
512
|
def _infer_platform(self) -> Optional[str]:
|
|
509
513
|
config = self.get_config()
|
|
File without changes
|