acryl-datahub 0.15.0rc17__py3-none-any.whl → 0.15.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=36zFdNjWt7jwEClr7v19ajrbbZWK2fom9sGt-Llby2I,575
1
+ datahub/__init__.py,sha256=zTa1Zc6cS51RVM7kIIa6JgOSFayPVXd-AmsJeebmbNQ,575
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -346,7 +346,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
346
346
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
347
347
  datahub/ingestion/source/looker/view_upstream.py,sha256=rAWKif3UngeRGS_CMQqjihKs3Lxiu6DecLAdtdxvqOw,26096
348
348
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
349
- datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
349
+ datahub/ingestion/source/metadata/business_glossary.py,sha256=yySwJp2SCUQp8hRwN2lQuSqvOQowIhCKDKj9syhlTZA,18210
350
350
  datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
351
351
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
352
352
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=L9WiZ5yZrIDMrgj3gYU9j6zz3TRMXYpcWxeTegD7sFg,12409
@@ -438,7 +438,7 @@ datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-
438
438
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
439
439
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
440
440
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
441
- datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=443P7t839_iRymWMIg-dd7to21smsazS110UKEYbpEU,12588
441
+ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=YczNEupY89jeegjR2_1pT4bPi9wQ69EIhGpzyCe9Jdg,12600
442
442
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=lo_3asTuIZbF-LuEUcYL-9NIZ720n7oB9mYA6WVTWA4,31960
443
443
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
444
  datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
@@ -560,7 +560,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
560
560
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
561
561
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
562
562
  datahub/metadata/_schema_classes.py,sha256=iPeBXGvbNEm0vw5pYwunnvx7bTtBdmIQVtzMOlS6bSI,955042
563
- datahub/metadata/schema.avsc,sha256=wMMSgx3OtzD1tNTC4dh_PFBZrnco21i2jO5J7oy2PgE,677545
563
+ datahub/metadata/schema.avsc,sha256=Xx93OdPzQfBb2CtntIYE-HAeKNg-JZcCtRU95v7ZZCs,677728
564
564
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
565
565
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
566
566
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -757,7 +757,7 @@ datahub/metadata/schemas/ExecutionRequestKey.avsc,sha256=SvjnlTAGYsSnvVE0rZ9-7UP
757
757
  datahub/metadata/schemas/ExecutionRequestResult.avsc,sha256=kg3xMNr9kYLPnFsV-iqcGm1sh1muQVGJvxUt15L1yKo,2333
758
758
  datahub/metadata/schemas/ExecutionRequestSignal.avsc,sha256=dsIUa6tfVSXqYOgh4cW6_Hzi8RjHuJJoO-mBAuZukpA,2515
759
759
  datahub/metadata/schemas/Filter.avsc,sha256=PU-aGkc2-sI3ZXY7ci-Y0A7zp1jux3VW_6c8MJRAokg,5933
760
- datahub/metadata/schemas/FormInfo.avsc,sha256=Gol1Qh6eB5lYvAOXBx5k2eTtyolGI8n0o_Cv_pxX40E,6095
760
+ datahub/metadata/schemas/FormInfo.avsc,sha256=FbN34htiCgm3LqKDL3sVsJhMUHIyc5jYpGJtYm7Ysd4,6270
761
761
  datahub/metadata/schemas/FormKey.avsc,sha256=1-wE28B8T3WJ3JtexreNtFvP3To3n7U-jvYudCuSM9o,437
762
762
  datahub/metadata/schemas/Forms.avsc,sha256=shmkhRoHN2gTaTsqGrGDRoNwe_z-nrFbbLjH9MtVDCs,10955
763
763
  datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0zI2llzFYLVFJhmAiHoiI,10102
@@ -869,11 +869,11 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
869
869
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
870
870
  datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
871
871
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
872
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=gLelf5l73EufB8qijb9ZDLANkt4o05schGg4DY-bOJs,69937
872
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=F-aj7yqOwbo7FpxduFO5a7cLWkojL_Npv3_dlfHPNGY,69877
873
873
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
874
874
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
875
875
  datahub/sql_parsing/sqlglot_lineage.py,sha256=CLDOc0HNqL_539eahOP3QOoldIYC6CF29id4Xe3TlEM,47018
876
- datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG8iVfFRY,14431
876
+ datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
877
877
  datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
878
878
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
879
879
  datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
974
974
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
975
975
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
976
976
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
977
- acryl_datahub-0.15.0rc17.dist-info/METADATA,sha256=5ZB602QpwWUttdsXUipTSML9XJPatWCW7XIkyHdVQWA,173559
978
- acryl_datahub-0.15.0rc17.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
- acryl_datahub-0.15.0rc17.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
- acryl_datahub-0.15.0rc17.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
- acryl_datahub-0.15.0rc17.dist-info/RECORD,,
977
+ acryl_datahub-0.15.0rc19.dist-info/METADATA,sha256=q_LaYt6m4WYgYyJo2ZA3Gj3a7kk5MKwiWvJheDfSPc8,173559
978
+ acryl_datahub-0.15.0rc19.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
+ acryl_datahub-0.15.0rc19.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
+ acryl_datahub-0.15.0rc19.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
+ acryl_datahub-0.15.0rc19.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc17"
6
+ __version__ = "0.15.0rc19"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -45,6 +45,9 @@ class Owners(ConfigModel):
45
45
  groups: Optional[List[str]] = None
46
46
 
47
47
 
48
+ OwnersMultipleTypes = Union[List[Owners], Owners]
49
+
50
+
48
51
  class KnowledgeCard(ConfigModel):
49
52
  url: Optional[str] = None
50
53
  label: Optional[str] = None
@@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
57
60
  term_source: Optional[str] = None
58
61
  source_ref: Optional[str] = None
59
62
  source_url: Optional[str] = None
60
- owners: Optional[Owners] = None
63
+ owners: Optional[OwnersMultipleTypes] = None
61
64
  inherits: Optional[List[str]] = None
62
65
  contains: Optional[List[str]] = None
63
66
  values: Optional[List[str]] = None
@@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
74
77
  id: Optional[str] = None
75
78
  name: str
76
79
  description: str
77
- owners: Optional[Owners] = None
80
+ owners: Optional[OwnersMultipleTypes] = None
78
81
  terms: Optional[List["GlossaryTermConfig"]] = None
79
82
  nodes: Optional[List["GlossaryNodeConfig"]] = None
80
83
  knowledge_links: Optional[List[KnowledgeCard]] = None
@@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
88
91
  """Holds defaults for populating fields in glossary terms"""
89
92
 
90
93
  source: Optional[str] = None
91
- owners: Owners
94
+ owners: OwnersMultipleTypes
92
95
  url: Optional[str] = None
93
96
  source_type: str = "INTERNAL"
94
97
 
@@ -153,30 +156,44 @@ def make_glossary_term_urn(
153
156
  return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)
154
157
 
155
158
 
156
- def get_owners(owners: Owners) -> models.OwnershipClass:
157
- ownership_type, ownership_type_urn = validate_ownership_type(owners.type)
159
+ def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
160
+ """Allows owner types to be a list and maintains backward compatibility"""
161
+ if isinstance(owners, Owners):
162
+ return models.OwnershipClass(owners=list(get_owners(owners)))
163
+
164
+ owners_meta: List[models.OwnerClass] = []
165
+ for owner in owners:
166
+ owners_meta.extend(get_owners(owner))
167
+
168
+ return models.OwnershipClass(owners=owners_meta)
169
+
170
+
171
+ def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
172
+ actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER
173
+
174
+ if actual_type.startswith("urn:li:ownershipType:"):
175
+ ownership_type: str = "CUSTOM"
176
+ ownership_type_urn: Optional[str] = actual_type
177
+ else:
178
+ ownership_type, ownership_type_urn = validate_ownership_type(actual_type)
179
+
158
180
  if owners.typeUrn is not None:
159
181
  ownership_type_urn = owners.typeUrn
160
- owners_meta: List[models.OwnerClass] = []
182
+
161
183
  if owners.users is not None:
162
- owners_meta = owners_meta + [
163
- models.OwnerClass(
184
+ for o in owners.users:
185
+ yield models.OwnerClass(
164
186
  owner=make_user_urn(o),
165
187
  type=ownership_type,
166
188
  typeUrn=ownership_type_urn,
167
189
  )
168
- for o in owners.users
169
- ]
170
190
  if owners.groups is not None:
171
- owners_meta = owners_meta + [
172
- models.OwnerClass(
191
+ for o in owners.groups:
192
+ yield models.OwnerClass(
173
193
  owner=make_group_urn(o),
174
194
  type=ownership_type,
175
195
  typeUrn=ownership_type_urn,
176
196
  )
177
- for o in owners.groups
178
- ]
179
- return models.OwnershipClass(owners=owners_meta)
180
197
 
181
198
 
182
199
  def get_mces(
@@ -185,7 +202,7 @@ def get_mces(
185
202
  ingestion_config: BusinessGlossarySourceConfig,
186
203
  ctx: PipelineContext,
187
204
  ) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
188
- root_owners = get_owners(glossary.owners)
205
+ root_owners = get_owners_multiple_types(glossary.owners)
189
206
 
190
207
  if glossary.nodes:
191
208
  for node in glossary.nodes:
@@ -270,7 +287,7 @@ def get_mces_from_node(
270
287
  node_owners = parentOwners
271
288
  if glossaryNode.owners is not None:
272
289
  assert glossaryNode.owners is not None
273
- node_owners = get_owners(glossaryNode.owners)
290
+ node_owners = get_owners_multiple_types(glossaryNode.owners)
274
291
 
275
292
  node_snapshot = models.GlossaryNodeSnapshotClass(
276
293
  urn=node_urn,
@@ -426,7 +443,7 @@ def get_mces_from_term(
426
443
  ownership: models.OwnershipClass = parentOwnership
427
444
  if glossaryTerm.owners is not None:
428
445
  assert glossaryTerm.owners is not None
429
- ownership = get_owners(glossaryTerm.owners)
446
+ ownership = get_owners_multiple_types(glossaryTerm.owners)
430
447
  aspects.append(ownership)
431
448
 
432
449
  if glossaryTerm.domain is not None:
@@ -119,7 +119,6 @@ class SnowflakeFilter:
119
119
  ) -> bool:
120
120
  if not dataset_type or not dataset_name:
121
121
  return True
122
- dataset_params = dataset_name.split(".")
123
122
  if dataset_type.lower() not in (
124
123
  SnowflakeObjectDomain.TABLE,
125
124
  SnowflakeObjectDomain.EXTERNAL_TABLE,
@@ -131,6 +130,7 @@ class SnowflakeFilter:
131
130
  if _is_sys_table(dataset_name):
132
131
  return False
133
132
 
133
+ dataset_params = _split_qualified_name(dataset_name)
134
134
  if len(dataset_params) != 3:
135
135
  self.structured_reporter.info(
136
136
  title="Unexpected dataset pattern",
@@ -18518,6 +18518,10 @@
18518
18518
  "namespace": "com.linkedin.pegasus2avro.form",
18519
18519
  "fields": [
18520
18520
  {
18521
+ "Searchable": {
18522
+ "fieldName": "structuredPropertyPromptUrns",
18523
+ "fieldType": "URN"
18524
+ },
18521
18525
  "java": {
18522
18526
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
18523
18527
  },
@@ -97,6 +97,10 @@
97
97
  "namespace": "com.linkedin.pegasus2avro.form",
98
98
  "fields": [
99
99
  {
100
+ "Searchable": {
101
+ "fieldName": "structuredPropertyPromptUrns",
102
+ "fieldType": "URN"
103
+ },
100
104
  "java": {
101
105
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
102
106
  },
@@ -1383,8 +1383,7 @@ class SqlParsingAggregator(Closeable):
1383
1383
  return QueryUrn(query_id).urn()
1384
1384
 
1385
1385
  @classmethod
1386
- def _composite_query_id(cls, composed_of_queries: Iterable[QueryId]) -> str:
1387
- composed_of_queries = list(composed_of_queries)
1386
+ def _composite_query_id(cls, composed_of_queries: List[QueryId]) -> str:
1388
1387
  combined = json.dumps(composed_of_queries)
1389
1388
  return f"composite_{generate_hash(combined)}"
1390
1389
 
@@ -121,7 +121,7 @@ _BASIC_NORMALIZATION_RULES = {
121
121
  # Remove /* */ comments.
122
122
  re.compile(r"/\*.*?\*/", re.DOTALL): "",
123
123
  # Remove -- comments.
124
- re.compile(r"--.*$"): "",
124
+ re.compile(r"--.*$", re.MULTILINE): "",
125
125
  # Replace all runs of whitespace with a single space.
126
126
  re.compile(r"\s+"): " ",
127
127
  # Remove leading and trailing whitespace and trailing semicolons.
@@ -131,10 +131,16 @@ _BASIC_NORMALIZATION_RULES = {
131
131
  # Replace anything that looks like a string with a placeholder.
132
132
  re.compile(r"'[^']*'"): "?",
133
133
  # Replace sequences of IN/VALUES with a single placeholder.
134
- re.compile(r"\b(IN|VALUES)\s*\(\?(?:, \?)*\)", re.IGNORECASE): r"\1 (?)",
134
+ # The r" ?" makes it more robust to uneven spacing.
135
+ re.compile(r"\b(IN|VALUES)\s*\( ?\?(?:, ?\?)* ?\)", re.IGNORECASE): r"\1 (?)",
135
136
  # Normalize parenthesis spacing.
136
137
  re.compile(r"\( "): "(",
137
138
  re.compile(r" \)"): ")",
139
+ # Fix up spaces before commas in column lists.
140
+ # e.g. "col1 , col2" -> "col1, col2"
141
+ # e.g. "col1,col2" -> "col1, col2"
142
+ re.compile(r"\b ,"): ",",
143
+ re.compile(r"\b,\b"): ", ",
138
144
  }
139
145
  _TABLE_NAME_NORMALIZATION_RULES = {
140
146
  # Replace UUID-like strings with a placeholder (both - and _ variants).