dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,421 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from datetime import datetime, timezone
16
+ from typing import Dict, List
17
+
18
+ from dateutil import parser
19
+
20
+ from dcs_core.core.datasource.base import DataSource
21
+
22
+
23
class SearchIndexDataSource(DataSource):
    """
    Abstract class for search index data sources

    Groups the queries shared by data sources that are reached through a
    search client stored in ``self.client`` (the methods of this class call
    its ``indices``, ``search`` and ``count`` members).
    """

    # Translates the "type" found in an index mapping into the Python type used
    # to represent that field. query_get_field_metadata looks types up here, so
    # a mapping type that is missing from this table makes it raise KeyError.
    FIELD_TYPE_MAPPING = {
        # string-like
        "text": str,
        "keyword": str,
        # dates
        "date": datetime,
        "date_nanos": datetime,
        # integers
        "long": int,
        "integer": int,
        "short": int,
        "byte": int,
        "unsigned_long": int,
        # floating point
        "double": float,
        "float": float,
        "half_float": float,
        "scaled_float": float,
        # misc
        "boolean": bool,
        "binary": str,
        # structured
        "nested": dict,
        "object": dict,
    }
43
+
44
def __init__(self, data_source_name: str, data_connection: Dict):
    """
    Set up the data source without a client attached
    :param data_source_name: name of the data source, forwarded to the base class
    :param data_connection: connection settings, forwarded to the base class
    """
    super().__init__(data_source_name, data_connection)

    # No client yet; the query_* methods read it from here.
    # NOTE(review): presumably assigned by a concrete subclass on connect - confirm.
    self.client = None
48
+
49
def query_get_index_metadata(self) -> List[str]:
    """
    Get the names of all indices
    :return: list of index names matched by the "*" pattern
    """
    # Pass the pattern by keyword: this matches the other indices.* calls in
    # this class and also works with clients whose API is keyword-only.
    return list(self.client.indices.get(index="*"))
55
+
56
def query_get_field_metadata(self, index_name: str) -> Dict[str, type]:
    """
    Get the field metadata
    :param index_name: name of the index
    :return: mapping of field name to the Python type taken from FIELD_TYPE_MAPPING;
        empty when the index has no mapped fields
    :raises KeyError: if a field has a mapping type that FIELD_TYPE_MAPPING does not list
    """
    mappings = self.client.indices.get_mapping(index=index_name)
    # An index without any mapped field carries no "properties" entry at all.
    properties = mappings[index_name]["mappings"].get("properties", {})

    field_types: Dict[str, type] = {}
    for field, definition in properties.items():
        if "type" in definition:
            field_types[field] = self.FIELD_TYPE_MAPPING[definition["type"]]
        elif "properties" in definition:
            # Sub-fields but no explicit type: treated like a nested structure.
            field_types[field] = self.FIELD_TYPE_MAPPING["nested"]
        # Entries with neither key are left out of the result.

    return field_types
73
+
74
def query_get_field_type(self, index_name: str, field: str) -> str:
    """
    Look up the type of a single field
    :param index_name: name of the index
    :param field: field name
    :return: the entry for ``field`` in the result of query_get_field_metadata
    """
    return self.query_get_field_metadata(index_name=index_name)[field]
83
+
84
def query_get_document_count(self, index_name: str, filters: Dict = None) -> int:
    """
    Count the documents of an index
    :param index_name: name of the index
    :param filters: optional query restricting which documents are counted
    :return: value of "count" in the client's count response
    """
    body = {}
    if filters:
        body["query"] = filters
    return self.client.count(index=index_name, body=body)["count"]
94
+
95
def query_get_max(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the largest value of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: "value" of the max aggregation
    """
    aggregation = {"max_value": {"max": {"field": field}}}
    body = {"aggs": aggregation, "query": filters} if filters else {"aggs": aggregation}

    result = self.client.search(index=index_name, body=body)
    return result["aggregations"]["max_value"]["value"]
109
+
110
def query_get_min(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the smallest value of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: "value" of the min aggregation
    """
    body = {"aggs": {"min_value": {"min": {"field": field}}}}
    if filters:
        body.update(query=filters)

    aggregations = self.client.search(index=index_name, body=body)["aggregations"]
    return aggregations["min_value"]["value"]
124
+
125
def query_get_avg(self, index_name: str, field: str, filters: Dict = None) -> float:
    """
    Get the average value of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: average rounded to 2 decimals, or None when the aggregation has
        no value (no matching document carries the field)
    """
    query = {"aggs": {"avg_value": {"avg": {"field": field}}}}
    if filters:
        query["query"] = filters

    response = self.client.search(index=index_name, body=query)
    value = response["aggregations"]["avg_value"]["value"]
    # The aggregation reports null when there is nothing to average; round(None) would raise.
    if value is None:
        return None
    return round(value, 2)
139
+
140
def query_get_sum(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the sum of the values of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: sum rounded to 2 decimals
    """
    body = {"aggs": {"sum_value": {"sum": {"field": field}}}}
    if filters:
        body["query"] = filters

    aggregations = self.client.search(index=index_name, body=body)["aggregations"]
    total = aggregations["sum_value"]["value"]
    return round(total, 2)
154
+
155
def query_get_variance(self, index_name: str, field: str, filters: Dict = None) -> float:
    """
    Get the variance of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: "variance_sampling" of the extended_stats aggregation rounded to
        2 decimals, or None when the aggregation reports no value
    """
    query = {"aggs": {"stats": {"extended_stats": {"field": field}}}}
    if filters:
        query["query"] = filters

    response = self.client.search(index=index_name, body=query)["aggregations"]
    variance = response["stats"]["variance_sampling"]
    # Reported as null when no document contributes a value; round(None) would raise.
    if variance is None:
        return None
    return round(variance, 2)
169
+
170
def query_get_stddev(self, index_name: str, field: str, filters: Dict = None) -> float:
    """
    Get the standard deviation of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: "std_deviation_sampling" of the extended_stats aggregation rounded
        to 2 decimals, or None when the aggregation reports no value
    """
    query = {"aggs": {"stats": {"extended_stats": {"field": field}}}}
    if filters:
        query["query"] = filters

    response = self.client.search(index=index_name, body=query)["aggregations"]
    stddev = response["stats"]["std_deviation_sampling"]
    # Reported as null when no document contributes a value; round(None) would raise.
    if stddev is None:
        return None
    return round(stddev, 2)
184
+
185
def query_get_distinct_count(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the number of distinct values of a field
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: "value" of the cardinality aggregation
    """
    cardinality = {"distinct_count": {"cardinality": {"field": field}}}
    body = {"aggs": cardinality, "query": filters} if filters else {"aggs": cardinality}

    result = self.client.search(index=index_name, body=body)
    return result["aggregations"]["distinct_count"]["value"]
199
+
200
def query_get_time_diff(self, index_name: str, field: str) -> int:
    """
    Get the time elapsed since the most recent value of a date field
    :param index_name: name of the index
    :param field: name of the date field; documents are sorted on it, newest first
    :return: whole seconds between now (UTC) and the newest value of the field,
        or 0 when the search returns no hits
    """
    # Only the newest document is read, so a single hit is requested.
    query = {
        "size": 1,
        "query": {"match_all": {}},
        "sort": [{f"{field}": {"order": "desc"}}],
    }

    hits = self.client.search(index=index_name, body=query)["hits"]["hits"]
    if not hits:
        return 0

    last_updated = parser.parse(timestr=hits[0]["_source"][field])
    if last_updated.tzinfo is None:
        # A naive datetime would be read as local time by timestamp(). Date
        # strings without an offset are taken to be UTC, as the search engine
        # does when indexing them.
        last_updated = last_updated.replace(tzinfo=timezone.utc)

    now = datetime.now(timezone.utc)
    return int(now.timestamp() - last_updated.timestamp())
220
+
221
def query_get_null_count(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the null count
    :param index_name: name of the index
    :param field: field name
    :param filters: optional filter
    :return: number of documents in which the field does not exist
    """
    query = {
        # Only the total is needed, not the documents themselves.
        "size": 0,
        # Without this the reported total stops at the engine's default cap
        # (10,000) and large null counts are under-reported.
        "track_total_hits": True,
        "query": {"bool": {"must_not": {"exists": {"field": field}}}},
    }
    if filters:
        query["query"]["bool"]["filter"] = filters

    response = self.client.search(index=index_name, body=query)
    return response["hits"]["total"]["value"]
234
+
235
def query_get_null_percentage(self, index_name: str, field: str, filters: Dict = None) -> float:
    """
    Get the null percentage
    :param index_name: name of the index
    :param field: field name
    :param filters: optional filter
    :return: share of the matching documents in which the field is missing,
        as a percentage rounded to 2 decimals; 0.0 when no document matches
    """
    query = {
        "size": 0,
        # The denominator is the exact number of matching documents.
        "track_total_hits": True,
        "aggs": {"null_count": {"missing": {"field": field}}},
    }
    if filters:
        query["query"] = filters

    response = self.client.search(index=index_name, body=query)
    total_count = response["hits"]["total"]["value"]
    if total_count == 0:
        return 0.0

    # Divide by all matching documents, not by the number of non-null values:
    # the latter overstates the ratio and is zero when every value is missing.
    null_count = response["aggregations"]["null_count"]["doc_count"]
    return round((null_count / total_count) * 100, 2)
258
+
259
def query_get_empty_string_count(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the count of empty strings
    :param index_name: name of the index
    :param field: field name; its ``.keyword`` sub-field is the one queried
    :param filters: optional filter
    :return: number of documents whose ``<field>.keyword`` matches the empty string
    """
    query = {
        # Only the total is needed, not the documents themselves.
        "size": 0,
        # Without this the reported total stops at the engine's default cap (10,000).
        "track_total_hits": True,
        "query": {"bool": {"must": {"match": {f"{field}.keyword": ""}}}},
    }
    if filters:
        query["query"]["bool"]["filter"] = filters

    response = self.client.search(index=index_name, body=query)
    return response["hits"]["total"]["value"]
273
+
274
def query_get_empty_string_percentage(self, index_name: str, field: str, filters: Dict = None) -> float:
    """
    Get the percentage of empty strings
    :param index_name: name of the index
    :param field: field name; its ``.keyword`` sub-field is the one queried
    :param filters: optional query restricting the documents considered
    :return: empty-string documents divided by the value count of
        ``<field>.keyword``, as a percentage rounded to 2 decimals;
        0.0 when the value count is zero
    """
    keyword_field = f"{field}.keyword"
    aggregations = {
        "empty_string_count": {"filter": {"match": {keyword_field: ""}}},
        "total_count": {"value_count": {"field": keyword_field}},
    }
    body = {"size": 0, "aggs": aggregations}
    if filters:
        body["query"] = filters

    result = self.client.search(index=index_name, body=body)["aggregations"]
    empties = result["empty_string_count"]["doc_count"]
    total = result["total_count"]["value"]

    # Guard against dividing by zero when nothing was counted.
    return 0.0 if total == 0 else round((empties / total) * 100, 2)
302
+
303
def profiling_search_aggregates_numeric(self, index_name: str, field: str) -> Dict:
    """
    Get the aggregates for a numeric field
    :param index_name: name of the index
    :param field: field name
    :return: dict with avg, min, max, sum, stddev, variance, distinct_count and
        missing_count; stddev and variance are both the sampling variants
    """
    query = {
        # Aggregations only; no documents need to be returned.
        "size": 0,
        "aggs": {
            "stats": {"extended_stats": {"field": field}},
            "distinct_count": {"cardinality": {"field": field}},
            "missing_count": {"missing": {"field": field}},
        },
    }
    response = self.client.search(index=index_name, body=query)["aggregations"]
    stats = response["stats"]

    return {
        "avg": stats["avg"],
        "min": stats["min"],
        "max": stats["max"],
        "sum": stats["sum"],
        # Sampling estimator, so that stddev and variance describe the same
        # statistic (and agree with query_get_stddev / query_get_variance).
        "stddev": stats["std_deviation_sampling"],
        "variance": stats["variance_sampling"],
        "distinct_count": response["distinct_count"]["value"],
        "missing_count": response["missing_count"]["doc_count"],
    }
330
+
331
def profiling_search_aggregates_string(self, index_name: str, field: str) -> Dict:
    """
    Get the aggregates for a text field
    :param index_name: name of the index
    :param field: field name; lengths are computed from the document source by
        a script, counts come from the ``.keyword`` sub-field
    :return: dict with distinct_count, missing_count, max_length, min_length
        and avg_length
    """
    keyword_field = f"{field}.keyword"
    # One script serves the three length aggregations: the string length when
    # the source contains the field, 0 otherwise.
    length_script = {
        "script": {"source": f"params._source.containsKey('{field}')? params._source.{field}.length(): 0"}
    }
    aggregations = {
        "max_length": {"max": length_script},
        "min_length": {"min": length_script},
        "avg_length": {"avg": length_script},
        "distinct_count": {"cardinality": {"field": keyword_field}},
        "missing_count": {"missing": {"field": keyword_field}},
    }

    result = self.client.search(index=index_name, body={"aggs": aggregations})["aggregations"]

    profile = {
        "distinct_count": result["distinct_count"]["value"],
        "missing_count": result["missing_count"]["doc_count"],
    }
    for name in ("max_length", "min_length", "avg_length"):
        profile[name] = result[name]["value"]
    return profile
358
+
359
def query_get_duplicate_count(self, index_name: str, field: str, filters: Dict = None) -> int:
    """
    Get the duplicate count
    :param index_name: name of the index
    :param field: field name
    :param filters: optional query restricting the documents considered
    :return: number of distinct values that occur in at least two documents
        (at most 10000, the bucket limit requested from the aggregation)
    :raises KeyError: if the field is not present in the index mapping
    """
    # Decide on the raw mapping type. Comparing the Python type from
    # FIELD_TYPE_MAPPING with the string "str" never matched, so text fields
    # were aggregated directly instead of through their keyword sub-field.
    mappings = self.client.indices.get_mapping(index=index_name)
    definition = mappings[index_name]["mappings"]["properties"][field]
    is_text = definition.get("type") == "text"

    query = {
        # Aggregation only; no documents need to be returned.
        "size": 0,
        "aggs": {
            "duplicate_count": {
                "terms": {
                    "field": f"{field}.keyword" if is_text else field,
                    "size": 10000,
                    # Keep only values shared by two or more documents.
                    "min_doc_count": 2,
                },
            }
        },
    }
    if filters:
        query["query"] = filters
    response = self.client.search(index=index_name, body=query)["aggregations"]

    return len(response["duplicate_count"]["buckets"])
383
+
384
def query_string_pattern_validity(
    self,
    index_name: str,
    field: str,
    regex_pattern: str = None,
    predefined_regex_pattern: str = None,
    filters: Dict = None,
) -> tuple:
    """
    Get the count of string pattern validity
    :param index_name: name of the index
    :param field: field name; the regexp runs against its ``.keyword`` sub-field
    :param regex_pattern: regex pattern, used when no predefined pattern is given
    :param predefined_regex_pattern: name of a predefined regex pattern; takes
        precedence over regex_pattern
    :param filters: filter condition applied to both counts
    :return: tuple of (count of valid values, total row count)
    :raises ValueError: if neither regex_pattern nor predefined_regex_pattern is given
    :raises KeyError: if predefined_regex_pattern is not a known pattern name
    """
    regex_patterns = {"usa_phone": "\\+?1?[-.\\s]?\\(?[0-9]{3}\\)?[-.\\s]?[0-9]{3}[-.\\s]?[0-9]{4}"}

    if not regex_pattern and not predefined_regex_pattern:
        raise ValueError("Either regex_pattern or predefined_regex_pattern should be provided")

    if predefined_regex_pattern:
        regex_string = regex_patterns[predefined_regex_pattern]
    else:
        regex_string = regex_pattern

    regexp_clause = {"regexp": {f"{field}.keyword": regex_string}}
    if filters:
        # A bare regexp query has no "bool" section to attach a filter to, so
        # wrap it; the total is restricted by the same filter so that both
        # counts describe the same set of documents.
        valid_query = {"bool": {"must": regexp_clause, "filter": filters}}
        total_query = filters
    else:
        valid_query = regexp_clause
        total_query = {"match_all": {}}

    query = {
        # Report the exact number of matches instead of a capped total.
        "track_total_hits": True,
        "query": valid_query,
    }

    response = self.client.search(index=index_name, body=query)
    total_count = self.client.count(index=index_name, body={"query": total_query})
    return response["hits"]["total"]["value"], total_count["count"]