awx-zipline-ai 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/utils.py ADDED
@@ -0,0 +1,510 @@
1
+ # Copyright (C) 2023 The Chronon Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import gc
16
+ import importlib
17
+ import json
18
+ import os
19
+ import re
20
+ import shutil
21
+ import subprocess
22
+ import tempfile
23
+ from collections.abc import Iterable
24
+ from typing import List, Optional, Union, cast
25
+
26
+ import gen_thrift.api.ttypes as api
27
+
28
+ import ai.chronon.repo.extract_objects as eo
29
+ from ai.chronon.repo import FOLDER_NAME_TO_CLASS
30
+
31
# Type alias used to annotate the `conf` argument of the job-level helpers
# in this module (schedule, modes, related tables).
ChrononJobTypes = Union[api.GroupBy, api.Join, api.StagingQuery]

chronon_root_path = ""  # passed from compile.py
34
+
35
+
36
def edit_distance(str1, str2):
    """Return the Levenshtein distance between ``str1`` and ``str2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.

    Args:
        str1: First sequence (typically a string).
        str2: Second sequence (typically a string).

    Returns:
        int: The edit distance; ``0`` when the sequences are equal.
    """
    # Distances from the empty prefix of str1 to every prefix of str2.
    previous = list(range(len(str2) + 1))
    for row, left in enumerate(str1, start=1):
        # Turning str1[:row] into the empty string costs `row` deletions.
        current = [row]
        for col, right in enumerate(str2, start=1):
            if left == right:
                cost = previous[col - 1]
            else:
                cost = 1 + min(current[col - 1], previous[col], previous[col - 1])
            current.append(cost)
        previous = current
    return previous[-1]
51
+
52
+
53
class JsonDiffer:
    """Diff two JSON documents using the system ``diff`` executable.

    Both documents are re-serialised with sorted keys and two-space
    indentation into files inside a private temporary directory, so the
    result is independent of key order and formatting in the inputs.

    The temporary directory is created in ``__init__`` and removed by
    :meth:`clean`. The instance can also be used as a context manager, in
    which case the directory is removed on exit.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()
        self.new_name = "new.json"
        self.old_name = "old.json"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.clean()
        return False

    def diff(self, new_json_str: str, old_json_str: str, skipped_keys=None) -> str:
        """Return the ``diff`` output between the old and the new document.

        Args:
            new_json_str: JSON text of the new document (a JSON object).
            old_json_str: JSON text of the old document (a JSON object).
            skipped_keys: Top-level keys to drop from both documents before
                comparing. ``None`` means no keys are skipped.

        Returns:
            str: Standard output of ``diff old new``; empty when the
            documents are equal after normalisation.
        """
        if skipped_keys is None:
            skipped_keys = []
        new_json = {k: v for k, v in json.loads(new_json_str).items() if k not in skipped_keys}
        old_json = {k: v for k, v in json.loads(old_json_str).items() if k not in skipped_keys}

        old_path = os.path.join(self.temp_dir, self.old_name)
        new_path = os.path.join(self.temp_dir, self.new_name)
        with open(old_path, mode="w", encoding="utf-8") as old:
            old.write(json.dumps(old_json, sort_keys=True, indent=2))
        with open(new_path, mode="w", encoding="utf-8") as new:
            new.write(json.dumps(new_json, sort_keys=True, indent=2))

        # Both files are closed (and therefore flushed) before diff reads them.
        completed = subprocess.run(["diff", old_path, new_path], stdout=subprocess.PIPE)
        return completed.stdout.decode("utf-8")

    def clean(self):
        """Remove the temporary directory; calling it more than once is safe."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)
78
+
79
+
80
def check_contains_single(candidate, valid_items, type_name, name, print_function=repr):
    """Fail with a helpful message unless ``candidate`` is in ``valid_items``.

    Args:
        candidate: The value to validate.
        valid_items: Collection of accepted values.
        type_name: Human-readable kind of the value, used in the message.
        name: Optional name of the owning object, appended to the message.
        print_function: Callable used to render the candidate and the valid
            items in the message. Defaults to ``repr``.

    Raises:
        AssertionError: If ``valid_items`` is empty or does not contain
            ``candidate``. In the second case the valid items are listed,
            closest match (by edit distance) first.
    """
    name_suffix = f"for {name}" if name else ""
    candidate_str = print_function(candidate)
    if not valid_items:
        # Raised explicitly: asserting a non-empty message string is always
        # true and would let this case pass silently.
        raise AssertionError(
            f"{candidate_str}, is not a valid {type_name} because no {type_name}s are specified {name_suffix}"
        )
    if candidate in valid_items:
        return
    sorted_items = sorted(
        map(print_function, valid_items),
        key=lambda item: edit_distance(candidate_str, item),
    )
    printed_items = "\n    ".join(sorted_items)
    raise AssertionError(
        f"{candidate_str}, is not a valid {type_name} {name_suffix}\n"
        "Please pick one from:\n"
        f"    {printed_items}\n"
    )
97
+
98
+
99
def check_contains(candidates, *args):
    """Validate one candidate or every candidate of a collection.

    A string is treated as a single candidate even though it is iterable.
    The remaining positional arguments are forwarded unchanged to
    ``check_contains_single``.

    Args:
        candidates: A single value, or a non-string iterable of values.
        *args: Arguments passed through to ``check_contains_single``.
    """
    is_collection = isinstance(candidates, Iterable) and not isinstance(candidates, str)
    for candidate in candidates if is_collection else (candidates,):
        check_contains_single(candidate, *args)
105
+
106
+
107
def get_streaming_sources(group_by: api.GroupBy) -> List[api.Source]:
    """Return the sources of ``group_by`` for which ``is_streaming`` is truthy.

    The relative order of ``group_by.sources`` is preserved.
    """
    return list(filter(is_streaming, group_by.sources))
110
+
111
+
112
def is_streaming(source: api.Source) -> bool:
    """Return whether the source has streaming enabled.

    A source counts as streaming when its ``entities`` variant has a
    ``mutationTopic`` or its ``events`` variant has a ``topic`` (anything
    other than ``None``).

    Args:
        source: The source to inspect.

    Returns:
        bool: ``True`` or ``False``; never ``None`` or the variant object.
    """
    entities, events = source.entities, source.events
    # bool() because the and/or chain would otherwise leak None or the
    # falsy operand instead of a real boolean.
    return bool(
        (entities and entities.mutationTopic is not None)
        or (events and events.topic is not None)
    )
117
+
118
+
119
def _get_underlying_source(
    source: api.Source,
) -> Union[api.EventSource, api.EntitySource, api.JoinSource]:
    """Return the populated variant of ``source``.

    ``entities`` is preferred, then ``events``; ``joinSource`` is returned
    when neither of the first two is set.
    """
    return source.entities or source.events or source.joinSource
128
+
129
+
130
def get_root_source(
    source: api.Source,
) -> Union[api.EventSource, api.EntitySource]:
    """Return the entity or event source at the root of ``source``.

    When ``source`` is a join source, the left side of its join is followed
    repeatedly until a source with ``entities`` or ``events`` is reached.
    """
    current = source
    while True:
        root = current.entities or current.events
        if root:
            return root
        # Neither variant is set: descend into the left side of the join.
        current = current.joinSource.join.left
139
+
140
+
141
def get_query(source: api.Source) -> api.Query:
    """Return the ``query`` attribute of the populated variant of ``source``."""
    underlying = _get_underlying_source(source)
    return underlying.query
143
+
144
+
145
def get_table(source: api.Source) -> str:
    """Return the table name backing ``source``, without sub-partition spec.

    Entities use ``snapshotTable``, events use ``table``, and join sources
    use the output table name of their join. Anything after the first ``/``
    is dropped.
    """
    if source.entities:
        qualified = source.entities.snapshotTable
    elif source.events:
        qualified = source.events.table
    else:
        # Imported at call time rather than at module import.
        from ai.chronon.join import _get_output_table_name

        qualified = _get_output_table_name(source.joinSource.join, True)
    base, _, _ = qualified.partition("/")
    return base
155
+
156
+
157
def get_topic(source: api.Source) -> Optional[str]:
    """Return the streaming topic configured on ``source``, if any.

    Args:
        source: The source to inspect.

    Returns:
        ``entities.mutationTopic`` when the entities variant is set,
        otherwise ``events.topic``. ``None`` when the source has neither
        variant (for example a join source) instead of failing on a
        missing attribute.
    """
    if source.entities:
        return source.entities.mutationTopic
    events = source.events
    return events.topic if events is not None else None
159
+
160
+
161
def get_columns(source: api.Source):
    """Return the keys of the ``selects`` mapping of the source's query.

    Raises:
        AssertionError: If the query has no ``selects``.
    """
    selects = get_query(source).selects
    assert selects is not None, "Please specify selects in your Source/Query"
    return selects.keys()
166
+
167
+
168
def get_mod_name_from_gc(obj, mod_prefix):
    """Find the ``__name__`` of a namespace that refers to ``obj``.

    The garbage collector is asked for everything that refers to ``obj``;
    only iterable referrers containing a ``"__name__"`` entry are kept.

    Args:
        obj: The object whose referrers are inspected.
        mod_prefix: Prefix the name must start with when several
            candidates exist.

    Returns:
        The only candidate's name when there is exactly one, otherwise the
        first candidate name starting with ``mod_prefix``, or ``None``.
    """
    gc.collect()

    named_referrers = [
        referrer
        for referrer in gc.get_referrers(obj)
        if (isinstance(referrer, Iterable) and "__name__" in referrer)
    ]

    # A single candidate is accepted without checking the prefix.
    if len(named_referrers) == 1:
        return named_referrers[0]["__name__"]

    return next(
        (
            referrer["__name__"]
            for referrer in named_referrers
            if referrer["__name__"].startswith(mod_prefix)
        ),
        None,
    )
189
+
190
+
191
def get_mod_and_var_name_from_gc(obj, mod_prefix):
    """Find the module and the module-level variable bound to ``obj``.

    Args:
        obj: The object to locate.
        mod_prefix: Module-name prefix forwarded to ``get_mod_name_from_gc``.

    Returns:
        ``None`` when no module name is found. Otherwise a tuple
        ``(mod_name, var_name)`` where ``var_name`` is the first variable of
        the imported module that is ``obj`` itself, or ``None`` when no
        variable matches.
    """
    mod_name = get_mod_name_from_gc(obj, mod_prefix)
    if not mod_name:
        return None

    namespace = vars(importlib.import_module(mod_name))
    var_name = next((name for name, value in namespace.items() if value is obj), None)
    return mod_name, var_name
204
+
205
+
206
def __set_name(obj, cls, mod_prefix):
    """Import the module that refers to ``obj`` and let ``eo`` name its objects.

    The module name is found with ``get_mod_name_from_gc``; the imported
    module and ``cls`` are then handed to ``eo.import_module_set_name``.
    """
    owning_module = importlib.import_module(get_mod_name_from_gc(obj, mod_prefix))
    eo.import_module_set_name(owning_module, cls)
211
+
212
+
213
def sanitize(name):
    """Replace every character outside ``[a-zA-Z0-9_]`` with ``_``.

    Mirrors api.Extensions.scala:
    Option(name).map(_.replaceAll("[^a-zA-Z0-9_]", "_")).orNull

    Returns:
        The sanitized string, or ``None`` when ``name`` is ``None``.
    """
    if name is None:
        return None
    return re.sub("[^a-zA-Z0-9_]", "_", name)
221
+
222
+
223
def dict_to_bash_commands(d):
    """Render a dict as a space-separated string of ``--flag[=value]`` items.

    Underscores in keys become dashes. A falsy value produces a bare
    ``--flag`` without ``=value``. An empty or ``None`` dict yields ``""``.
    """
    if not d:
        return ""

    def render(key, value):
        flag = f"--{key.replace('_', '-')}"
        return f"{flag}={value}" if value else flag

    return " ".join(render(key, value) for key, value in d.items())
234
+
235
+
236
def dict_to_exports(d):
    """Render a dict as ``export KEY=value`` statements joined by ``" && "``.

    Keys are upper-cased; values are inserted as-is, without quoting. An
    empty or ``None`` dict yields ``""``.
    """
    if not d:
        return ""
    return " && ".join(f"export {key.upper()}={value}" for key, value in d.items())
243
+
244
+
245
def output_table_name(obj, full_name: bool):
    """Return the output table name derived from ``obj.metaData``.

    Args:
        obj: Object with ``metaData.name`` and ``metaData.outputNamespace``.
        full_name: When true, prefix the table with the output namespace,
            falling back to the ``{{ db }}`` placeholder when none is set.

    Returns:
        The sanitized name, optionally qualified as ``<namespace>.<name>``.
    """
    meta = obj.metaData
    table_name = sanitize(meta.name)
    namespace = meta.outputNamespace or "{{ db }}"
    if not full_name:
        return table_name
    return namespace + "." + table_name
253
+
254
+
255
def join_part_name(jp):
    """Return ``<prefix>_<sanitized group-by name>`` for a join part.

    When the group-by has no name yet and is an ``api.GroupBy``, its name is
    first populated via ``__set_name``. A ``None`` prefix is left out.

    Raises:
        NotImplementedError: If the join part has no ``groupBy``.
    """
    if jp.groupBy is None:
        raise NotImplementedError("Join Part names for non group bys is not implemented.")
    if not jp.groupBy.metaData.name and isinstance(jp.groupBy, api.GroupBy):
        __set_name(jp.groupBy, api.GroupBy, "group_bys")
    pieces = (jp.prefix, sanitize(jp.groupBy.metaData.name))
    return "_".join(piece for piece in pieces if piece is not None)
267
+
268
+
269
def join_part_output_table_name(join, jp, full_name: bool = False):
    """Return the output table name of a join part.

    From api.Extensions.scala; to be kept in sync with the Scala API:

        def partOutputTable(jp: JoinPart): String = (Seq(join.metaData.outputTable) ++ Option(jp.prefix) :+
          jp.groupBy.metaData.cleanName).mkString("_")

    When the join has no name yet and is an ``api.Join``, its name is first
    populated via ``__set_name``.

    Args:
        join: The join owning the part.
        jp: The join part.
        full_name: Forwarded to ``output_table_name`` to request the
            namespace-qualified table name.
    """
    if not join.metaData.name and isinstance(join, api.Join):
        __set_name(join, api.Join, "joins")
    pieces = (output_table_name(join, full_name), join_part_name(jp))
    return "_".join(piece for piece in pieces if piece is not None)
290
+
291
+
292
def log_table_name(obj, full_name: bool = False):
    """Return the output table name of ``obj`` with the ``_logged`` suffix."""
    base_name = output_table_name(obj, full_name=full_name)
    return base_name + "_logged"
294
+
295
+
296
def get_team_conf_from_py(team, key):
    """Import ``teams.<team>`` and return its attribute named ``key``.

    Raises:
        ImportError: If the team module cannot be imported.
        AttributeError: If the module has no attribute ``key``.
    """
    module_name = f"teams.{team}"
    return getattr(importlib.import_module(module_name), key)
299
+
300
+
301
def wait_for_simple_schema(table, lag, start, end):
    """Build the wait-for descriptor dict for a table partition.

    Args:
        table: Table name, optionally followed by ``/<sub-partition spec>``.
        lag: Number of days to subtract from ``ds``; ``0`` means no lag.
        start: Stored under ``"start"`` unchanged.
        end: Stored under ``"end"`` unchanged.

    Returns:
        ``None`` when ``table`` is empty, otherwise a dict with the keys
        ``name``, ``spec``, ``start`` and ``end``.
    """
    if not table:
        return None

    clean_name, _, subpartition_spec = table.partition("/")
    if lag == 0:
        name_suffix = ""
        ds_macro = "{{ ds }}"
    else:
        name_suffix = f"_minus_{lag}"
        ds_macro = "{{{{ macros.ds_add(ds, -{}) }}}}".format(lag)

    spec = "{}/ds={}".format(clean_name, ds_macro)
    if subpartition_spec:
        spec += "/{}".format(subpartition_spec)

    return {
        "name": "wait_for_{}_ds{}".format(clean_name, name_suffix),
        "spec": spec,
        "start": start,
        "end": end,
    }
317
+
318
+
319
def wait_for_name(dep):
    """Return a ``wait_for_...`` task name derived from ``dep``.

    Every non-alphanumeric character becomes ``_``, runs of ``_`` are
    collapsed to one, and trailing underscores are removed.
    """
    slug = re.sub("[^a-zA-Z0-9]", "_", dep)
    collapsed = re.sub("_+", "_", f"wait_for_{slug}")
    return collapsed.rstrip("_")
323
+
324
+
325
def dedupe_in_order(seq):
    """Return the items of ``seq`` as a list without duplicates.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(seq))
329
+
330
+
331
def has_topic(group_by: api.GroupBy) -> bool:
    """Return whether any source of ``group_by`` has a topic configured.

    A source qualifies when its entities variant has a truthy
    ``mutationTopic`` or its events variant has a truthy ``topic``. Used to
    decide whether streaming tasks apply.
    """
    for source in group_by.sources:
        if source.entities and source.entities.mutationTopic:
            return True
        if source.events and source.events.topic:
            return True
    return False
338
+
339
+
340
def get_offline_schedule(conf: ChrononJobTypes) -> Optional[str]:
    """Return the offline schedule of ``conf``.

    Args:
        conf: Object whose ``metaData.executionInfo.scheduleCron`` holds the
            schedule.

    Returns:
        The configured schedule, ``"@daily"`` when none is configured
        (including when ``executionInfo`` itself is unset), or ``None``
        when the schedule is ``"@never"``.
    """
    execution_info = conf.metaData.executionInfo
    # A missing executionInfo is treated like a missing scheduleCron.
    cron = execution_info.scheduleCron if execution_info is not None else None
    schedule_interval = cron or "@daily"
    if schedule_interval == "@never":
        return None
    return schedule_interval
345
+
346
+
347
def requires_log_flattening_task(conf: ChrononJobTypes) -> bool:
    """Return whether ``conf.metaData.samplePercent`` is set and positive."""
    sample_percent = conf.metaData.samplePercent or 0
    return sample_percent > 0
349
+
350
+
351
def get_applicable_modes(conf: ChrononJobTypes) -> List[str]:
    """Return the run modes that apply to ``conf``, in a fixed order.

    * ``api.GroupBy``: ``backfill`` when a backfill start date is set,
      ``upload`` when online, ``streaming`` when ``accuracy`` is truthy or
      any source has a topic.
    * ``api.Join``: ``backfill`` and ``stats-summary`` when it has an
      offline schedule, ``consistency-metrics-compute`` when the
      consistency check is enabled, ``log-flattener`` when log sampling is
      enabled, ``label-join`` when label parts are set.
    * ``api.StagingQuery``: always ``backfill``.

    Raises:
        ValueError: If ``conf`` is none of the supported types.
    """
    modes: List[str] = []

    if isinstance(conf, api.GroupBy):
        group_by = cast(api.GroupBy, conf)
        if group_by.backfillStartDate is not None:
            modes.append("backfill")

        if group_by.metaData.online or False:
            modes.append("upload")

        # NOTE(review): this is a truthiness test on `accuracy`; an enum
        # member whose value is 0 would read as "unset" here - confirm
        # against the generated Accuracy enum.
        temporal_accuracy = group_by.accuracy or False
        streaming = has_topic(group_by)
        if temporal_accuracy or streaming:
            modes.append("streaming")
        return modes

    if isinstance(conf, api.Join):
        join = cast(api.Join, conf)

        if get_offline_schedule(conf) is not None:
            modes.extend(["backfill", "stats-summary"])

        if join.metaData.consistencyCheck is True:
            modes.append("consistency-metrics-compute")

        if requires_log_flattening_task(join):
            modes.append("log-flattener")

        if join.labelParts is not None:
            modes.append("label-join")
        return modes

    if isinstance(conf, api.StagingQuery):
        modes.append("backfill")
        return modes

    raise ValueError(f"Unsupported job type {type(conf).__name__}")
392
+
393
+
394
def get_related_table_names(conf: ChrononJobTypes) -> List[str]:
    """Return the auxiliary table names derived from ``conf``'s output table.

    Each applicable mode contributes its suffixed table names, in a fixed
    order; a join with bootstrap parts additionally contributes the
    ``_bootstrap`` table.
    """
    table_name = output_table_name(conf, full_name=True)
    applicable_modes = set(get_applicable_modes(conf))

    # Order matters: it defines the order of the returned list.
    suffixes_by_mode = (
        ("upload", ("_upload",)),
        ("stats-summary", ("_daily_stats",)),
        ("label-join", ("_labels", "_labeled", "_labeled_latest")),
        ("log-flattener", ("_logged",)),
        ("consistency-metrics-compute", ("_consistency",)),
    )
    related_tables: List[str] = [
        f"{table_name}{suffix}"
        for mode, suffixes in suffixes_by_mode
        if mode in applicable_modes
        for suffix in suffixes
    ]

    if isinstance(conf, api.Join) and conf.bootstrapParts:
        related_tables.append(f"{table_name}_bootstrap")

    return related_tables
417
+
418
+
419
class DotDict(dict):
    """A ``dict`` whose keys can also be read as attributes.

    Reading an attribute that is not a key yields ``None`` instead of
    raising. Nested plain dicts are wrapped in ``DotDict`` on access.
    """

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails.
        try:
            value = self[attr]
        except KeyError:
            return None
        if isinstance(value, dict):
            return DotDict(value)
        return value
425
+
426
+
427
def convert_json_to_obj(d):
    """Recursively convert dicts in decoded JSON data into ``DotDict``.

    Lists are converted element by element; every other value is returned
    unchanged.
    """
    if isinstance(d, list):
        return [convert_json_to_obj(item) for item in d]
    if isinstance(d, dict):
        return DotDict({key: convert_json_to_obj(value) for key, value in d.items()})
    return d
434
+
435
+
436
def chronon_path(file_path: str) -> str:
    """Return ``file_path`` starting at its first config-type folder.

    The path is split on ``/``; the result starts at the earliest component
    that is a key of ``FOLDER_NAME_TO_CLASS``.

    Raises:
        AssertionError: If no component is a known config-type folder.
    """
    conf_types = FOLDER_NAME_TO_CLASS.keys()
    splits = file_path.split("/")
    positions = [splits.index(typ) for typ in conf_types if typ in splits]
    assert len(positions) > 0, (
        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
    )

    return "/".join(splits[min(positions):])
447
+
448
+
449
def module_path(file_path: str) -> str:
    """Return the dotted module path for a config ``.py`` file.

    The path is first reduced with ``chronon_path``; the ``.py`` extension
    is then removed and ``/`` replaced by ``.``.

    Raises:
        AssertionError: If the reduced path does not end with ``.py``.
    """
    relative = chronon_path(file_path)
    assert relative.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"
    return relative[: -len(".py")].replace("/", ".")
455
+
456
+
457
def compose(arg, *methods):
    """
    Allows composing deeply nested method calls - typically used in selects & derivations
    The first arg is what is threaded into methods, methods can have more than one arg.

    Each method is given as ``"name"`` or ``"name <remaining args>"``; the text
    after the first space is appended as the trailing argument(s) of that call.

    Example:

    .. code-block:: python

        compose(
            "user_id_approx_distinct_count_by_query",
            "map_entries",
            "array_sort (x, y) -> IF(y.value > x.value, -1, IF(y.value < x.value, 1, 0))",
            "transform entry -> entry.key"
        )

    would produce (without the new lines or indents):

    .. code-block:: text

        transform(
            array_sort(
                map_entries(
                    user_id_approx_distinct_count_by_query
                ),
                (x, y) -> IF(y.value > x.value, -1, IF(y.value < x.value, 1, 0))
            ),
            entry -> entry.key
        )
    """

    step = "    "
    # The innermost argument is nested one level per method.
    depth = len(methods)
    lines = [step * depth + arg]

    for spec in methods:
        name, separator, remaining_args = spec.partition(" ")

        if separator:
            # Extra arguments follow the previous expression at the same depth.
            lines[-1] = lines[-1] + ","
            lines.append(step * depth + remaining_args)

        depth -= 1
        pad = step * depth
        lines = [f"{pad}{name}(", *lines, f"{pad})"]

    return "\n".join(lines)
504
+
505
+
506
def clean_expression(expr):
    """
    Collapses every run of whitespace (including newlines) in an expression into a
    single space and removes leading and trailing whitespace.
    """
    collapsed = re.sub(r"\s+", " ", expr)
    return collapsed.strip()
ai/chronon/windows.py ADDED
@@ -0,0 +1,48 @@
1
+ import gen_thrift.common.ttypes as common
2
+
3
+
4
def _days(length: int) -> common.Window:
    """Return a ``common.Window`` of ``length`` with time unit ``DAYS``."""
    unit = common.TimeUnit.DAYS
    return common.Window(length=length, timeUnit=unit)
6
+
7
+
8
def _hours(length: int) -> common.Window:
    """Return a ``common.Window`` of ``length`` with time unit ``HOURS``."""
    unit = common.TimeUnit.HOURS
    return common.Window(length=length, timeUnit=unit)
10
+
11
+
12
def _from_str(s: str) -> common.Window:
    """
    converts strings like "30d", "2h" etc into common.Window

    Args:
        s (str): Duration string in format "<number>(d|h)" where d=days, h=hours

    Returns:
        common.Window: Window object with specified duration

    Raises:
        ValueError: If string format is invalid
    """

    if not s or len(s) < 2:
        raise ValueError(f"Invalid duration format: {s}")

    # Get the numeric value and unit
    value = s[:-1]
    unit = s[-1].lower()

    # Only the conversion is guarded, so the numeric error is identified by
    # where it is raised rather than by matching the interpreter's message.
    try:
        length = int(value)
    except ValueError as e:
        raise ValueError(f"Invalid numeric value in duration: {value}") from e

    if length <= 0:
        raise ValueError(f"Duration must be positive: {s}")

    if unit == "d":
        return _days(length)
    if unit == "h":
        return _hours(length)
    raise ValueError(f"Invalid time unit '{unit}'. Must be 'd' for days or 'h' for hours")