datatailr 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to one of the supported registries. The information is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release: this version of datatailr might be problematic.

@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import os
+
+from datatailr.logging import DatatailrLogger
+
+logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+def run():
+    logger.info("Starting Datatailr app...")
+    entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
+
+    if entrypoint is None:
+        raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
+
+    os.system(entrypoint)
+    logger.info(f"Running entrypoint: {entrypoint}")
@@ -12,19 +12,19 @@
 
 import importlib
 import os
-import pickle
 
-from datatailr import dt__Blob
 from datatailr.logging import DatatailrLogger
 
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
 
 
-def main():
+def run():
+    logger.info("Running Datatailr batch job")
     entry_point = os.environ.get("DATATAILR_BATCH_ENTRYPOINT")
     batch_run_id = os.environ.get("DATATAILR_BATCH_RUN_ID")
     batch_id = os.environ.get("DATATAILR_BATCH_ID")
     job_id = os.environ.get("DATATAILR_JOB_ID")
+    logger.info(f"Batch run ID: {batch_run_id}, Batch ID: {batch_id}, Job ID: {job_id}")
 
     if entry_point is None:
         raise ValueError(
@@ -44,20 +44,5 @@ def main():
         raise ValueError(
             f"The function '{func_name}' in module '{module_name}' is not callable."
         )
-    result = function()
-    result_path = f"batch-results-{batch_run_id}-{job_id}.pkl"
-    with open(result_path, "wb") as f:
-        pickle.dump(result, f)
-    blob = dt__Blob()
-    blob.cp(result_path, "blob://")
-    logger.info(f"{result_path} copied to blob storage.")
-
-
-if __name__ == "__main__":
-    try:
-        logger.debug("Starting job execution...")
-        main()
-        logger.debug("Job executed successfully.")
-    except Exception as e:
-        logger.error(f"Error during job execution: {e}")
-        raise
+    function()
+    logger.info("Datatailr batch job completed successfully.")
@@ -8,31 +8,47 @@
 # of this file, in parts or full, via any medium is strictly prohibited.
 # *************************************************************************
 
-from datatailr.errors import BatchJobError, DatatailrError
+r"""
+Datatailr Scheduler Module
+==========================
+
+The `datatailr.scheduler` module provides a framework for scheduling and managing batch jobs.
+
+The main job types are:
+_______________________
+
+- **Batch**: Represents a batch job that can be scheduled and executed.
+  The job can include multiple tasks which can be run in parallel or sequentially.
+- **Service**: Represents a service job that runs continuously.
+- **App**: Represents a web app or a dashboard, which can be built using one of the supported frameworks,
+  such as `Streamlit <https://streamlit.io/>`_, `Dash <https://dash.plotly.com/>`_, or `Panel <https://panel.holoviz.org/>`_.
+- **Excel**: Represents an Excel add-in.
+"""
+
+from datatailr.errors import BatchJobError
 from datatailr.scheduler.base import (
-    ACL,
     EntryPoint,
     Environment,
     Job,
     JobType,
     Resources,
-    User,
+    set_allow_unsafe_scheduling,
 )
 from datatailr.scheduler.batch import Batch, BatchJob, DuplicateJobNameError
-from datatailr.scheduler.batch_decorator import batch_decorator as batch
+from datatailr.scheduler.batch_decorator import batch_decorator as batch_job
+from datatailr.scheduler.schedule import Schedule
 
 __all__ = [
     "Job",
     "JobType",
     "Environment",
-    "User",
     "Resources",
-    "ACL",
     "EntryPoint",
     "Batch",
     "BatchJob",
-    "batch",
-    "DatatailrError",
+    "batch_job",
     "BatchJobError",
     "DuplicateJobNameError",
+    "set_allow_unsafe_scheduling",
+    "Schedule",
 ]
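The re-exported API of `datatailr.scheduler` changes between the versions: `ACL`, `User`, `DatatailrError`, and the `batch` alias disappear from `__all__`, while `Schedule`, `set_allow_unsafe_scheduling`, and the `batch_job` alias are added. A sketch of the import-level impact; constructor signatures are not shown in this diff:

```python
# 0.1.5 only; these names are no longer re-exported from datatailr.scheduler in 0.1.8:
# from datatailr.scheduler import ACL, User, batch, DatatailrError

# New re-exports in 0.1.8:
from datatailr.scheduler import Schedule, batch_job, set_allow_unsafe_scheduling

# Re-exports present in both versions:
from datatailr.scheduler import (
    Batch, BatchJob, EntryPoint, Environment, Job, JobType, Resources,
    BatchJobError, DuplicateJobNameError,
)
```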
@@ -21,11 +21,24 @@ and the inner dictionaries contain the arguments.
 This module is for internal use of the datatailr package.
 """
 
-from collections import defaultdict
-from typing import Any, Dict
+from datatailr.dt_json import json, decode_json
+import os
+import pickle
+from typing import Any, Dict, Optional
 
-from datatailr import is_dt_installed
-from datatailr.scheduler import BatchJob
+from datatailr import is_dt_installed, Blob
+from datatailr.errors import DatatailrError
+
+
+__BLOB_STORAGE__ = Blob()
+
+
+class CacheNotFoundError(DatatailrError):
+    """Custom error for cache operations."""
+
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message
 
 
 class ArgumentsCache:
@@ -36,11 +49,12 @@ class ArgumentsCache:
         :param use_persistent_cache: If True, use the persistent cache backend. Otherwise, use in-memory cache.
         """
         self.use_persistent_cache = use_persistent_cache
-        self.in_memory_cache: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(
-            lambda: defaultdict(dict)
-        )
+        if not self.use_persistent_cache:
+            # Create a temp folder, for local caching
+            os.makedirs("/tmp/datatailr/batch/arguments", exist_ok=True)
+            os.makedirs("/tmp/datatailr/batch/results", exist_ok=True)
 
-    def add_arguments(self, batch_run_id: str, job: str, arguments: Dict[str, Any]):
+    def add_arguments(self, batch_id: str, arguments: Dict[str, Any]):
         """
         Add arguments to the cache for a specific job and batch run.
 
@@ -48,12 +62,16 @@ class ArgumentsCache:
         :param job_name: Name of the job.
         :param arguments: Dictionary of arguments to store.
         """
-        if self.use_persistent_cache and isinstance(job, str):
-            self._add_to_persistent_cache(batch_run_id, job, arguments)
+        path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
+        if self.use_persistent_cache:
+            self._add_to_persistent_cache(path, arguments)
         else:
-            self.in_memory_cache[batch_run_id][job]["args"] = arguments
+            with open(path, "wb") as f:
+                pickle.dump(arguments, f)
 
-    def get_arguments(self, batch_run_id: str, job: str) -> Dict[str, Any]:
+    def get_arguments(
+        self, batch_id: str, job: str, batch_run_id: Optional[str]
+    ) -> Dict[str, Any]:
         """
         Retrieve arguments from the cache for a specific job and batch run.
 
@@ -61,20 +79,37 @@ class ArgumentsCache:
         :param job_name: Name of the job.
         :return: Dictionary of arguments.
         """
+        path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-            return self._get_from_persistent_cache(batch_run_id, job)
-        arguments = {}
-        for key, value in (
-            self.in_memory_cache.get(batch_run_id, {})
-            .get(job, {})
-            .get("args", {})
-            .items()
-        ):
-            if isinstance(value, BatchJob):
-                arguments[key] = value.name
-            else:
-                arguments[key] = value
-        return arguments
+            try:
+                arg_keys = self._get_from_persistent_cache(path)
+            except RuntimeError:
+                return {}
+        else:
+            if not os.path.exists(path):
+                raise CacheNotFoundError(
+                    f"Cache file not found: {path}. Ensure that the arguments have been cached."
+                )
+            with open(path, "rb") as f:
+                try:
+                    arg_keys = pickle.load(f)
+                except EOFError:
+                    return {}
+        if not isinstance(arg_keys, dict):
+            raise TypeError(
+                f"Expected a dictionary for arguments, got {type(arg_keys)}"
+            )
+        if batch_run_id is None:
+            return arg_keys[job]
+        arguments_mapping = decode_json(
+            os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
+        )
+        arguments_mapping = {value: key for key, value in arguments_mapping.items()}
+        args = {
+            arguments_mapping.get(name, name): self.get_result(batch_run_id, value)
+            for name, value in arg_keys[job].items()
+        }
+        return args
 
     def add_result(self, batch_run_id: str, job: str, result: Any):
         """
@@ -84,12 +119,14 @@ class ArgumentsCache:
         :param job: Name of the job.
         :param result: Result of the batch job.
         """
+        path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-            self._add_to_persistent_cache(batch_run_id, job, {"result": result})
+            self._add_to_persistent_cache(path, result)
         else:
-            self.in_memory_cache[batch_run_id][job]["result"] = result
+            with open(path, "wb") as f:
+                pickle.dump(result, f)
 
-    def get_result(self, batch_run_id: str, job: str) -> Any:
+    def get_result(self, batch_run_id: str, job: Any) -> Any:
         """
         Retrieve the result of a batch job from the cache.
 
@@ -97,30 +134,36 @@ class ArgumentsCache:
         :param job: Name of the job.
         :return: Result of the batch job.
         """
+        path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-            return self._get_from_persistent_cache(batch_run_id, job).get("result")
-        return self.in_memory_cache[batch_run_id][job].get("result")
-
-    def _add_to_persistent_cache(
-        self, batch_run_id: str, job_name: str, arguments: Dict[str, Any]
-    ):
+            return self._get_from_persistent_cache(path)
+        else:
+            if not os.path.exists(path):
+                return job
+            with open(path, "rb") as f:
+                try:
+                    return pickle.load(f)
+                except EOFError:
+                    return None
+
+    def _add_to_persistent_cache(self, path: str, blob: Any):
         """
         Add arguments to the persistent cache.
+        This method serializes the blob using pickle and stores it in the Blob storage.
+        :param path: Path in the Blob storage where the blob will be stored.
+        :param blob: The blob to store, typically a dictionary of arguments.
+        :raises TypeError: If the blob cannot be pickled.
 
-        :param batch_run_id: Identifier for the batch run.
-        :param job_name: Name of the job.
-        :param arguments: Dictionary of arguments to store.
         """
-        pass
+        path = path.replace("/tmp/", "")
+        __BLOB_STORAGE__.put_blob(path, json.dumps(blob))
 
-    def _get_from_persistent_cache(
-        self, batch_run_id: str, job_name: str
-    ) -> Dict[str, Any]:
+    def _get_from_persistent_cache(self, path: str) -> Any:
         """
         Retrieve arguments from the persistent cache.
 
-        :param batch_run_id: Identifier for the batch run.
-        :param job_name: Name of the job.
-        :return: Dictionary of arguments.
+        :param path: Path in the Blob storage where the blob is stored.
         """
-        return {}
+        path = path.replace("/tmp/", "")
+        data = __BLOB_STORAGE__.get_blob(path)
+        return json.loads(data)
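Taken together, the 0.1.8 `ArgumentsCache` replaces the old in-memory `defaultdict` with pickle files under `/tmp/datatailr/batch/` for local runs and `Blob` storage for persistent runs. A minimal local round trip implied by the code above; the import path and the constructor call are assumptions, since neither appears in this diff:

```python
from datatailr.scheduler.arguments_cache import ArgumentsCache  # hypothetical module path

# Assumes use_persistent_cache is the constructor argument documented above.
cache = ArgumentsCache(use_persistent_cache=False)

# Arguments are stored per batch_id, keyed by job name (IDs and names are illustrative).
cache.add_arguments("batch-42", {"load_prices": {"date": "2025-01-01"}})
print(cache.get_arguments("batch-42", "load_prices", None))  # -> {'date': '2025-01-01'}

# Results are stored per (batch_run_id, job) pair.
cache.add_result("run-0001", "load_prices", {"rows": 10})
print(cache.get_result("run-0001", "load_prices"))  # -> {'rows': 10}
```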