neo4j-etl-lib 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etl_lib/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Building blocks for ETL pipelines.
3
3
  """
4
- __version__ = "0.1.0"
4
+ __version__ = "0.1.1"
etl_lib/cli/run_tools.py CHANGED
@@ -55,7 +55,7 @@ def __driver(ctx):
55
55
  database_name = ctx.obj["database_name"]
56
56
  neo4j_password = ctx.obj["neo4j_password"]
57
57
  return GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password), database=database_name,
58
- notifications_min_severity="OFF", user_agent="ETL CLI 0.1")
58
+ notifications_min_severity="OFF", user_agent="ETL CLI")
59
59
 
60
60
 
61
61
  @click.group()
@@ -165,25 +165,57 @@ def detail(ctx, run_id, details):
165
165
  __print_details(driver, run_id)
166
166
 
167
167
 
168
+ # noinspection PyTypeChecker
168
169
  @cli.command()
169
- @click.option('--run-id', required=False, help='Run ID to delete')
170
- @click.option('--since', help='Delete runs since a specific date')
171
- @click.option('--older', help='Delete runs older than a specific date')
170
+ @click.option('--run-id', required=False, type=str, help='Run IDs to delete, works with comma separated list')
171
+ @click.option('--before', type=click.DateTime(formats=["%Y-%m-%d"]), help='Delete runs before a specific date in format YYYY-MM-DD')
172
+ @click.option('--older', help='Delete runs older than x days', type=int)
172
173
  @click.pass_context
173
- def delete(ctx, run_id, since, older):
174
+ def delete(ctx, run_id, before, older):
174
175
  """
175
- Delete runs based on run ID, date, or age. One and only one of --run-id, --since, or --older must be provided.
176
+ Delete runs based on run ID, date, or age. One and only one of --run-id, --before, or --older must be provided.
176
177
  """
177
178
  # Ensure mutual exclusivity
178
- options = [run_id, since, older]
179
+ options = [run_id, before, older]
179
180
  if sum(bool(opt) for opt in options) != 1:
180
- print("You must specify exactly one of --run-id, --since, or --older.")
181
+ print("You must specify exactly one of --run-id, --before, or --older.")
181
182
  return
182
183
 
183
184
  if run_id:
184
- print(f"Deleting run ID: {run_id}")
185
- elif since:
186
- print(f"Deleting runs since: {since}")
185
+ ids = run_id.split(',')
186
+ delete_runs(ctx, ids)
187
+ elif before:
188
+ print(f"Deleting runs before: {before}")
189
+ with __driver(ctx) as driver:
190
+ record= driver.execute_query(
191
+ """MATCH (r:ETLRun) WHERE date(r.startTime) < date($before)
192
+ RETURN collect(r.uuid) AS ids
193
+ """,
194
+ result_transformer_=neo4j.Result.single,
195
+ before=before)
196
+ ids = record[0]
197
+ delete_runs(ctx, ids)
198
+
187
199
  elif older:
188
200
  print(f"Deleting runs older than: {older}")
189
- # Implement delete logic here
201
+ with __driver(ctx) as driver:
202
+ record = driver.execute_query(
203
+ """MATCH (r:ETLRun) WHERE date(r.startTime) < (date() - duration({days: $days}))
204
+ RETURN collect(r.uuid) AS ids
205
+ """,
206
+ result_transformer_=neo4j.Result.single,
207
+ days=older)
208
+ ids = record[0]
209
+ delete_runs(ctx, ids)
210
+
211
+
212
+ def delete_runs(ctx, ids):
213
+ print(f"Deleting run IDs: {ids}")
214
+ with __driver(ctx) as driver:
215
+ records, _, _ = driver.execute_query(
216
+ """
217
+ MATCH (r:ETLRun)-[*]->(n) WHERE r.uuid IN $ids
218
+ DETACH DELETE n
219
+ DETACH DELETE r
220
+ """, ids=ids, routing_=neo4j.RoutingControl.WRITE)
221
+ print(f"Deleted run IDs: {ids} successfully")
etl_lib/core/ProgressReporter.py CHANGED
@@ -3,7 +3,7 @@ from datetime import datetime
3
3
 
4
4
  from tabulate import tabulate
5
5
 
6
- from etl_lib.core.Task import Task, TaskGroup
6
+ from etl_lib.core.Task import Task, TaskGroup, TaskReturn
7
7
 
8
8
 
9
9
  class ProgressReporter:
@@ -47,7 +47,7 @@ class ProgressReporter:
47
47
  self.logger.info(f"{'\t' * task.depth}starting {task.task_name()}")
48
48
  return task
49
49
 
50
- def finished_task(self, task: Task, success: bool, summery: dict, error: str = None) -> Task:
50
+ def finished_task(self, task: Task, result: TaskReturn) -> Task:
51
51
  """
52
52
  Marks the task as finished.
53
53
 
@@ -55,23 +55,21 @@ class ProgressReporter:
55
55
 
56
56
  Args:
57
57
  task: Task to be marked as finished.
58
- success: True if the task has successfully finished.
59
- summery: statistics for this task (such as `nodes_created`)
60
- error: If an exception occurred, the exception text should be provided here.
58
+ result: result of the task execution, such as status and summery information.
61
59
 
62
60
  Returns:
63
61
  Task to be marked as started.
64
62
  """
65
63
  task.end_time = datetime.now()
66
- task.success = success
67
- task.summery = summery
64
+ task.success = result.success
65
+ task.summery = result.summery
68
66
 
69
- report = f"{'\t' * task.depth} finished {task.task_name()} in {task.end_time - task.start_time} with success: {success}"
70
- if error is not None:
71
- report += f", error: \n{error}"
67
+ report = f"{'\t' * task.depth} finished {task.task_name()} in {task.end_time - task.start_time} with success: {result.success}"
68
+ if result.error is not None:
69
+ report += f", error: \n{result.error}"
72
70
  else:
73
71
  # for the logger, remove entries with 0, but keep them in the original for reporting
74
- cleaned_summery = {key: value for key, value in summery.items() if value != 0}
72
+ cleaned_summery = {key: value for key, value in result.summery.items() if value != 0}
75
73
  if len(cleaned_summery) > 0:
76
74
  report += f"\n{tabulate([cleaned_summery], headers='keys', tablefmt='psql')}"
77
75
  self.logger.info(report)
@@ -168,9 +166,9 @@ class Neo4jProgressReporter(ProgressReporter):
168
166
  start_time=task.start_time)
169
167
  return task
170
168
 
171
- def finished_task(self, task: Task, success: bool, summery: dict, error: str = None) -> Task:
172
- super().finished_task(task=task, success=success, summery=summery, error=error)
173
- if success:
169
+ def finished_task(self, task: Task, result: TaskReturn) -> Task:
170
+ super().finished_task(task=task, result=result)
171
+ if result.success:
174
172
  status = "success"
175
173
  else:
176
174
  status = "failure"
@@ -179,7 +177,7 @@ class Neo4jProgressReporter(ProgressReporter):
179
177
  MATCH (t:ETLTask {uuid:$id}) SET t.endTime = $end_time, t.status = $status, t.error = $error
180
178
  CREATE (s:ETLStats) SET s=$summery
181
179
  CREATE (t)-[:HAS_STATS]->(s)
182
- """, id=task.uuid, end_time=task.end_time, summery=summery, status=status, error=error)
180
+ """, id=task.uuid, end_time=task.end_time, summery=result.summery, status=status, error=result.error)
183
181
  return task
184
182
 
185
183
  def __create_constraints(self):
etl_lib/core/Task.py CHANGED
@@ -46,7 +46,8 @@ class TaskReturn:
46
46
 
47
47
  # Combine success values and errors
48
48
  combined_success = self.success and other.success
49
- combined_error = f"{self.error or ''} | {other.error or ''}".strip(" |")
49
+ combined_error = None if not (self.error or other.error) \
50
+ else f"{self.error or ''} | {other.error or ''}".strip(" |")
50
51
 
51
52
  return TaskReturn(
52
53
  success=combined_success, summery=merged_summery, error=combined_error
@@ -99,12 +100,7 @@ class Task:
99
100
  except Exception as e:
100
101
  result = TaskReturn(success=False, summery={}, error=str(e))
101
102
 
102
- self.context.reporter.finished_task(
103
- task=self,
104
- success=result.success,
105
- summery=result.summery,
106
- error=result.error,
107
- )
103
+ self.context.reporter.finished_task(task=self,result=result)
108
104
 
109
105
  return result
110
106
 
etl_lib/data_source/CypherBatchSource.py CHANGED
@@ -1,4 +1,6 @@
1
- from typing import Generator
1
+ from typing import Generator, Callable, Optional
2
+
3
+ from neo4j import Record
2
4
 
3
5
  from etl_lib.core.BatchProcessor import BatchResults, BatchProcessor
4
6
  from etl_lib.core.ETLContext import ETLContext
@@ -7,7 +9,14 @@ from etl_lib.core.Task import Task
7
9
 
8
10
  class CypherBatchSource(BatchProcessor):
9
11
 
10
- def __init__(self, context: ETLContext, task: Task, query: str, **kwargs):
12
+ def __init__(
13
+ self,
14
+ context: ETLContext,
15
+ task: Task,
16
+ query: str,
17
+ record_transformer: Optional[Callable[[Record], dict]] = None,
18
+ **kwargs
19
+ ):
11
20
  """
12
21
  Constructs a new CypherBatchSource.
13
22
 
@@ -15,10 +24,12 @@ class CypherBatchSource(BatchProcessor):
15
24
  context: :class:`etl_lib.core.ETLContext.ETLContext` instance.
16
25
  task: :class:`etl_lib.core.Task.Task` instance owning this batchProcessor.
17
26
  query: Cypher query to execute.
27
+ record_transformer: Optional function to transform each record. See Neo4j API documentation on `result_transformer_`
18
28
  kwargs: Arguments passed as parameters with the query.
19
29
  """
20
30
  super().__init__(context, task)
21
31
  self.query = query
32
+ self.record_transformer = record_transformer
22
33
  self.kwargs = kwargs
23
34
 
24
35
  def __read_records(self, tx, batch_size):
@@ -26,7 +37,11 @@ class CypherBatchSource(BatchProcessor):
26
37
  result = tx.run(self.query, **self.kwargs)
27
38
 
28
39
  for record in result:
29
- batch_.append(record.data())
40
+ data = record.data()
41
+ if self.record_transformer:
42
+ data = self.record_transformer(data)
43
+ batch_.append(data)
44
+
30
45
  if len(batch_) == batch_size:
31
46
  yield batch_
32
47
  batch_ = []
etl_lib/task/ExecuteCypherTask.py CHANGED
@@ -24,10 +24,10 @@ class ExecuteCypherTask(Task):
24
24
  for query in self._query():
25
25
  result = self.context.neo4j.query_database(session=session, query=query, **kwargs)
26
26
  stats = merge_summery(stats, result.summery)
27
- return TaskReturn(True, stats)
27
+ return TaskReturn(success=True, summery=stats)
28
28
  else:
29
29
  result = self.context.neo4j.query_database(session=session, query=self._query(), **kwargs)
30
- return TaskReturn(True, result.summery)
30
+ return TaskReturn(success=True, summery=result.summery)
31
31
 
32
32
  @abc.abstractmethod
33
33
  def _query(self) -> str | list[str]:
etl_lib/test_utils/utils.py CHANGED
@@ -125,7 +125,7 @@ class DummyReporter:
125
125
  def started_task(self, task: Task) -> Task:
126
126
  pass
127
127
 
128
- def finished_task(self, task, success: bool, summery: dict, error: str = None) -> Task:
128
+ def finished_task(self, task, result) -> Task:
129
129
  pass
130
130
 
131
131
  def report_progress(self, task, batches: int, expected_batches: int, stats: dict) -> None:
neo4j_etl_lib-0.1.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: neo4j-etl-lib
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Building blocks for ETL pipelines.
5
5
  Keywords: etl,graph,database
6
6
  Author-email: Bert Radke <bert.radke@pm.me>
neo4j_etl_lib-0.1.1.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
- etl_lib/__init__.py,sha256=xZKM1gxoW-QX6-igG9rff50v2lL6MgnuhzOOxOORaUI,65
1
+ etl_lib/__init__.py,sha256=sxY6lj4IZU25bZRF4lb6N5nn6yH1W4S1Qqysw-NzcXI,65
2
2
  etl_lib/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- etl_lib/cli/run_tools.py,sha256=YMpa-WICon6mjuPuwyrtdBM9IiHdO9UPBYtA_y6UA0Y,7292
3
+ etl_lib/cli/run_tools.py,sha256=KAm6XRz5audOP_PhMVozEckvdeMJ0HfyleEFc5jAalc,8579
4
4
  etl_lib/core/BatchProcessor.py,sha256=6quNPE9Dp8hYJDQDTqxQtxbQ3KCmb56Mko34EIsNhyI,3352
5
5
  etl_lib/core/ClosedLoopBatchProcessor.py,sha256=unlx_A339oi2nOOXF0irrVf8j_GFhwcTuk_w5liqbWc,1321
6
6
  etl_lib/core/ETLContext.py,sha256=ZTk_IDILpjUji0DphPUzTNx8k_2hZRxy37mqIcEA-kM,5641
7
- etl_lib/core/ProgressReporter.py,sha256=z5aVBjDJZSNGr6zmY8DsMC6dzEcnhAV7RboHWJdl49g,8557
8
- etl_lib/core/Task.py,sha256=qhCRYEJciYdaYzMurUTTzGQgm7UeKe0Ik37Fp-qAgr8,9256
7
+ etl_lib/core/ProgressReporter.py,sha256=QR9ZwwyHEEBYa8i3Udc5J68Ir1bsPIM1fFyt0n_lqFU,8407
8
+ etl_lib/core/Task.py,sha256=3e8iVXSfXaeBecvgTcs2LiIf2JwpKETRFhH4ig6lock,9202
9
9
  etl_lib/core/ValidationBatchProcessor.py,sha256=EhO6PFQB-4PZgIOTXP4PwkbAl5HRK0zgTeKMseRU5QU,3261
10
10
  etl_lib/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  etl_lib/core/utils.py,sha256=wwfyvy78fL6sqHdV0IFqAVyEkp6vo5Yo8gRZua2dulw,816
@@ -13,17 +13,17 @@ etl_lib/data_sink/CSVBatchSink.py,sha256=oq4VJwnA4WSyJzdvwstGv73vOEuWmPSfCynhVmx
13
13
  etl_lib/data_sink/CypherBatchSink.py,sha256=RMuelUat55ojLQMRYmoiXG0D_fgWH0RLbmUd01UMv_c,1511
14
14
  etl_lib/data_sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  etl_lib/data_source/CSVBatchSource.py,sha256=HILkaQAFua1OM7xgSNKY6teXpcJjWUPaS4Aol-GLYL8,2767
16
- etl_lib/data_source/CypherBatchSource.py,sha256=Umyr5-eQ5vI7EFqjDhUTgSGzuUkglGKjYIWLpijdGrU,1752
16
+ etl_lib/data_source/CypherBatchSource.py,sha256=06WuW11BqYjAXBZqL96Qr9MR8JrcjujDpxXe8cI-SYY,2238
17
17
  etl_lib/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  etl_lib/task/CreateReportingConstraintsTask.py,sha256=aV5i1EwjfuG-eEGoNaB-NcaPhyu0NgdVhmZr5MIv8ak,760
19
- etl_lib/task/ExecuteCypherTask.py,sha256=wpPF-bbawRiNS1cCXLhIwuXROAcXsv3OfdKc6DH5q2o,1252
19
+ etl_lib/task/ExecuteCypherTask.py,sha256=thE8YTZzv1abxNhhDcb4p4ke6qmI6kWR4XQ-GrCBBBU,1284
20
20
  etl_lib/task/GDSTask.py,sha256=X1E83wYa-N7AXy43WPEqIy77d__z-2wpBjWNhGNXJzA,1781
21
21
  etl_lib/task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  etl_lib/task/data_loading/CSVLoad2Neo4jTask.py,sha256=US9Sa6ytPPEa6BSVUBttlWdKzqyxlF-09If5XCf-LIE,2277
23
23
  etl_lib/task/data_loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  etl_lib/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- etl_lib/test_utils/utils.py,sha256=G_qT2WHrBAnNNCmAjCZAgqPP0NseJzBDyBttYmSshQU,5150
26
- neo4j_etl_lib-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- neo4j_etl_lib-0.1.0.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
28
- neo4j_etl_lib-0.1.0.dist-info/METADATA,sha256=nk13cf2M1ErdY9fL0T2leYjJlkdXZOtnKpa-XMu8ifE,2210
29
- neo4j_etl_lib-0.1.0.dist-info/RECORD,,
25
+ etl_lib/test_utils/utils.py,sha256=j7RMhT5Q69-5EAVwB1hePPJobq69_uYxuMTfd6gnbbc,5109
26
+ neo4j_etl_lib-0.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ neo4j_etl_lib-0.1.1.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
28
+ neo4j_etl_lib-0.1.1.dist-info/METADATA,sha256=LG9xc0NIjBUtdRZwLl9O8WpSXjJqCWLIWf0m8j0iZHQ,2210
29
+ neo4j_etl_lib-0.1.1.dist-info/RECORD,,