neo4j-etl-lib 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
etl_lib/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Building blocks for ETL pipelines.
3
3
  """
4
- __version__ = "0.3.0"
4
+ __version__ = "0.3.1"
@@ -45,7 +45,7 @@ class ProgressReporter:
45
45
  The task that was provided.
46
46
  """
47
47
  task.start_time = datetime.now()
48
- self.logger.info(f"{'\t' * task.depth}starting {task.task_name()}")
48
+ self.logger.info(f"{' ' * (4 * task.depth)}starting {task.task_name()}")
49
49
  return task
50
50
 
51
51
  def finished_task(self, task: Task, result: TaskReturn) -> Task:
@@ -30,7 +30,7 @@ class CSVBatchSource(BatchProcessor):
30
30
  self.csv_file = csv_file
31
31
  self.kwargs = kwargs
32
32
 
33
- def get_batch(self, max_batch__size: int) -> Generator[BatchResults]:
33
+ def get_batch(self, max_batch__size: int) -> Generator[BatchResults, None, None]:
34
34
  for batch_size, chunks_ in self.__read_csv(self.csv_file, batch_size=max_batch__size, **self.kwargs):
35
35
  yield BatchResults(chunk=chunks_, statistics={"csv_lines_read": batch_size}, batch_size=batch_size)
36
36
 
etl_lib/task/GDSTask.py CHANGED
@@ -28,11 +28,14 @@ class GDSTask(Task):
28
28
  Function that uses the gds client to perform tasks. See the following example:
29
29
 
30
30
  def gds_fun(etl_context):
31
- with etl_context.neo4j.gds() as gds:
32
- gds.graph.drop("neo4j-offices", failIfMissing=False)
33
- g_office, project_result = gds.graph.project("neo4j-offices", "City", "FLY_TO")
34
- mutate_result = gds.pageRank.mutate(g_office, tolerance=0.5, mutateProperty="rank")
35
- return TaskReturn(success=True, summery=transform_dict(mutate_result.to_dict()))
31
+ gds = etl_context.neo4j.gds
32
+ gds.graph.drop("neo4j-offices", failIfMissing=False)
33
+ g_office, project_result = gds.graph.project("neo4j-offices", "City", "FLY_TO")
34
+ mutate_result = gds.pageRank.write(g_office, tolerance=0.5, writeProperty="rank")
35
+ return TaskReturn(success=True, summery=transform_dict(mutate_result.to_dict()))
36
+
37
+ Notes: Do *NOT* use `etl_context.neo4j.gds` with a context manager. The GDS client closes the underlying
38
+ connection when exiting the context.
36
39
 
37
40
  :param context: The ETLContext to use. Provides the gds client to the func via the `etl_context.neo4j.gds` attribute.
38
41
  :param func: a function that expects a param `etl_context` and returns a `TaskReturn` object.
@@ -7,7 +7,7 @@ from _pytest.tmpdir import tmp_path
7
7
  from neo4j import Driver
8
8
  from neo4j.time import Date
9
9
 
10
- from etl_lib.core.ETLContext import QueryResult, Neo4jContext, ETLContext, SQLContext, gds
10
+ from etl_lib.core.ETLContext import ETLContext, Neo4jContext, QueryResult, SQLContext, gds
11
11
  from etl_lib.core.Task import Task
12
12
 
13
13
 
@@ -96,7 +96,7 @@ def get_database_name():
96
96
  raise Exception("define NEO4J_TEST_DATABASE environment variable")
97
97
 
98
98
 
99
- class TestNeo4jContext(Neo4jContext):
99
+ class MockNeo4jContext(Neo4jContext):
100
100
 
101
101
  def __init__(self, driver: Driver):
102
102
  self.logger = logging.getLogger(self.__class__.__name__)
@@ -105,29 +105,32 @@ class TestNeo4jContext(Neo4jContext):
105
105
  self.gds = gds(self)
106
106
 
107
107
 
108
- class TestETLContext(ETLContext):
108
+ class MockETLContext(ETLContext):
109
109
 
110
110
  def __init__(self, driver: Driver, tmp_path):
111
111
  self.logger = logging.getLogger(self.__class__.__name__)
112
112
  self.__env_vars = {"ETL_ERROR_PATH": tmp_path}
113
- self.neo4j = TestNeo4jContext(driver)
113
+ self.neo4j = MockNeo4jContext(driver)
114
114
  self.reporter = DummyReporter()
115
115
 
116
116
  def env(self, key: str) -> Any:
117
117
  if key in self.__env_vars:
118
118
  return self.__env_vars[key]
119
119
 
120
- class TestSQLETLContext(ETLContext):
120
+
121
+ class MockSQLETLContext(ETLContext):
121
122
 
122
123
  def __init__(self, sql_uri):
123
124
  self.logger = logging.getLogger(self.__class__.__name__)
124
125
  self.reporter = DummyReporter()
126
+ self.__env_vars = {}
125
127
  self.sql = SQLContext(sql_uri)
126
128
 
127
129
  def env(self, key: str) -> Any:
128
130
  if key in self.__env_vars:
129
131
  return self.__env_vars[key]
130
132
 
133
+
131
134
  class DummyReporter:
132
135
 
133
136
  def register_tasks(self, main: Task):
@@ -163,6 +166,7 @@ class DummyContext:
163
166
  def env(self, key: str) -> Any:
164
167
  pass
165
168
 
169
+
166
170
  class DummyPredecessor:
167
171
  def __init__(self, batches):
168
172
  self.batches = batches
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: neo4j-etl-lib
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Building blocks for ETL pipelines.
5
5
  Keywords: etl,graph,database
6
6
  Author-email: Bert Radke <bert.radke@pm.me>
@@ -14,11 +14,11 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Database
15
15
  Classifier: Development Status :: 4 - Beta
16
16
  License-File: LICENSE
17
- Requires-Dist: pydantic>=2.10.5; python_version >= '3.8'
18
- Requires-Dist: neo4j-rust-ext>=5.27.0; python_version >= '3.7'
19
- Requires-Dist: python-dotenv>=1.0.1; python_version >= '3.8'
20
- Requires-Dist: tabulate>=0.9.0; python_version >= '3.7'
21
- Requires-Dist: click>=8.1.8; python_version >= '3.7'
17
+ Requires-Dist: pydantic>=2.10.5; python_version >= '3.10'
18
+ Requires-Dist: neo4j-rust-ext>=5.27.0,<6; python_version >= '3.10'
19
+ Requires-Dist: python-dotenv>=1.0.1; python_version >= '3.10'
20
+ Requires-Dist: tabulate>=0.9.0; python_version >= '3.10'
21
+ Requires-Dist: click>=8.1.8; python_version >= '3.10'
22
22
  Requires-Dist: pydantic[email-validator]
23
23
  Requires-Dist: pytest>=8.3.0 ; extra == "dev" and ( python_version >= '3.8')
24
24
  Requires-Dist: testcontainers[neo4j]==4.9.0 ; extra == "dev" and ( python_version >= '3.9' and python_version < '4.0')
@@ -35,11 +35,13 @@ Requires-Dist: sphinx-autoapi ; extra == "dev"
35
35
  Requires-Dist: sqlalchemy ; extra == "dev"
36
36
  Requires-Dist: psycopg2-binary ; extra == "dev"
37
37
  Requires-Dist: graphdatascience>=1.13 ; extra == "gds" and ( python_version >= '3.9')
38
+ Requires-Dist: nox>=2024.0.0 ; extra == "nox"
38
39
  Requires-Dist: sqlalchemy ; extra == "sql"
39
40
  Project-URL: Documentation, https://neo-technology-field.github.io/python-etl-lib/index.html
40
41
  Project-URL: Home, https://github.com/neo-technology-field/python-etl-lib
41
42
  Provides-Extra: dev
42
43
  Provides-Extra: gds
44
+ Provides-Extra: nox
43
45
  Provides-Extra: sql
44
46
 
45
47
  # Neo4j ETL Toolbox
@@ -1,11 +1,11 @@
1
- etl_lib/__init__.py,sha256=FyaTAuElsn3y3j1g15X141PnLTYxPrSUVU_YaVmiyPs,65
1
+ etl_lib/__init__.py,sha256=x6coFV38ytJ_wPhR3c0UEzX65oTz2ouKwygkC_tyRLM,65
2
2
  etl_lib/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  etl_lib/cli/run_tools.py,sha256=KIar-y22P4kKm-yoJjecYsPwqC7U76M71dEgFO5-ZBo,8561
4
4
  etl_lib/core/BatchProcessor.py,sha256=mRpdxZ6ZMKI8XsY3TPuy4dVcvRqLKCO-p63KeOhFyKE,3417
5
5
  etl_lib/core/ClosedLoopBatchProcessor.py,sha256=WzML1nldhZRbP8fhlD6utuK5SBYRl1cJgEobVDIdBP4,1626
6
6
  etl_lib/core/ETLContext.py,sha256=wmEnbs3n_80B6La9Py_-MHG8BN0FajE9MjGPej0A3To,8045
7
7
  etl_lib/core/ParallelBatchProcessor.py,sha256=jNo1Xv1Ts34UZIseoQLDZOhHOVeEr8dUibKUt0FJ4Hw,7318
8
- etl_lib/core/ProgressReporter.py,sha256=UvWAPCuOrqyUcb5_kosIsCg1dyVQL-tnjgqnzs2cwZA,9372
8
+ etl_lib/core/ProgressReporter.py,sha256=tkE-W6qlR25nU8nUoECcxZDnjnG8AtQH9s9s5WBh_-Q,9377
9
9
  etl_lib/core/SplittingBatchProcessor.py,sha256=OIRMUVFpUoZc0w__JJjUr7B9QC3sBlqQp41xghrQzC0,11616
10
10
  etl_lib/core/Task.py,sha256=muQFY5qj2n-ZVV8F6vlHqo2lVSvB3wtGdIgkSXVpOFM,9365
11
11
  etl_lib/core/ValidationBatchProcessor.py,sha256=U1M2Qp9Ledt8qFiHAg8zMxE9lLRkBrr51NKs_Y8skK8,3400
@@ -15,13 +15,13 @@ etl_lib/data_sink/CSVBatchSink.py,sha256=oq4VJwnA4WSyJzdvwstGv73vOEuWmPSfCynhVmx
15
15
  etl_lib/data_sink/CypherBatchSink.py,sha256=nBH4bzN1IvdSFcKgiAIrAY5IauB565sdyVrnRc1hg_4,1566
16
16
  etl_lib/data_sink/SQLBatchSink.py,sha256=vyGrrxpdmCLUZMI2_W2ORej3FLGbwN9-b2GMYHd-k9g,1451
17
17
  etl_lib/data_sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- etl_lib/data_source/CSVBatchSource.py,sha256=HILkaQAFua1OM7xgSNKY6teXpcJjWUPaS4Aol-GLYL8,2767
18
+ etl_lib/data_source/CSVBatchSource.py,sha256=0q1XdPhAIKw1HcTpnp_F4WxRUzk-24Q8Qd-WeIo5OZ0,2779
19
19
  etl_lib/data_source/CypherBatchSource.py,sha256=06WuW11BqYjAXBZqL96Qr9MR8JrcjujDpxXe8cI-SYY,2238
20
20
  etl_lib/data_source/SQLBatchSource.py,sha256=O3ZA2GXvo5j_KGwOILzguYZMPY_FJkV5j8FIa3-d9oM,4067
21
21
  etl_lib/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  etl_lib/task/CreateReportingConstraintsTask.py,sha256=nTcHLBIgXz_h2OQg-SHjQr68bhH974u0MwrtWPnVwng,762
23
23
  etl_lib/task/ExecuteCypherTask.py,sha256=thE8YTZzv1abxNhhDcb4p4ke6qmI6kWR4XQ-GrCBBBU,1284
24
- etl_lib/task/GDSTask.py,sha256=X1E83wYa-N7AXy43WPEqIy77d__z-2wpBjWNhGNXJzA,1781
24
+ etl_lib/task/GDSTask.py,sha256=UP_NMvdeQ9ueLUFlREfe0q3XhFHTCaMvXArSpvxZNiQ,1918
25
25
  etl_lib/task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  etl_lib/task/data_loading/CSVLoad2Neo4jTask.py,sha256=9XiVdJHpABE-Hx1bsvTKLJWtChc8XMwXeO5RicaHDUo,3873
27
27
  etl_lib/task/data_loading/ParallelCSVLoad2Neo4jTask.py,sha256=2xN-5bHV9XgoaJLbbTEBuJFoZHV_CYi_hg6M1HQ-ffA,4030
@@ -29,8 +29,8 @@ etl_lib/task/data_loading/ParallelSQLLoad2Neo4jTask.py,sha256=9xpCW5i8yGnUHyg475
29
29
  etl_lib/task/data_loading/SQLLoad2Neo4jTask.py,sha256=HR3DcjOUkQN4SbCkgQYzljQCYhOcb-x2-DR5dBdapzU,2953
30
30
  etl_lib/task/data_loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  etl_lib/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- etl_lib/test_utils/utils.py,sha256=kxWJqdRf1pg-4ByMfrtW3HDbgXIvyVtLndGDVvMCmoI,5641
33
- neo4j_etl_lib-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- neo4j_etl_lib-0.3.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
35
- neo4j_etl_lib-0.3.0.dist-info/METADATA,sha256=GJcjdPvmzjEUq0pLndSSVzOg3c7CR6bIWz3sB_9tkVY,2506
36
- neo4j_etl_lib-0.3.0.dist-info/RECORD,,
32
+ etl_lib/test_utils/utils.py,sha256=CgYOCXcUyndOdRAmGyPLoCIuEik0yzy6FLV2k16cpDM,5673
33
+ neo4j_etl_lib-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ neo4j_etl_lib-0.3.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
35
+ neo4j_etl_lib-0.3.1.dist-info/METADATA,sha256=Pm921qyxL36Ed_Ppp2cW3OFPxUGMv7IyRTmtba3n96o,2580
36
+ neo4j_etl_lib-0.3.1.dist-info/RECORD,,