neo4j-etl-lib 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/PKG-INFO +8 -6
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/pyproject.toml +10 -5
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/__init__.py +1 -1
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/ProgressReporter.py +1 -1
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_source/CSVBatchSource.py +1 -1
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/GDSTask.py +8 -5
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/test_utils/utils.py +9 -5
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/LICENSE +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/README.md +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/cli/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/cli/run_tools.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/BatchProcessor.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/ClosedLoopBatchProcessor.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/ETLContext.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/ParallelBatchProcessor.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/SplittingBatchProcessor.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/Task.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/ValidationBatchProcessor.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/core/utils.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_sink/CSVBatchSink.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_sink/CypherBatchSink.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_sink/SQLBatchSink.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_sink/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_source/CypherBatchSource.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_source/SQLBatchSource.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/data_source/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/CreateReportingConstraintsTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/ExecuteCypherTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/CSVLoad2Neo4jTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/ParallelCSVLoad2Neo4jTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/ParallelSQLLoad2Neo4jTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/SQLLoad2Neo4jTask.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/__init__.py +0 -0
- {neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/test_utils/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: neo4j-etl-lib
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Building blocks for ETL pipelines.
|
|
5
5
|
Keywords: etl,graph,database
|
|
6
6
|
Author-email: Bert Radke <bert.radke@pm.me>
|
|
@@ -14,11 +14,11 @@ Classifier: Programming Language :: Python :: 3
|
|
|
14
14
|
Classifier: Topic :: Database
|
|
15
15
|
Classifier: Development Status :: 4 - Beta
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: pydantic>=2.10.5; python_version >= '3.
|
|
18
|
-
Requires-Dist: neo4j-rust-ext>=5.27.0; python_version >= '3.
|
|
19
|
-
Requires-Dist: python-dotenv>=1.0.1; python_version >= '3.
|
|
20
|
-
Requires-Dist: tabulate>=0.9.0; python_version >= '3.
|
|
21
|
-
Requires-Dist: click>=8.1.8; python_version >= '3.
|
|
17
|
+
Requires-Dist: pydantic>=2.10.5; python_version >= '3.10'
|
|
18
|
+
Requires-Dist: neo4j-rust-ext>=5.27.0,<6; python_version >= '3.10'
|
|
19
|
+
Requires-Dist: python-dotenv>=1.0.1; python_version >= '3.10'
|
|
20
|
+
Requires-Dist: tabulate>=0.9.0; python_version >= '3.10'
|
|
21
|
+
Requires-Dist: click>=8.1.8; python_version >= '3.10'
|
|
22
22
|
Requires-Dist: pydantic[email-validator]
|
|
23
23
|
Requires-Dist: pytest>=8.3.0 ; extra == "dev" and ( python_version >= '3.8')
|
|
24
24
|
Requires-Dist: testcontainers[neo4j]==4.9.0 ; extra == "dev" and ( python_version >= '3.9' and python_version < '4.0')
|
|
@@ -35,11 +35,13 @@ Requires-Dist: sphinx-autoapi ; extra == "dev"
|
|
|
35
35
|
Requires-Dist: sqlalchemy ; extra == "dev"
|
|
36
36
|
Requires-Dist: psycopg2-binary ; extra == "dev"
|
|
37
37
|
Requires-Dist: graphdatascience>=1.13 ; extra == "gds" and ( python_version >= '3.9')
|
|
38
|
+
Requires-Dist: nox>=2024.0.0 ; extra == "nox"
|
|
38
39
|
Requires-Dist: sqlalchemy ; extra == "sql"
|
|
39
40
|
Project-URL: Documentation, https://neo-technology-field.github.io/python-etl-lib/index.html
|
|
40
41
|
Project-URL: Home, https://github.com/neo-technology-field/python-etl-lib
|
|
41
42
|
Provides-Extra: dev
|
|
42
43
|
Provides-Extra: gds
|
|
44
|
+
Provides-Extra: nox
|
|
43
45
|
Provides-Extra: sql
|
|
44
46
|
|
|
45
47
|
# Neo4j ETL Toolbox
|
|
@@ -22,11 +22,11 @@ dynamic = ["version", "description"]
|
|
|
22
22
|
keywords = ["etl", "graph", "database"]
|
|
23
23
|
|
|
24
24
|
dependencies = [
|
|
25
|
-
"pydantic>=2.10.5; python_version >= '3.
|
|
26
|
-
"neo4j-rust-ext>=5.27.0; python_version >= '3.
|
|
27
|
-
"python-dotenv>=1.0.1; python_version >= '3.
|
|
28
|
-
"tabulate>=0.9.0; python_version >= '3.
|
|
29
|
-
"click>=8.1.8; python_version >= '3.
|
|
25
|
+
"pydantic>=2.10.5; python_version >= '3.10'",
|
|
26
|
+
"neo4j-rust-ext>=5.27.0,<6; python_version >= '3.10'",
|
|
27
|
+
"python-dotenv>=1.0.1; python_version >= '3.10'",
|
|
28
|
+
"tabulate>=0.9.0; python_version >= '3.10'",
|
|
29
|
+
"click>=8.1.8; python_version >= '3.10'",
|
|
30
30
|
"pydantic[email_validator]"
|
|
31
31
|
]
|
|
32
32
|
|
|
@@ -41,6 +41,11 @@ dev = [
|
|
|
41
41
|
gds = ["graphdatascience>=1.13; python_version >= '3.9'"]
|
|
42
42
|
sql = ["sqlalchemy"]
|
|
43
43
|
|
|
44
|
+
# Local-only multi-version testing, install via `pip install ".[dev,nox]"`
|
|
45
|
+
nox = [
|
|
46
|
+
"nox>=2024.0.0"
|
|
47
|
+
]
|
|
48
|
+
|
|
44
49
|
[project.urls]
|
|
45
50
|
Home = "https://github.com/neo-technology-field/python-etl-lib"
|
|
46
51
|
Documentation = "https://neo-technology-field.github.io/python-etl-lib/index.html"
|
|
@@ -45,7 +45,7 @@ class ProgressReporter:
|
|
|
45
45
|
The task that was provided.
|
|
46
46
|
"""
|
|
47
47
|
task.start_time = datetime.now()
|
|
48
|
-
self.logger.info(f"{'
|
|
48
|
+
self.logger.info(f"{' ' * (4 * task.depth)}starting {task.task_name()}")
|
|
49
49
|
return task
|
|
50
50
|
|
|
51
51
|
def finished_task(self, task: Task, result: TaskReturn) -> Task:
|
|
@@ -30,7 +30,7 @@ class CSVBatchSource(BatchProcessor):
|
|
|
30
30
|
self.csv_file = csv_file
|
|
31
31
|
self.kwargs = kwargs
|
|
32
32
|
|
|
33
|
-
def get_batch(self, max_batch__size: int) -> Generator[BatchResults]:
|
|
33
|
+
def get_batch(self, max_batch__size: int) -> Generator[BatchResults, None, None]:
|
|
34
34
|
for batch_size, chunks_ in self.__read_csv(self.csv_file, batch_size=max_batch__size, **self.kwargs):
|
|
35
35
|
yield BatchResults(chunk=chunks_, statistics={"csv_lines_read": batch_size}, batch_size=batch_size)
|
|
36
36
|
|
|
@@ -28,11 +28,14 @@ class GDSTask(Task):
|
|
|
28
28
|
Function that uses the gds client to perform tasks. See the following example:
|
|
29
29
|
|
|
30
30
|
def gds_fun(etl_context):
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
31
|
+
gds = etl_context.neo4j.gds
|
|
32
|
+
gds.graph.drop("neo4j-offices", failIfMissing=False)
|
|
33
|
+
g_office, project_result = gds.graph.project("neo4j-offices", "City", "FLY_TO")
|
|
34
|
+
mutate_result = gds.pageRank.write(g_office, tolerance=0.5, writeProperty="rank")
|
|
35
|
+
return TaskReturn(success=True, summery=transform_dict(mutate_result.to_dict()))
|
|
36
|
+
|
|
37
|
+
Notes: Do *NOT* use `etl_context.neo4j.gds` with a context manager. The GDS client closes the underlying
|
|
38
|
+
connection when exiting the context.
|
|
36
39
|
|
|
37
40
|
:param context: The ETLContext to use. Provides the gds client to the func via `etl_context.neo4j.gds`
|
|
38
41
|
:param func: a function that expects a param `etl_context` and returns a `TaskReturn` object.
|
|
@@ -7,7 +7,7 @@ from _pytest.tmpdir import tmp_path
|
|
|
7
7
|
from neo4j import Driver
|
|
8
8
|
from neo4j.time import Date
|
|
9
9
|
|
|
10
|
-
from etl_lib.core.ETLContext import
|
|
10
|
+
from etl_lib.core.ETLContext import ETLContext, Neo4jContext, QueryResult, SQLContext, gds
|
|
11
11
|
from etl_lib.core.Task import Task
|
|
12
12
|
|
|
13
13
|
|
|
@@ -96,7 +96,7 @@ def get_database_name():
|
|
|
96
96
|
raise Exception("define NEO4J_TEST_DATABASE environment variable")
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
class
|
|
99
|
+
class MockNeo4jContext(Neo4jContext):
|
|
100
100
|
|
|
101
101
|
def __init__(self, driver: Driver):
|
|
102
102
|
self.logger = logging.getLogger(self.__class__.__name__)
|
|
@@ -105,29 +105,32 @@ class TestNeo4jContext(Neo4jContext):
|
|
|
105
105
|
self.gds = gds(self)
|
|
106
106
|
|
|
107
107
|
|
|
108
|
-
class
|
|
108
|
+
class MockETLContext(ETLContext):
|
|
109
109
|
|
|
110
110
|
def __init__(self, driver: Driver, tmp_path):
|
|
111
111
|
self.logger = logging.getLogger(self.__class__.__name__)
|
|
112
112
|
self.__env_vars = {"ETL_ERROR_PATH": tmp_path}
|
|
113
|
-
self.neo4j =
|
|
113
|
+
self.neo4j = MockNeo4jContext(driver)
|
|
114
114
|
self.reporter = DummyReporter()
|
|
115
115
|
|
|
116
116
|
def env(self, key: str) -> Any:
|
|
117
117
|
if key in self.__env_vars:
|
|
118
118
|
return self.__env_vars[key]
|
|
119
119
|
|
|
120
|
-
|
|
120
|
+
|
|
121
|
+
class MockSQLETLContext(ETLContext):
|
|
121
122
|
|
|
122
123
|
def __init__(self, sql_uri):
|
|
123
124
|
self.logger = logging.getLogger(self.__class__.__name__)
|
|
124
125
|
self.reporter = DummyReporter()
|
|
126
|
+
self.__env_vars = {}
|
|
125
127
|
self.sql = SQLContext(sql_uri)
|
|
126
128
|
|
|
127
129
|
def env(self, key: str) -> Any:
|
|
128
130
|
if key in self.__env_vars:
|
|
129
131
|
return self.__env_vars[key]
|
|
130
132
|
|
|
133
|
+
|
|
131
134
|
class DummyReporter:
|
|
132
135
|
|
|
133
136
|
def register_tasks(self, main: Task):
|
|
@@ -163,6 +166,7 @@ class DummyContext:
|
|
|
163
166
|
def env(self, key: str) -> Any:
|
|
164
167
|
pass
|
|
165
168
|
|
|
169
|
+
|
|
166
170
|
class DummyPredecessor:
|
|
167
171
|
def __init__(self, batches):
|
|
168
172
|
self.batches = batches
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/CreateReportingConstraintsTask.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/CSVLoad2Neo4jTask.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{neo4j_etl_lib-0.3.0 → neo4j_etl_lib-0.3.1}/src/etl_lib/task/data_loading/SQLLoad2Neo4jTask.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|