PyPI - neo4j-etl-lib - Versions diffs - 0.0.2__tar.gz → 0.1.0__tar.gz - Mend

neo4j-etl-lib 0.0.2.tar.gz → 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

neo4j_etl_lib-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,54 @@
+Metadata-Version: 2.4
+Name: neo4j-etl-lib
+Version: 0.1.0
+Summary: Building blocks for ETL pipelines.
+Keywords: etl,graph,database
+Author-email: Bert Radke <bert.radke@pm.me>
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Database
+Classifier: Development Status :: 4 - Beta
+License-File: LICENSE
+Requires-Dist: pydantic>=2.10.5; python_version >= '3.8'
+Requires-Dist: neo4j>=5.27.0; python_version >= '3.7'
+Requires-Dist: python-dotenv>=1.0.1; python_version >= '3.8'
+Requires-Dist: tabulate>=0.9.0; python_version >= '3.7'
+Requires-Dist: click>=8.1.8; python_version >= '3.7'
+Requires-Dist: pytest>=8.3.0 ; extra == "dev" and ( python_version >= '3.8')
+Requires-Dist: testcontainers[neo4j]==4.9.0 ; extra == "dev" and ( python_version >= '3.9' and python_version < '4.0')
+Requires-Dist: pytest-cov ; extra == "dev"
+Requires-Dist: bumpver ; extra == "dev"
+Requires-Dist: isort ; extra == "dev"
+Requires-Dist: pip-tools ; extra == "dev"
+Requires-Dist: sphinx ; extra == "dev"
+Requires-Dist: sphinx-rtd-theme ; extra == "dev"
+Requires-Dist: pydata-sphinx-theme ; extra == "dev"
+Requires-Dist: sphinx-autodoc-typehints ; extra == "dev"
+Requires-Dist: sphinxcontrib-napoleon ; extra == "dev"
+Requires-Dist: sphinx-autoapi ; extra == "dev"
+Requires-Dist: graphdatascience>=1.13 ; extra == "gds" and ( python_version >= '3.9')
+Project-URL: Documentation, https://neo-technology-field.github.io/python-etl-lib/index.html
+Project-URL: Home, https://github.com/neo-technology-field/python-etl-lib
+Provides-Extra: dev
+Provides-Extra: gds
+# Neo4j ETL Toolbox
+A Python library of building blocks to assemble ETL pipelines.
+Complete documentation can be found at https://neo-technology-field.github.io/python-etl-lib/index.html
+See https://github.com/neo-technology-field/python-etl-lib/tree/main/examples/gtfs for an example project.
+The library can be installed via
+```bash
+pip install neo4j-etl-lib
+```

neo4j_etl_lib-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Neo4j ETL Toolbox
+A Python library of building blocks to assemble ETL pipelines.
+Complete documentation can be found at https://neo-technology-field.github.io/python-etl-lib/index.html
+See https://github.com/neo-technology-field/python-etl-lib/tree/main/examples/gtfs for an example project.
+The library can be installed via
+```bash
+pip install neo4j-etl-lib
+```

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/pyproject.toml RENAMED Viewed

@@ -34,7 +34,8 @@ dev = [
     "pytest>=8.3.0; python_version >= '3.8'",
     "testcontainers[neo4j]==4.9.0; python_version >= '3.9' and python_version < '4.0'",
     "pytest-cov", "bumpver", "isort", "pip-tools",
-    "sphinx", "sphinx-rtd-theme", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-napoleon"
+    "sphinx", "sphinx-rtd-theme", "pydata-sphinx-theme", "sphinx-autodoc-typehints",
+    "sphinxcontrib-napoleon", "sphinx-autoapi"
 ]
 gds = ["graphdatascience>=1.13; python_version >= '3.9'"]

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """
 Building blocks for ETL pipelines.
 """
-__version__ = "0.0.2"
+__version__ = "0.1.0"

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/cli/run_tools.py RENAMED Viewed

@@ -98,7 +98,7 @@ def cli(ctx, neo4j_uri, neo4j_user, neo4j_password, log_file, database_name):
 @click.pass_context
 def query(ctx, number_runs):
     """
-    Retrieve the list of the last x etl runs from the database and display them.
+    Retrieve the list of the last x ETL runs from the database and display them.
     """
     print(f"Listing runs in database '{ctx.obj['database_name']}'")
     with __driver(ctx) as driver:
@@ -155,7 +155,7 @@ def detail(ctx, run_id, details):
                 "status": record["status"],
                 "batches": record["batches"],
                 "duration": __duration_from_start_end(record["startTime"], record["endTime"]),
-                "changes": sum(record.get("stats", {}).values())
+                "changes": record.get("changes", 0)
             }
             for record in records
         ]

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/core/BatchProcessor.py RENAMED Viewed

@@ -4,7 +4,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Generator
-from etl_lib.core.ETLContext import ETLContext
 from etl_lib.core.Task import Task
 from etl_lib.core.utils import merge_summery
@@ -53,7 +52,7 @@ class BatchProcessor:
     and returned in batches to the caller. Usage of `Generators` ensures that not all data must be loaded at once.
     """
-    def __init__(self, context: ETLContext, task: Task, predecessor=None):
+    def __init__(self, context, task: Task = None, predecessor=None):
         """
         Constructs a new :py:class:`etl_lib.core.BatchProcessor` instance.

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/core/ETLContext.py RENAMED Viewed

@@ -2,7 +2,7 @@ import logging
 from typing import NamedTuple, Any
 from graphdatascience import GraphDataScience
-from neo4j import Driver, GraphDatabase, WRITE_ACCESS, SummaryCounters
+from neo4j import GraphDatabase, WRITE_ACCESS, SummaryCounters
 from etl_lib.core.ProgressReporter import get_reporter
@@ -20,18 +20,19 @@ def append_results(r1: QueryResult, r2: QueryResult) -> QueryResult:
 class Neo4jContext:
-    uri: str
-    auth: (str, str)
-    driver: Driver
-    database: str
+    """
+    Holds the connection to the neo4j database and provides facilities to execute queries.
+    """
     def __init__(self, env_vars: dict):
         """
         Create a new Neo4j context.
         Reads the following env_vars keys:
         - `NEO4J_URI`,
         - `NEO4J_USERNAME`,
         - `NEO4J_PASSWORD`.
+        - `NEO4J_DATABASE`,
         """
         self.logger = logging.getLogger(self.__class__.__name__)
         self.uri = env_vars["NEO4J_URI"]
@@ -43,6 +44,10 @@ class Neo4jContext:
     def query_database(self, session, query, **kwargs) -> QueryResult:
         """
         Executes a Cypher query on the Neo4j database.
+        Args:
+            session: Neo4j database session.
+            query: Cypher query either as a single query or as a list.
         """
         if isinstance(query, list):
             results = []
@@ -78,12 +83,33 @@ class Neo4jContext:
         }
     def session(self, database=None):
+        """
+        Create a new Neo4j session in write mode, caller is responsible to close the session.
+        Args:
+            database: name of the database to use for this session. If not provided, the database name provided during
+                construction will be used.
+        Returns:
+            newly created Neo4j session.
+        """
         if database is None:
             return self.driver.session(database=self.database, default_access_mode=WRITE_ACCESS)
         else:
             return self.driver.session(database=database, default_access_mode=WRITE_ACCESS)
     def gds(self, database=None) -> GraphDataScience:
+        """
+        Creates a new GraphDataScience client.
+        Args:
+            database: Name of the database to use for this gds client.
+                If not provided, the database name provided during construction will be used.
+        Returns:
+            gds client.
+        """
         if database is None:
             return GraphDataScience.from_neo4j_driver(driver=self.driver, database=self.database)
         else:
@@ -104,8 +130,6 @@ class ETLContext:
     Will be passed to all :py:class:`etl_lib.core.Task` to provide access to environment variables and functionality
     deemed general enough that all parts of the ETL pipeline would need it.
     """
-    neo4j: Neo4jContext
-    __env_vars: dict
     def __init__(self, env_vars: dict):
         """

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/core/ProgressReporter.py RENAMED Viewed

@@ -66,7 +66,7 @@ class ProgressReporter:
         task.success = success
         task.summery = summery
-        report = f"{'\t' * task.depth}finished {task.task_name()} with success: {success}"
+        report = f"{'\t' * task.depth} finished {task.task_name()} in {task.end_time - task.start_time} with success: {success}"
         if error is not None:
             report += f", error: \n{error}"
         else:
@@ -197,10 +197,10 @@ def get_reporter(context) -> ProgressReporter:
     """
     Returns a ProgressReporter instance.
-    If the :py:class:`ETLContext <etl_lib.core.ETLContext>` env holds the key `REPORTER_DATABASE` then
-    a :py:class:`Neo4jProgressReporter` instance is created with the given database name.
+    If the :class:`ETLContext <etl_lib.core.ETLContext>` env holds the key `REPORTER_DATABASE` then
+    a :class:`Neo4jProgressReporter` instance is created with the given database name.
-    Otherwise, a  :py:class:`ProgressReporter` (no logging to database) instance will be created.
+    Otherwise, a  :class:`ProgressReporter` (no logging to database) instance will be created.
     """
     db = context.env("REPORTER_DATABASE")

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/core/Task.py RENAMED Viewed

@@ -78,9 +78,6 @@ class Task:
         """Time when the :py:func:`~execute` has finished, `None` before."""
         self.success: bool
         """True if the task has finished successfully. False otherwise, `None` before the task has finished."""
-        self.summery: dict  # TODO: still in use?
-        """Summery statistics about the task performed, such as rows inserted, updated."""
-        self.error: str  # TODO: still in use?
         self.depth: int = 0
         """Level or depth of the task in the hierarchy. The root task is depth 0. Updated by the Reporter"""

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/core/ValidationBatchProcessor.py RENAMED Viewed

@@ -47,7 +47,7 @@ class ValidationBatchProcessor(BatchProcessor):
             for row in batch.chunk:
                 try:
                     # Validate and transform the row
-                    validated_row = self.model(**row).model_dump()
+                    validated_row = json.loads(self.model(**row).model_dump_json())
                     valid_rows.append(validated_row)
                 except ValidationError as e:
                     # Collect invalid rows with errors

neo4j_etl_lib-0.1.0/src/etl_lib/core/utils.py ADDED Viewed

@@ -0,0 +1,28 @@
+import logging
+def merge_summery(summery_1: dict, summery_2: dict) -> dict:
+    """
+    Helper function to merge dicts. Assuming that values are numbers.
+    If a key exists in both dicts, then the result will contain a key with the added values.
+    """
+    return {i: summery_1.get(i, 0) + summery_2.get(i, 0)
+            for i in set(summery_1).union(summery_2)}
+def setup_logging(log_file=None):
+    """
+    Set up logging to console and optionally to a log file.
+    :param log_file: Path to the log file
+    :type log_file: str, optional
+    """
+    handlers = [logging.StreamHandler()]
+    if log_file:
+        handlers.append(logging.FileHandler(log_file))
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        handlers=handlers
+    )

neo4j_etl_lib-0.1.0/src/etl_lib/data_sink/CSVBatchSink.py ADDED Viewed

@@ -0,0 +1,57 @@
+import csv
+from pathlib import Path
+from typing import Generator
+from etl_lib.core.ETLContext import ETLContext
+from etl_lib.core.BatchProcessor import BatchProcessor, BatchResults, append_result
+from etl_lib.core.Task import Task
+class CSVBatchSink(BatchProcessor):
+    """
+    BatchProcessor to write batches of data to a CSV file.
+    """
+    def __init__(self, context: ETLContext, task: Task, predecessor: BatchProcessor, file_path: Path, **kwargs):
+        """
+        Constructs a new CSVBatchSink.
+        Args:
+            context: :class:`etl_lib.core.ETLContext.ETLContext` instance.
+            task: :class:`etl_lib.core.Task.Task` instance owning this batchProcessor.
+            predecessor: BatchProcessor which :func:`~get_batch` function will be called to receive batches to process.
+            file_path: Path to the CSV file where data will be written. If the file exists, data will be appended.
+            **kwargs: Additional arguments passed to `csv.DictWriter` to allow tuning the csv creation.
+        """
+        super().__init__(context, task, predecessor)
+        self.file_path = file_path
+        self.file_initialized = False
+        self.csv_kwargs = kwargs
+    def get_batch(self, batch_size: int) -> Generator[BatchResults, None, None]:
+        assert self.predecessor is not None
+        for batch_result in self.predecessor.get_batch(batch_size):
+            self._write_to_csv(batch_result.chunk)
+            yield append_result(batch_result, {"rows_written": len(batch_result.chunk)})
+    def _write_to_csv(self, data: list[dict]):
+        """
+        Writes a batch of data to the CSV file.
+        Args:
+            data: A list of dictionaries representing rows of data.
+        """
+        if not data:
+            return
+        fieldnames = data[0].keys()
+        write_header = not self.file_initialized or not self.file_path.exists()
+        with self.file_path.open(mode="a", newline="", encoding="utf-8") as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, **self.csv_kwargs)
+            if write_header:
+                writer.writeheader()
+            writer.writerows(data)
+        self.file_initialized = True

neo4j_etl_lib-0.0.2/src/etl_lib/data_sink/CypherBatchProcessor.py → neo4j_etl_lib-0.1.0/src/etl_lib/data_sink/CypherBatchSink.py RENAMED Viewed

@@ -5,19 +5,19 @@ from etl_lib.core.BatchProcessor import BatchProcessor, BatchResults, append_res
 from etl_lib.core.Task import Task
-class CypherBatchProcessor(BatchProcessor):
+class CypherBatchSink(BatchProcessor):
     """
     BatchProcessor to write batches of data to a Neo4j database.
     """
     def __init__(self, context: ETLContext, task: Task, predecessor: BatchProcessor, query: str):
         """
-        Constructs a new CypherBatchProcessor.
+        Constructs a new CypherBatchSink.
         Args:
-            context: :py:class:`etl_lib.core.ETLContext.ETLContext` instance.
-            task: :py:class:`etl_lib.core.Task.Task` instance owning this batchProcessor.
-            predecessor: BatchProcessor which :py:func:`~get_batch` function will be called to receive batches to process.
+            context: :class:`etl_lib.core.ETLContext.ETLContext` instance.
+            task: :class:`etl_lib.core.Task.Task` instance owning this batchProcessor.
+            predecessor: BatchProcessor which :func:`~get_batch` function will be called to receive batches to process.
             query: Cypher to write the query to Neo4j.
                 Data will be passed as `batch` parameter.
                 Therefore, the query should start with a `UNWIND $batch AS row`.

neo4j_etl_lib-0.0.2/src/etl_lib/data_source/CSVBatchProcessor.py → neo4j_etl_lib-0.1.0/src/etl_lib/data_source/CSVBatchSource.py RENAMED Viewed

@@ -4,11 +4,10 @@ from pathlib import Path
 from typing import Generator
 from etl_lib.core.BatchProcessor import BatchProcessor, BatchResults
-from etl_lib.core.ETLContext import ETLContext
 from etl_lib.core.Task import Task
-class CSVBatchProcessor(BatchProcessor):
+class CSVBatchSource(BatchProcessor):
     """
     BatchProcessor that reads a CSV file using the `csv` package.
@@ -17,13 +16,13 @@ class CSVBatchProcessor(BatchProcessor):
     starting with 0.
     """
-    def __init__(self, csv_file: Path, context: ETLContext, task: Task, **kwargs):
+    def __init__(self, csv_file: Path, context, task: Task = None, **kwargs):
         """
-        Constructs a new CSVBatchProcessor.
+        Constructs a new CSVBatchSource.
         Args:
             csv_file: Path to the CSV file.
-            context: :py:class:`etl_lib.core.ETLContext.ETLContext` instance.
+            context: :class:`etl_lib.core.ETLContext.ETLContext` instance.
             kwargs: Will be passed on to the `csv.DictReader` providing a way to customise the reading to different
                 csv formats.
         """
@@ -32,10 +31,10 @@ class CSVBatchProcessor(BatchProcessor):
         self.kwargs = kwargs
     def get_batch(self, max_batch__size: int) -> Generator[BatchResults]:
-        for batch_size, chunks_ in self.read_csv(self.csv_file, batch_size=max_batch__size, **self.kwargs):
+        for batch_size, chunks_ in self.__read_csv(self.csv_file, batch_size=max_batch__size, **self.kwargs):
             yield BatchResults(chunk=chunks_, statistics={"csv_lines_read": batch_size}, batch_size=batch_size)
-    def read_csv(self, file: Path, batch_size: int, **kwargs):
+    def __read_csv(self, file: Path, batch_size: int, **kwargs):
         if file.suffix == ".gz":
             with gzip.open(file, "rt", encoding='utf-8-sig') as f:
                 yield from self.__parse_csv(batch_size, file=f, **kwargs)
@@ -44,30 +43,23 @@ class CSVBatchProcessor(BatchProcessor):
                 yield from self.__parse_csv(batch_size, file=f, **kwargs)
     def __parse_csv(self, batch_size, file, **kwargs):
-        csv_file = csv.DictReader(file, **kwargs)
-        yield from self.__split_to_batches(csv_file, batch_size)
+        """Read CSV in batches without loading the entire file at once."""
+        csv_reader = csv.DictReader(file, **kwargs)
-    def __split_to_batches(self, source: [dict], batch_size):
-        """
-        Splits the provided source into batches.
-        Args:
-            source: Anything that can be loop over, ideally, this should also be a generator
-            batch_size: desired batch size
-        Returns:
-            generator object to loop over the batches. Each batch is an Array.
-        """
         cnt = 0
         batch_ = []
-        for i in source:
-            i["_row"] = cnt
+        for row in csv_reader:
+            row["_row"] = cnt
             cnt += 1
-            batch_.append(self.__clean_dict(i))
+            batch_.append(self.__clean_dict(row))
             if len(batch_) == batch_size:
                 yield len(batch_), batch_
                 batch_ = []
-        if len(batch_) > 0:
+        # Yield any remaining data
+        if batch_:
             yield len(batch_), batch_
     def __clean_dict(self, input_dict):

neo4j_etl_lib-0.1.0/src/etl_lib/data_source/CypherBatchSource.py ADDED Viewed

@@ -0,0 +1,47 @@
+from typing import Generator
+from etl_lib.core.BatchProcessor import BatchResults, BatchProcessor
+from etl_lib.core.ETLContext import ETLContext
+from etl_lib.core.Task import Task
+class CypherBatchSource(BatchProcessor):
+    def __init__(self, context: ETLContext, task: Task, query: str, **kwargs):
+        """
+       Constructs a new CypherBatchSource.
+       Args:
+           context: :class:`etl_lib.core.ETLContext.ETLContext` instance.
+           task: :class:`etl_lib.core.Task.Task` instance owning this batchProcessor.
+           query: Cypher query to execute.
+           kwargs: Arguments passed as parameters with the query.
+       """
+        super().__init__(context, task)
+        self.query = query
+        self.kwargs = kwargs
+    def __read_records(self, tx, batch_size):
+        batch_ = []
+        result = tx.run(self.query, **self.kwargs)
+        for record in result:
+            batch_.append(record.data())
+            if len(batch_) == batch_size:
+                yield batch_
+                batch_ = []
+        if batch_:
+            yield batch_
+    def get_batch(self, max_batch_size: int) -> Generator[BatchResults, None, None]:
+        # Not using a managed tx on purpose. First off, we want to keep the tx open while delivering batches.
+        # Automatic retry logic would not help here, as we do not want to start the query again.
+        with self.context.neo4j.session() as session:
+            with session.begin_transaction() as tx:
+                for chunk in self.__read_records(tx, max_batch_size):
+                    yield BatchResults(
+                        chunk=chunk,
+                        statistics={"cypher_rows_read": len(chunk)},
+                        batch_size=len(chunk)
+                    )

neo4j_etl_lib-0.1.0/src/etl_lib/task/CreateReportingConstraintsTask.py ADDED Viewed

@@ -0,0 +1,17 @@
+from etl_lib.core.Task import Task, TaskReturn
+class CreateReportingConstraintsTask(Task):
+    """Creates the constraint in the REPORTER_DATABASE database."""
+    def __init__(self, config):
+        super().__init__(config)
+    def run_internal(self, **kwargs) -> TaskReturn:
+        database = self.context.env("REPORTER_DATABASE")
+        assert database is not None, "REPORTER_DATABASE needs to be set in order to run this task"
+        with self.context.neo4j.session(database) as session:
+            result = self.context.neo4j.query_database(session=session,
+                                                       query="CREATE CONSTRAINT IF NOT EXISTS FOR (n:ETLTask) REQUIRE n.uuid IS UNIQUE")
+            return TaskReturn(True, result.summery)

{neo4j_etl_lib-0.0.2 → neo4j_etl_lib-0.1.0}/src/etl_lib/task/ExecuteCypherTask.py RENAMED Viewed

@@ -6,7 +6,12 @@ from etl_lib.core.utils import merge_summery
 class ExecuteCypherTask(Task):
+    """
+    Execute cypher (write) as a Task.
+    This task is for data refinement jobs, as it does not return cypher results.
+    Parameters can be passed as keyword arguments to the constructor and will be available as parameters inside cypher.
+    """
     def __init__(self, context: ETLContext):
         super().__init__(context)
         self.context = context

neo4j_etl_lib-0.1.0/src/etl_lib/task/data_loading/CSVLoad2Neo4jTask.py ADDED Viewed

@@ -0,0 +1,57 @@
+import abc
+import logging
+from pathlib import Path
+from typing import Type
+from pydantic import BaseModel
+from etl_lib.core.ETLContext import ETLContext
+from etl_lib.core.ClosedLoopBatchProcessor import ClosedLoopBatchProcessor
+from etl_lib.core.Task import Task, TaskReturn
+from etl_lib.core.ValidationBatchProcessor import ValidationBatchProcessor
+from etl_lib.data_sink.CypherBatchSink import CypherBatchSink
+from etl_lib.data_source.CSVBatchSource import CSVBatchSource
+class CSVLoad2Neo4jTask(Task):
+    """
+    Loads the specified CSV file to Neo4j.
+    Uses BatchProcessors to read, validate and write to Neo4j.
+    The validation step is using pydantic, hence a Pydantic model needs to be provided.
+    Rows that fail the validation will be written to an error file. The location of the error file is determined as
+    follows:
+    If the context env vars hold an entry `ETL_ERROR_PATH`, the file will be placed there, with the name set to the name
+    of the provided filename appended with `.error.json`.
+    If `ETL_ERROR_PATH` is not set, the file will be placed in the same directory as the CSV file.
+    """
+    def __init__(self, context: ETLContext, model: Type[BaseModel], file: Path, batch_size: int = 5000):
+        super().__init__(context)
+        self.batch_size = batch_size
+        self.model = model
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.file = file
+    def run_internal(self, **kwargs) -> TaskReturn:
+        error_path = self.context.env("ETL_ERROR_PATH")
+        if error_path is None:
+            error_file = self.file.with_suffix(".error.json")
+        else:
+            error_file = error_path / self.file.with_name(self.file.stem + ".error.json").name
+        csv = CSVBatchSource(self.file, self.context, self)
+        validator = ValidationBatchProcessor(self.context, self, csv, self.model, error_file)
+        cypher = CypherBatchSink(self.context, self, validator, self._query())
+        end = ClosedLoopBatchProcessor(self.context, self, cypher)
+        result = next(end.get_batch(self.batch_size))
+        return TaskReturn(True, result.statistics)
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.file})"
+    @abc.abstractmethod
+    def _query(self):
+        pass

neo4j_etl_lib-0.1.0/src/etl_lib/test_utils/__init__.py ADDED Viewed

File without changes

neo4j-etl-lib 0.0.2__tar.gz → 0.1.0__tar.gz

neo4j-etl-lib 0.0.2.tar.gz → 0.1.0.tar.gz