PyPI - pathling - Versions diffs - 7.0.0.dev2__tar.gz → 7.0.1__tar.gz - Mend

pathling 7.0.0.dev2.tar.gz → 7.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

{pathling-7.0.0.dev2/pathling.egg-info → pathling-7.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pathling
-Version: 7.0.0.dev2
+Version: 7.0.1
 Summary: Python API for Pathling
 Home-page: https://github.com/aehrc/pathling
 Author: Australian e-Health Research Centre, CSIRO
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
 for import and use within all notebooks.
 By default, Databricks uses Java 8 within its clusters, while Pathling requires
-Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
+Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
 Options > Spark > Environment Variables__ and add the following:
 ```bash
-JNAME=zulu11-ca-amd64
+JNAME=zulu17-ca-amd64
 ```
 See the Databricks documentation on

{pathling-7.0.0.dev2 → pathling-7.0.1}/README.md RENAMED Viewed

@@ -271,11 +271,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
 for import and use within all notebooks.
 By default, Databricks uses Java 8 within its clusters, while Pathling requires
-Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
+Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
 Options > Spark > Environment Variables__ and add the following:
 ```bash
-JNAME=zulu11-ca-amd64
+JNAME=zulu17-ca-amd64
 ```
 See the Databricks documentation on

{pathling-7.0.0.dev2 → pathling-7.0.1}/examples/query.py RENAMED Viewed

@@ -14,9 +14,8 @@
 #  limitations under the License.
 import os
-from tempfile import mkdtemp
 from pyspark.sql import DataFrame, SparkSession
+from tempfile import mkdtemp
 from pathling import PathlingContext, DataSource, Expression as exp
 from pathling._version import __java_version__
@@ -36,7 +35,7 @@ NDJSON_DIR_2 = os.path.join(TEMP_DIR, "ndjson")
 spark = (
     SparkSession.builder.config(
         "spark.jars.packages",
-        f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.1.0",
+        f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.2.0",
     )
     .config(
         "spark.sql.extensions",

{pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/_version.py RENAMED Viewed

@@ -2,8 +2,8 @@
 # Auto generated from POM project version.
 # Please do not modify.
 #
-__version__="7.0.0.dev2"
-__java_version__="7.0.0-SNAPSHOT"
+__version__="7.0.1"
+__java_version__="7.0.1"
 __scala_version__="2.12"
-__delta_version__="3.1.0"
+__delta_version__="3.2.0"
 __hadoop_version__="3.3.4"

{pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/datasink.py RENAMED Viewed

@@ -28,6 +28,22 @@ class ImportMode:
     MERGE: str = "merge"
+class SaveMode:
+    """
+    Constants that represent the different save modes.
+    OVERWRITE: Overwrite any existing data.
+    APPEND: Append the new data to the existing data.
+    IGNORE: Only save the data if the file does not already exist.
+    ERROR: Raise an error if the file already exists.
+    """
+    OVERWRITE: str = "overwrite"
+    APPEND: str = "append"
+    IGNORE: str = "ignore"
+    ERROR: str = "error"
 class DataSinks(SparkConversionsMixin):
     """
     A class for writing FHIR data to a variety of different targets.
@@ -41,12 +57,22 @@ class DataSinks(SparkConversionsMixin):
             )
         )
-    def ndjson(self, path: str, file_name_mapper: Callable[[str], str] = None) -> None:
+    def ndjson(
+        self,
+        path: str,
+        save_mode: Optional[str] = SaveMode.ERROR,
+        file_name_mapper: Callable[[str], str] = None,
+    ) -> None:
         """
         Writes the data to a directory of NDJSON files. The files will be named using the resource
         type and the ".ndjson" extension.
         :param path: The URI of the directory to write the files to.
+        :param save_mode: The save mode to use when writing the data:
+            - "overwrite" will overwrite any existing data.
+            - "append" will append the new data to the existing data.
+            - "ignore" will only save the data if the file does not already exist.
+            - "error" will raise an error if the file already exists.
         :param file_name_mapper: An optional function that can be used to customise the mapping of
         the resource type to the file name.
         """
@@ -54,17 +80,22 @@ class DataSinks(SparkConversionsMixin):
             wrapped_mapper = StringMapper(
                 self.spark._jvm._gateway_client, file_name_mapper
             )
-            self._datasinks.ndjson(path, wrapped_mapper)
+            self._datasinks.ndjson(path, save_mode, wrapped_mapper)
         else:
-            self._datasinks.ndjson(path)
+            self._datasinks.ndjson(path, save_mode)
-    def parquet(self, path: str) -> None:
+    def parquet(self, path: str, save_mode: Optional[str] = SaveMode.ERROR) -> None:
         """
         Writes the data to a directory of Parquet files.
         :param path: The URI of the directory to write the files to.
+        :param save_mode: The save mode to use when writing the data:
+            - "overwrite" will overwrite any existing data.
+            - "append" will append the new data to the existing data.
+            - "ignore" will only save the data if the file does not already exist.
+            - "error" will raise an error if the file already exists.
         """
-        self._datasinks.parquet(path)
+        self._datasinks.parquet(path, save_mode)
     def delta(
         self, path: str, import_mode: Optional[str] = ImportMode.OVERWRITE

{pathling-7.0.0.dev2 → pathling-7.0.1/pathling.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pathling
-Version: 7.0.0.dev2
+Version: 7.0.1
 Summary: Python API for Pathling
 Home-page: https://github.com/aehrc/pathling
 Author: Australian e-Health Research Centre, CSIRO
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
 for import and use within all notebooks.
 By default, Databricks uses Java 8 within its clusters, while Pathling requires
-Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
+Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
 Options > Spark > Environment Variables__ and add the following:
 ```bash
-JNAME=zulu11-ca-amd64
+JNAME=zulu17-ca-amd64
 ```
 See the Databricks documentation on