pathling 7.0.0.dev2__tar.gz → 7.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {pathling-7.0.0.dev2/pathling.egg-info → pathling-7.0.1}/PKG-INFO +3 -3
  2. {pathling-7.0.0.dev2 → pathling-7.0.1}/README.md +2 -2
  3. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/query.py +2 -3
  4. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/_version.py +3 -3
  5. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/datasink.py +36 -5
  6. {pathling-7.0.0.dev2 → pathling-7.0.1/pathling.egg-info}/PKG-INFO +3 -3
  7. {pathling-7.0.0.dev2 → pathling-7.0.1}/LICENSE +0 -0
  8. {pathling-7.0.0.dev2 → pathling-7.0.1}/MANIFEST.in +0 -0
  9. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
  10. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
  11. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/resources/Condition.ndjson +0 -0
  12. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/resources/Patient.ndjson +0 -0
  13. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/designation.py +0 -0
  14. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/display.py +0 -0
  15. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/encode_bundles.py +0 -0
  16. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/encode_resources.py +0 -0
  17. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/member_of.py +0 -0
  18. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/member_of_old.py +0 -0
  19. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/property_of.py +0 -0
  20. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/subsumes.py +0 -0
  21. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/subsumes_old.py +0 -0
  22. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/translate.py +0 -0
  23. {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/translate_old.py +0 -0
  24. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/__init__.py +0 -0
  25. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/coding.py +0 -0
  26. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/context.py +0 -0
  27. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/core.py +0 -0
  28. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/datasource.py +0 -0
  29. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/fhir.py +0 -0
  30. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/functions.py +0 -0
  31. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/query.py +0 -0
  32. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/udfs.py +0 -0
  33. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/SOURCES.txt +0 -0
  34. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/dependency_links.txt +0 -0
  35. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/requires.txt +0 -0
  36. {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/top_level.txt +0 -0
  37. {pathling-7.0.0.dev2 → pathling-7.0.1}/setup.cfg +0 -0
  38. {pathling-7.0.0.dev2 → pathling-7.0.1}/setup.py +0 -0
  39. {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_datasource.py +0 -0
  40. {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_encoders.py +0 -0
  41. {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_functions.py +0 -0
  42. {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_query.py +0 -0
  43. {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_udfs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pathling
3
- Version: 7.0.0.dev2
3
+ Version: 7.0.1
4
4
  Summary: Python API for Pathling
5
5
  Home-page: https://github.com/aehrc/pathling
6
6
  Author: Australian e-Health Research Centre, CSIRO
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
291
291
  for import and use within all notebooks.
292
292
 
293
293
  By default, Databricks uses Java 8 within its clusters, while Pathling requires
294
- Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
294
+ Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
295
295
  Options > Spark > Environment Variables__ and add the following:
296
296
 
297
297
  ```bash
298
- JNAME=zulu11-ca-amd64
298
+ JNAME=zulu17-ca-amd64
299
299
  ```
300
300
 
301
301
  See the Databricks documentation on
@@ -271,11 +271,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
271
271
  for import and use within all notebooks.
272
272
 
273
273
  By default, Databricks uses Java 8 within its clusters, while Pathling requires
274
- Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
274
+ Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
275
275
  Options > Spark > Environment Variables__ and add the following:
276
276
 
277
277
  ```bash
278
- JNAME=zulu11-ca-amd64
278
+ JNAME=zulu17-ca-amd64
279
279
  ```
280
280
 
281
281
  See the Databricks documentation on
@@ -14,9 +14,8 @@
14
14
  # limitations under the License.
15
15
 
16
16
  import os
17
- from tempfile import mkdtemp
18
-
19
17
  from pyspark.sql import DataFrame, SparkSession
18
+ from tempfile import mkdtemp
20
19
 
21
20
  from pathling import PathlingContext, DataSource, Expression as exp
22
21
  from pathling._version import __java_version__
@@ -36,7 +35,7 @@ NDJSON_DIR_2 = os.path.join(TEMP_DIR, "ndjson")
36
35
  spark = (
37
36
  SparkSession.builder.config(
38
37
  "spark.jars.packages",
39
- f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.1.0",
38
+ f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.2.0",
40
39
  )
41
40
  .config(
42
41
  "spark.sql.extensions",
@@ -2,8 +2,8 @@
2
2
  # Auto generated from POM project version.
3
3
  # Please do not modify.
4
4
  #
5
- __version__="7.0.0.dev2"
6
- __java_version__="7.0.0-SNAPSHOT"
5
+ __version__="7.0.1"
6
+ __java_version__="7.0.1"
7
7
  __scala_version__="2.12"
8
- __delta_version__="3.1.0"
8
+ __delta_version__="3.2.0"
9
9
  __hadoop_version__="3.3.4"
@@ -28,6 +28,22 @@ class ImportMode:
28
28
  MERGE: str = "merge"
29
29
 
30
30
 
31
+ class SaveMode:
32
+ """
33
+ Constants that represent the different save modes.
34
+
35
+ OVERWRITE: Overwrite any existing data.
36
+ APPEND: Append the new data to the existing data.
37
+ IGNORE: Only save the data if the file does not already exist.
38
+ ERROR: Raise an error if the file already exists.
39
+ """
40
+
41
+ OVERWRITE: str = "overwrite"
42
+ APPEND: str = "append"
43
+ IGNORE: str = "ignore"
44
+ ERROR: str = "error"
45
+
46
+
31
47
  class DataSinks(SparkConversionsMixin):
32
48
  """
33
49
  A class for writing FHIR data to a variety of different targets.
@@ -41,12 +57,22 @@ class DataSinks(SparkConversionsMixin):
41
57
  )
42
58
  )
43
59
 
44
- def ndjson(self, path: str, file_name_mapper: Callable[[str], str] = None) -> None:
60
+ def ndjson(
61
+ self,
62
+ path: str,
63
+ save_mode: Optional[str] = SaveMode.ERROR,
64
+ file_name_mapper: Callable[[str], str] = None,
65
+ ) -> None:
45
66
  """
46
67
  Writes the data to a directory of NDJSON files. The files will be named using the resource
47
68
  type and the ".ndjson" extension.
48
69
 
49
70
  :param path: The URI of the directory to write the files to.
71
+ :param save_mode: The save mode to use when writing the data:
72
+ - "overwrite" will overwrite any existing data.
73
+ - "append" will append the new data to the existing data.
74
+ - "ignore" will only save the data if the file does not already exist.
75
+ - "error" will raise an error if the file already exists.
50
76
  :param file_name_mapper: An optional function that can be used to customise the mapping of
51
77
  the resource type to the file name.
52
78
  """
@@ -54,17 +80,22 @@ class DataSinks(SparkConversionsMixin):
54
80
  wrapped_mapper = StringMapper(
55
81
  self.spark._jvm._gateway_client, file_name_mapper
56
82
  )
57
- self._datasinks.ndjson(path, wrapped_mapper)
83
+ self._datasinks.ndjson(path, save_mode, wrapped_mapper)
58
84
  else:
59
- self._datasinks.ndjson(path)
85
+ self._datasinks.ndjson(path, save_mode)
60
86
 
61
- def parquet(self, path: str) -> None:
87
+ def parquet(self, path: str, save_mode: Optional[str] = SaveMode.ERROR) -> None:
62
88
  """
63
89
  Writes the data to a directory of Parquet files.
64
90
 
65
91
  :param path: The URI of the directory to write the files to.
92
+ :param save_mode: The save mode to use when writing the data:
93
+ - "overwrite" will overwrite any existing data.
94
+ - "append" will append the new data to the existing data.
95
+ - "ignore" will only save the data if the file does not already exist.
96
+ - "error" will raise an error if the file already exists.
66
97
  """
67
- self._datasinks.parquet(path)
98
+ self._datasinks.parquet(path, save_mode)
68
99
 
69
100
  def delta(
70
101
  self, path: str, import_mode: Optional[str] = ImportMode.OVERWRITE
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pathling
3
- Version: 7.0.0.dev2
3
+ Version: 7.0.1
4
4
  Summary: Python API for Pathling
5
5
  Home-page: https://github.com/aehrc/pathling
6
6
  Author: Australian e-Health Research Centre, CSIRO
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
291
291
  for import and use within all notebooks.
292
292
 
293
293
  By default, Databricks uses Java 8 within its clusters, while Pathling requires
294
- Java 11. To enable Java 11 support within your cluster, navigate to __Advanced
294
+ Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
295
295
  Options > Spark > Environment Variables__ and add the following:
296
296
 
297
297
  ```bash
298
- JNAME=zulu11-ca-amd64
298
+ JNAME=zulu17-ca-amd64
299
299
  ```
300
300
 
301
301
  See the Databricks documentation on
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes