pathling 7.0.0.dev2__tar.gz → 7.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pathling-7.0.0.dev2/pathling.egg-info → pathling-7.0.1}/PKG-INFO +3 -3
- {pathling-7.0.0.dev2 → pathling-7.0.1}/README.md +2 -2
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/query.py +2 -3
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/_version.py +3 -3
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/datasink.py +36 -5
- {pathling-7.0.0.dev2 → pathling-7.0.1/pathling.egg-info}/PKG-INFO +3 -3
- {pathling-7.0.0.dev2 → pathling-7.0.1}/LICENSE +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/MANIFEST.in +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/resources/Condition.ndjson +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/data/resources/Patient.ndjson +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/designation.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/display.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/encode_bundles.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/encode_resources.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/member_of.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/member_of_old.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/property_of.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/subsumes.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/subsumes_old.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/translate.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/examples/translate_old.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/__init__.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/coding.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/context.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/core.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/datasource.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/fhir.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/functions.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/query.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling/udfs.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/SOURCES.txt +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/dependency_links.txt +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/requires.txt +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/pathling.egg-info/top_level.txt +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/setup.cfg +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/setup.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_datasource.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_encoders.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_functions.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_query.py +0 -0
- {pathling-7.0.0.dev2 → pathling-7.0.1}/tests/test_udfs.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pathling
|
|
3
|
-
Version: 7.0.0.dev2
|
|
3
|
+
Version: 7.0.1
|
|
4
4
|
Summary: Python API for Pathling
|
|
5
5
|
Home-page: https://github.com/aehrc/pathling
|
|
6
6
|
Author: Australian e-Health Research Centre, CSIRO
|
|
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
|
|
|
291
291
|
for import and use within all notebooks.
|
|
292
292
|
|
|
293
293
|
By default, Databricks uses Java 8 within its clusters, while Pathling requires
|
|
294
|
-
Java
|
|
294
|
+
Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
|
|
295
295
|
Options > Spark > Environment Variables__ and add the following:
|
|
296
296
|
|
|
297
297
|
```bash
|
|
298
|
-
JNAME=
|
|
298
|
+
JNAME=zulu17-ca-amd64
|
|
299
299
|
```
|
|
300
300
|
|
|
301
301
|
See the Databricks documentation on
|
|
@@ -271,11 +271,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
|
|
|
271
271
|
for import and use within all notebooks.
|
|
272
272
|
|
|
273
273
|
By default, Databricks uses Java 8 within its clusters, while Pathling requires
|
|
274
|
-
Java
|
|
274
|
+
Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
|
|
275
275
|
Options > Spark > Environment Variables__ and add the following:
|
|
276
276
|
|
|
277
277
|
```bash
|
|
278
|
-
JNAME=
|
|
278
|
+
JNAME=zulu17-ca-amd64
|
|
279
279
|
```
|
|
280
280
|
|
|
281
281
|
See the Databricks documentation on
|
|
@@ -14,9 +14,8 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
|
-
from tempfile import mkdtemp
|
|
18
|
-
|
|
19
17
|
from pyspark.sql import DataFrame, SparkSession
|
|
18
|
+
from tempfile import mkdtemp
|
|
20
19
|
|
|
21
20
|
from pathling import PathlingContext, DataSource, Expression as exp
|
|
22
21
|
from pathling._version import __java_version__
|
|
@@ -36,7 +35,7 @@ NDJSON_DIR_2 = os.path.join(TEMP_DIR, "ndjson")
|
|
|
36
35
|
spark = (
|
|
37
36
|
SparkSession.builder.config(
|
|
38
37
|
"spark.jars.packages",
|
|
39
|
-
f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.
|
|
38
|
+
f"au.csiro.pathling:library-runtime:{__java_version__},io.delta:delta-spark_2.12:3.2.0",
|
|
40
39
|
)
|
|
41
40
|
.config(
|
|
42
41
|
"spark.sql.extensions",
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
# Auto generated from POM project version.
|
|
3
3
|
# Please do not modify.
|
|
4
4
|
#
|
|
5
|
-
__version__="7.0.0.dev2"
|
|
6
|
-
__java_version__="7.0.
|
|
5
|
+
__version__="7.0.1"
|
|
6
|
+
__java_version__="7.0.1"
|
|
7
7
|
__scala_version__="2.12"
|
|
8
|
-
__delta_version__="3.
|
|
8
|
+
__delta_version__="3.2.0"
|
|
9
9
|
__hadoop_version__="3.3.4"
|
|
@@ -28,6 +28,22 @@ class ImportMode:
|
|
|
28
28
|
MERGE: str = "merge"
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
class SaveMode:
|
|
32
|
+
"""
|
|
33
|
+
Constants that represent the different save modes.
|
|
34
|
+
|
|
35
|
+
OVERWRITE: Overwrite any existing data.
|
|
36
|
+
APPEND: Append the new data to the existing data.
|
|
37
|
+
IGNORE: Only save the data if the file does not already exist.
|
|
38
|
+
ERROR: Raise an error if the file already exists.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
OVERWRITE: str = "overwrite"
|
|
42
|
+
APPEND: str = "append"
|
|
43
|
+
IGNORE: str = "ignore"
|
|
44
|
+
ERROR: str = "error"
|
|
45
|
+
|
|
46
|
+
|
|
31
47
|
class DataSinks(SparkConversionsMixin):
|
|
32
48
|
"""
|
|
33
49
|
A class for writing FHIR data to a variety of different targets.
|
|
@@ -41,12 +57,22 @@ class DataSinks(SparkConversionsMixin):
|
|
|
41
57
|
)
|
|
42
58
|
)
|
|
43
59
|
|
|
44
|
-
def ndjson(
|
|
60
|
+
def ndjson(
|
|
61
|
+
self,
|
|
62
|
+
path: str,
|
|
63
|
+
save_mode: Optional[str] = SaveMode.ERROR,
|
|
64
|
+
file_name_mapper: Callable[[str], str] = None,
|
|
65
|
+
) -> None:
|
|
45
66
|
"""
|
|
46
67
|
Writes the data to a directory of NDJSON files. The files will be named using the resource
|
|
47
68
|
type and the ".ndjson" extension.
|
|
48
69
|
|
|
49
70
|
:param path: The URI of the directory to write the files to.
|
|
71
|
+
:param save_mode: The save mode to use when writing the data:
|
|
72
|
+
- "overwrite" will overwrite any existing data.
|
|
73
|
+
- "append" will append the new data to the existing data.
|
|
74
|
+
- "ignore" will only save the data if the file does not already exist.
|
|
75
|
+
- "error" will raise an error if the file already exists.
|
|
50
76
|
:param file_name_mapper: An optional function that can be used to customise the mapping of
|
|
51
77
|
the resource type to the file name.
|
|
52
78
|
"""
|
|
@@ -54,17 +80,22 @@ class DataSinks(SparkConversionsMixin):
|
|
|
54
80
|
wrapped_mapper = StringMapper(
|
|
55
81
|
self.spark._jvm._gateway_client, file_name_mapper
|
|
56
82
|
)
|
|
57
|
-
self._datasinks.ndjson(path, wrapped_mapper)
|
|
83
|
+
self._datasinks.ndjson(path, save_mode, wrapped_mapper)
|
|
58
84
|
else:
|
|
59
|
-
self._datasinks.ndjson(path)
|
|
85
|
+
self._datasinks.ndjson(path, save_mode)
|
|
60
86
|
|
|
61
|
-
def parquet(self, path: str) -> None:
|
|
87
|
+
def parquet(self, path: str, save_mode: Optional[str] = SaveMode.ERROR) -> None:
|
|
62
88
|
"""
|
|
63
89
|
Writes the data to a directory of Parquet files.
|
|
64
90
|
|
|
65
91
|
:param path: The URI of the directory to write the files to.
|
|
92
|
+
:param save_mode: The save mode to use when writing the data:
|
|
93
|
+
- "overwrite" will overwrite any existing data.
|
|
94
|
+
- "append" will append the new data to the existing data.
|
|
95
|
+
- "ignore" will only save the data if the file does not already exist.
|
|
96
|
+
- "error" will raise an error if the file already exists.
|
|
66
97
|
"""
|
|
67
|
-
self._datasinks.parquet(path)
|
|
98
|
+
self._datasinks.parquet(path, save_mode)
|
|
68
99
|
|
|
69
100
|
def delta(
|
|
70
101
|
self, path: str, import_mode: Optional[str] = ImportMode.OVERWRITE
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pathling
|
|
3
|
-
Version: 7.0.0.dev2
|
|
3
|
+
Version: 7.0.1
|
|
4
4
|
Summary: Python API for Pathling
|
|
5
5
|
Home-page: https://github.com/aehrc/pathling
|
|
6
6
|
Author: Australian e-Health Research Centre, CSIRO
|
|
@@ -291,11 +291,11 @@ Maven package. Once the cluster is restarted, the libraries should be available
|
|
|
291
291
|
for import and use within all notebooks.
|
|
292
292
|
|
|
293
293
|
By default, Databricks uses Java 8 within its clusters, while Pathling requires
|
|
294
|
-
Java
|
|
294
|
+
Java 17. To enable Java 17 support within your cluster, navigate to __Advanced
|
|
295
295
|
Options > Spark > Environment Variables__ and add the following:
|
|
296
296
|
|
|
297
297
|
```bash
|
|
298
|
-
JNAME=
|
|
298
|
+
JNAME=zulu17-ca-amd64
|
|
299
299
|
```
|
|
300
300
|
|
|
301
301
|
See the Databricks documentation on
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|