pathling 9.7.1__tar.gz → 9.8.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. pathling-9.7.1/README.md → pathling-9.8.0.dev0/PKG-INFO +73 -11
  2. pathling-9.7.1/PKG-INFO → pathling-9.8.0.dev0/README.md +50 -30
  3. pathling-9.8.0.dev0/pathling/__init__.py +127 -0
  4. pathling-9.8.0.dev0/pathling/_spark_defaults.py +81 -0
  5. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/_version.py +2 -2
  6. pathling-9.8.0.dev0/pathling/cli/__init__.py +27 -0
  7. pathling-9.8.0.dev0/pathling/cli/config.py +542 -0
  8. pathling-9.8.0.dev0/pathling/cli/console.py +82 -0
  9. pathling-9.8.0.dev0/pathling/cli/convert.py +154 -0
  10. pathling-9.8.0.dev0/pathling/cli/departition.py +145 -0
  11. pathling-9.8.0.dev0/pathling/cli/errors.py +182 -0
  12. pathling-9.8.0.dev0/pathling/cli/export.py +256 -0
  13. pathling-9.8.0.dev0/pathling/cli/fhirpath.py +249 -0
  14. pathling-9.8.0.dev0/pathling/cli/io.py +263 -0
  15. pathling-9.8.0.dev0/pathling/cli/main.py +217 -0
  16. pathling-9.8.0.dev0/pathling/cli/render.py +456 -0
  17. pathling-9.8.0.dev0/pathling/cli/resources/quiet-log4j2.properties +7 -0
  18. pathling-9.8.0.dev0/pathling/cli/run.py +263 -0
  19. pathling-9.8.0.dev0/pathling/cli/session.py +174 -0
  20. pathling-9.8.0.dev0/pathling/cli/sparkconf.py +269 -0
  21. pathling-9.8.0.dev0/pathling/cli/terminology.py +814 -0
  22. pathling-9.8.0.dev0/pathling/cli/view.py +153 -0
  23. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/context.py +37 -26
  24. {pathling-9.7.1 → pathling-9.8.0.dev0}/pyproject.toml +8 -1
  25. pathling-9.7.1/pathling/__init__.py +0 -58
  26. {pathling-9.7.1 → pathling-9.8.0.dev0}/.gitignore +0 -0
  27. {pathling-9.7.1 → pathling-9.8.0.dev0}/LICENSE +0 -0
  28. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/bulk.py +0 -0
  29. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
  30. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
  31. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/data/csv/conditions.csv +0 -0
  32. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/data/resources/Condition.ndjson +0 -0
  33. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/data/resources/Patient.ndjson +0 -0
  34. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/designation.py +0 -0
  35. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/display.py +0 -0
  36. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/encode_bundles.py +0 -0
  37. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/encode_resources.py +0 -0
  38. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/fhir_search.py +0 -0
  39. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/fhir_view.py +0 -0
  40. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/member_of.py +0 -0
  41. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/property_of.py +0 -0
  42. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/subsumes.py +0 -0
  43. {pathling-9.7.1 → pathling-9.8.0.dev0}/examples/translate.py +0 -0
  44. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/bulk.py +0 -0
  45. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/coding.py +0 -0
  46. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/core.py +0 -0
  47. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/datasink.py +0 -0
  48. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/datasource.py +0 -0
  49. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/fhir.py +0 -0
  50. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/functions.py +0 -0
  51. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/spark.py +0 -0
  52. {pathling-9.7.1 → pathling-9.8.0.dev0}/pathling/udfs.py +0 -0
@@ -1,5 +1,27 @@
1
- Python API for Pathling
2
- =======================
1
+ Metadata-Version: 2.4
2
+ Name: pathling
3
+ Version: 9.8.0.dev0
4
+ Summary: Python API for Pathling
5
+ Project-URL: Homepage, https://github.com/aehrc/pathling
6
+ Author-email: "Australian e-Health Research Centre, CSIRO" <pathling@csiro.au>
7
+ License: Apache License, version 2.0
8
+ License-File: LICENSE
9
+ Keywords: analytics,fhir,pathling,spark,standards,terminology
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Requires-Python: >=3.9
16
+ Requires-Dist: click>=8.1.7
17
+ Requires-Dist: deprecated>=1.2.13
18
+ Requires-Dist: ipython>=8.18.1
19
+ Requires-Dist: pyspark<4.1.0,>=4.0.0
20
+ Requires-Dist: rich>=13.7.0
21
+ Requires-Dist: tomli>=2.0.1; python_version < '3.11'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Python API for Pathling
3
25
 
4
26
  This is the Python API for [Pathling](https://pathling.csiro.au). It provides a
5
27
  set of tools that aid the use of FHIR terminology services and FHIR data within
@@ -16,9 +38,49 @@ Prerequisites:
16
38
  To install, run this command:
17
39
 
18
40
  ```bash
19
- pip install pathling
41
+ pip install pathling
20
42
  ```
21
43
 
44
+ ## Command line interface
45
+
46
+ The package ships a `pathling` console script that surfaces the library's
47
+ functionality - data conversion, SQL on FHIR views, FHIRPath evaluation, bulk
48
+ export, terminology operations, Python scripting (`run`), and an interactive
49
+ console (`console`) - through scriptable commands. It requires a supported
50
+ Java runtime, as does the library itself.
51
+
52
+ The easiest way to run it is with [uv](https://docs.astral.sh/uv/):
53
+
54
+ ```bash
55
+ # Run without installing.
56
+ uvx pathling --version
57
+
58
+ # Or install it as a tool.
59
+ uv tool install pathling
60
+ pathling --help
61
+ ```
62
+
63
+ For example, to convert a directory of NDJSON to Parquet, or run a SQL on FHIR
64
+ view:
65
+
66
+ ```bash
67
+ pathling convert data/ --to parquet -o warehouse/
68
+ pathling view data/ --view patients.json --format csv
69
+ ```
70
+
71
+ The `run` and `console` commands execute Python code (a script, stdin, or
72
+ `-c CODE`) or open an interactive IPython session, with `spark` and `pathling`
73
+ variables already configured and in scope:
74
+
75
+ ```bash
76
+ pathling run my_script.py
77
+ pathling run -c "print(spark.version)"
78
+ pathling console
79
+ ```
80
+
81
+ See the [command line interface documentation](https://pathling.csiro.au/docs/libraries/cli)
82
+ for the full list of commands and options.
83
+
22
84
  ## Encoders
23
85
 
24
86
  The Python library features a set of encoders for converting FHIR data into
@@ -109,7 +171,7 @@ display(result)
109
171
  The result of this query would look something like this:
110
172
 
111
173
  | patient_id | street | use | city | zip |
112
- |------------|----------------------------|------|------------|-------|
174
+ | ---------- | -------------------------- | ---- | ---------- | ----- |
113
175
  | 1 | 398 Kautzer Walk Suite 62 | home | Barnstable | 02675 |
114
176
  | 1 | 186 Nitzsche Forge | work | Revere | 02151 |
115
177
  | 2 | 1087 Quitzon Club | home | Plymouth | NULL |
@@ -152,7 +214,7 @@ result.select('CODE', 'DESCRIPTION', 'VIRAL_INFECTION').show()
152
214
  Results in:
153
215
 
154
216
  | CODE | DESCRIPTION | VIRAL_INFECTION |
155
- |-----------|---------------------------|-----------------|
217
+ | --------- | ------------------------- | --------------- |
156
218
  | 65363002 | Otitis media | false |
157
219
  | 16114001 | Fracture of ankle | false |
158
220
  | 444814009 | Viral sinusitis | true |
@@ -177,7 +239,7 @@ result.select('CODE', 'DESCRIPTION', 'READ_CODE').show()
177
239
  Results in:
178
240
 
179
241
  | CODE | DESCRIPTION | READ_CODE |
180
- |-----------|---------------------------|-----------|
242
+ | --------- | ------------------------- | --------- |
181
243
  | 65363002 | Otitis media | X00ik |
182
244
  | 16114001 | Fracture of ankle | S34.. |
183
245
  | 444814009 | Viral sinusitis | XUjp0 |
@@ -213,7 +275,7 @@ result.select('CODE', 'DESCRIPTION', 'IS_ENT').show()
213
275
  Results in:
214
276
 
215
277
  | CODE | DESCRIPTION | IS_ENT |
216
- |-----------|-------------------|--------|
278
+ | --------- | ----------------- | ------ |
217
279
  | 65363002 | Otitis media | true |
218
280
  | 16114001 | Fracture of ankle | false |
219
281
  | 444814009 | Viral sinusitis | true |
@@ -245,7 +307,7 @@ with_displays = exploded_parents.withColumn(
245
307
  Results in:
246
308
 
247
309
  | CODE | DESCRIPTION | PARENT | PARENT_DISPLAY |
248
- |-----------|-------------------|-----------|-----------------------------------------|
310
+ | --------- | ----------------- | --------- | --------------------------------------- |
249
311
  | 65363002 | Otitis media | 43275000 | Otitis |
250
312
  | 65363002 | Otitis media | 68996008 | Disorder of middle ear |
251
313
  | 16114001 | Fracture of ankle | 125603006 | Injury of ankle |
@@ -279,7 +341,7 @@ exploded_synonyms = synonyms.selectExpr(
279
341
  Results in:
280
342
 
281
343
  | CODE | DESCRIPTION | SYNONYM |
282
- |-----------|--------------------------------------|--------------------------------------------|
344
+ | --------- | ------------------------------------ | ------------------------------------------ |
283
345
  | 65363002 | Otitis media | OM - Otitis media |
284
346
  | 16114001 | Fracture of ankle | Ankle fracture |
285
347
  | 16114001 | Fracture of ankle | Fracture of distal end of tibia and fibula |
@@ -322,8 +384,8 @@ Maven package. Once the cluster is restarted, the libraries should be available
322
384
  for import and use within all notebooks.
323
385
 
324
386
  By default, Databricks uses Java 8 within its clusters, while Pathling requires
325
- Java 21. To enable Java 21 support within your cluster, navigate to __Advanced
326
- Options > Spark > Environment Variables__ and add the following:
387
+ Java 21. To enable Java 21 support within your cluster, navigate to **Advanced
388
+ Options > Spark > Environment Variables** and add the following:
327
389
 
328
390
  ```bash
329
391
  JNAME=zulu21-ca-amd64
@@ -1,24 +1,4 @@
1
- Metadata-Version: 2.4
2
- Name: pathling
3
- Version: 9.7.1
4
- Summary: Python API for Pathling
5
- Project-URL: Homepage, https://github.com/aehrc/pathling
6
- Author-email: "Australian e-Health Research Centre, CSIRO" <pathling@csiro.au>
7
- License: Apache License, version 2.0
8
- License-File: LICENSE
9
- Keywords: analytics,fhir,pathling,spark,standards,terminology
10
- Classifier: Development Status :: 3 - Alpha
11
- Classifier: License :: OSI Approved :: Apache Software License
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Requires-Python: >=3.9
16
- Requires-Dist: deprecated>=1.2.13
17
- Requires-Dist: pyspark<4.1.0,>=4.0.0
18
- Description-Content-Type: text/markdown
19
-
20
- Python API for Pathling
21
- =======================
1
+ # Python API for Pathling
22
2
 
23
3
  This is the Python API for [Pathling](https://pathling.csiro.au). It provides a
24
4
  set of tools that aid the use of FHIR terminology services and FHIR data within
@@ -35,9 +15,49 @@ Prerequisites:
35
15
  To install, run this command:
36
16
 
37
17
  ```bash
38
- pip install pathling
18
+ pip install pathling
39
19
  ```
40
20
 
21
+ ## Command line interface
22
+
23
+ The package ships a `pathling` console script that surfaces the library's
24
+ functionality - data conversion, SQL on FHIR views, FHIRPath evaluation, bulk
25
+ export, terminology operations, Python scripting (`run`), and an interactive
26
+ console (`console`) - through scriptable commands. It requires a supported
27
+ Java runtime, as does the library itself.
28
+
29
+ The easiest way to run it is with [uv](https://docs.astral.sh/uv/):
30
+
31
+ ```bash
32
+ # Run without installing.
33
+ uvx pathling --version
34
+
35
+ # Or install it as a tool.
36
+ uv tool install pathling
37
+ pathling --help
38
+ ```
39
+
40
+ For example, to convert a directory of NDJSON to Parquet, or run a SQL on FHIR
41
+ view:
42
+
43
+ ```bash
44
+ pathling convert data/ --to parquet -o warehouse/
45
+ pathling view data/ --view patients.json --format csv
46
+ ```
47
+
48
+ The `run` and `console` commands execute Python code (a script, stdin, or
49
+ `-c CODE`) or open an interactive IPython session, with `spark` and `pathling`
50
+ variables already configured and in scope:
51
+
52
+ ```bash
53
+ pathling run my_script.py
54
+ pathling run -c "print(spark.version)"
55
+ pathling console
56
+ ```
57
+
58
+ See the [command line interface documentation](https://pathling.csiro.au/docs/libraries/cli)
59
+ for the full list of commands and options.
60
+
41
61
  ## Encoders
42
62
 
43
63
  The Python library features a set of encoders for converting FHIR data into
@@ -128,7 +148,7 @@ display(result)
128
148
  The result of this query would look something like this:
129
149
 
130
150
  | patient_id | street | use | city | zip |
131
- |------------|----------------------------|------|------------|-------|
151
+ | ---------- | -------------------------- | ---- | ---------- | ----- |
132
152
  | 1 | 398 Kautzer Walk Suite 62 | home | Barnstable | 02675 |
133
153
  | 1 | 186 Nitzsche Forge | work | Revere | 02151 |
134
154
  | 2 | 1087 Quitzon Club | home | Plymouth | NULL |
@@ -171,7 +191,7 @@ result.select('CODE', 'DESCRIPTION', 'VIRAL_INFECTION').show()
171
191
  Results in:
172
192
 
173
193
  | CODE | DESCRIPTION | VIRAL_INFECTION |
174
- |-----------|---------------------------|-----------------|
194
+ | --------- | ------------------------- | --------------- |
175
195
  | 65363002 | Otitis media | false |
176
196
  | 16114001 | Fracture of ankle | false |
177
197
  | 444814009 | Viral sinusitis | true |
@@ -196,7 +216,7 @@ result.select('CODE', 'DESCRIPTION', 'READ_CODE').show()
196
216
  Results in:
197
217
 
198
218
  | CODE | DESCRIPTION | READ_CODE |
199
- |-----------|---------------------------|-----------|
219
+ | --------- | ------------------------- | --------- |
200
220
  | 65363002 | Otitis media | X00ik |
201
221
  | 16114001 | Fracture of ankle | S34.. |
202
222
  | 444814009 | Viral sinusitis | XUjp0 |
@@ -232,7 +252,7 @@ result.select('CODE', 'DESCRIPTION', 'IS_ENT').show()
232
252
  Results in:
233
253
 
234
254
  | CODE | DESCRIPTION | IS_ENT |
235
- |-----------|-------------------|--------|
255
+ | --------- | ----------------- | ------ |
236
256
  | 65363002 | Otitis media | true |
237
257
  | 16114001 | Fracture of ankle | false |
238
258
  | 444814009 | Viral sinusitis | true |
@@ -264,7 +284,7 @@ with_displays = exploded_parents.withColumn(
264
284
  Results in:
265
285
 
266
286
  | CODE | DESCRIPTION | PARENT | PARENT_DISPLAY |
267
- |-----------|-------------------|-----------|-----------------------------------------|
287
+ | --------- | ----------------- | --------- | --------------------------------------- |
268
288
  | 65363002 | Otitis media | 43275000 | Otitis |
269
289
  | 65363002 | Otitis media | 68996008 | Disorder of middle ear |
270
290
  | 16114001 | Fracture of ankle | 125603006 | Injury of ankle |
@@ -298,7 +318,7 @@ exploded_synonyms = synonyms.selectExpr(
298
318
  Results in:
299
319
 
300
320
  | CODE | DESCRIPTION | SYNONYM |
301
- |-----------|--------------------------------------|--------------------------------------------|
321
+ | --------- | ------------------------------------ | ------------------------------------------ |
302
322
  | 65363002 | Otitis media | OM - Otitis media |
303
323
  | 16114001 | Fracture of ankle | Ankle fracture |
304
324
  | 16114001 | Fracture of ankle | Fracture of distal end of tibia and fibula |
@@ -341,8 +361,8 @@ Maven package. Once the cluster is restarted, the libraries should be available
341
361
  for import and use within all notebooks.
342
362
 
343
363
  By default, Databricks uses Java 8 within its clusters, while Pathling requires
344
- Java 21. To enable Java 21 support within your cluster, navigate to __Advanced
345
- Options > Spark > Environment Variables__ and add the following:
364
+ Java 21. To enable Java 21 support within your cluster, navigate to **Advanced
365
+ Options > Spark > Environment Variables** and add the following:
346
366
 
347
367
  ```bash
348
368
  JNAME=zulu21-ca-amd64
@@ -0,0 +1,127 @@
1
+ #
2
+ # Copyright © 2018-2026 Commonwealth Scientific and Industrial Research
3
+ # Organisation (CSIRO) ABN 41 687 119 230.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ """Python API for Pathling.
19
+
20
+ Public names are exposed lazily (PEP 562) so that importing the ``pathling``
21
+ package itself does not pull in PySpark and the JVM-backed submodules. The
22
+ heavy imports happen only when a public name is first accessed, which keeps the
23
+ command line interface's ``--help`` and ``--version`` paths fast.
24
+
25
+ Author: John Grimes.
26
+ """
27
+
28
+ # The TYPE_CHECKING imports below exist only so that type checkers and IDEs can
29
+ # resolve the lazily-exported public names; they are intentionally unused at
30
+ # runtime, so unused-import checks are disabled for this re-export shim.
31
+ # ruff: noqa: F401
32
+
33
+ import importlib
34
+ from typing import TYPE_CHECKING, Any
35
+
36
+ if TYPE_CHECKING:
37
+ from .coding import Coding
38
+ from .context import PathlingContext, StorageType
39
+ from .core import Expression, VariableExpression
40
+ from .datasource import DataSource, DataSources
41
+ from .fhir import MimeType, Version
42
+ from .functions import to_coding, to_ecl_value_set, to_snomed_coding
43
+ from .udfs import (
44
+ Equivalence,
45
+ PropertyType,
46
+ designation,
47
+ display,
48
+ member_of,
49
+ property_of,
50
+ subsumed_by,
51
+ subsumes,
52
+ translate,
53
+ )
54
+
55
+ # Maps each lazily-exported public name to the submodule that defines it.
56
+ _LAZY_EXPORTS = {
57
+ "Coding": "pathling.coding",
58
+ "PathlingContext": "pathling.context",
59
+ "StorageType": "pathling.context",
60
+ "Expression": "pathling.core",
61
+ "VariableExpression": "pathling.core",
62
+ "DataSource": "pathling.datasource",
63
+ "DataSources": "pathling.datasource",
64
+ "MimeType": "pathling.fhir",
65
+ "Version": "pathling.fhir",
66
+ "to_coding": "pathling.functions",
67
+ "to_snomed_coding": "pathling.functions",
68
+ "to_ecl_value_set": "pathling.functions",
69
+ "member_of": "pathling.udfs",
70
+ "translate": "pathling.udfs",
71
+ "subsumes": "pathling.udfs",
72
+ "subsumed_by": "pathling.udfs",
73
+ "property_of": "pathling.udfs",
74
+ "display": "pathling.udfs",
75
+ "designation": "pathling.udfs",
76
+ "PropertyType": "pathling.udfs",
77
+ "Equivalence": "pathling.udfs",
78
+ }
79
+
80
+ __all__ = list(_LAZY_EXPORTS)
81
+
82
+ # The package submodules that may be accessed lazily after a bare
83
+ # ``import pathling`` (e.g. ``pathling.udfs.member_of``). This curated allow-list
84
+ # keeps the fallback safe - unknown names still raise ``AttributeError`` - and
85
+ # restores the pre-shim behaviour where these submodules were importable as
86
+ # package attributes, without importing PySpark on a bare ``import pathling``.
87
+ _LAZY_SUBMODULES = (
88
+ "coding",
89
+ "context",
90
+ "core",
91
+ "datasource",
92
+ "fhir",
93
+ "functions",
94
+ "udfs",
95
+ )
96
+
97
+
98
def __getattr__(name: str) -> Any:
    """Lazily resolves an attribute of the ``pathling`` package (PEP 562).

    Public exports are looked up in the submodule that defines them, and the
    allow-listed submodules are imported on demand. Whatever is resolved is
    stored in the package namespace so the hook is not consulted again for the
    same name.

    :param name: the attribute being accessed on the ``pathling`` package.
    :return: the exported object, or the imported submodule.
    :raises AttributeError: if the name is neither a known public export nor a
        known package submodule.
    """
    defining_module = _LAZY_EXPORTS.get(name)
    if defining_module is not None:
        # A public export: import its defining submodule and pull the name out.
        resolved = getattr(importlib.import_module(defining_module), name)
    elif name in _LAZY_SUBMODULES:
        # An allow-listed submodule: import it relative to this package.
        resolved = importlib.import_module(f"{__name__}.{name}")
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Cache on the package so that later lookups never reach this hook.
    globals()[name] = resolved
    return resolved
119
+
120
+
121
def __dir__() -> list:
    """Lists the names that introspection should report for the package.

    :return: the public export names together with the lazily-available
        submodule names, de-duplicated and sorted.
    """
    advertised = {*__all__, *_LAZY_SUBMODULES}
    return sorted(advertised)
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright © 2018-2026 Commonwealth Scientific and Industrial Research
3
+ # Organisation (CSIRO) ABN 41 687 119 230.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ """The single, PySpark-free source of truth for Pathling's managed Spark defaults.
19
+
20
+ The managed coordinates, the Delta SQL extension, and the Delta catalog that
21
+ every Pathling Spark session requires are defined here, built from the versions
22
+ in :mod:`pathling._version`. Both the session builder
23
+ (:func:`pathling.context._build_spark_session`) and the CLI merge logic
24
+ (:mod:`pathling.cli.sparkconf`) import these values so the two cannot drift
25
+ apart. This module deliberately imports no PySpark, so the CLI configuration
26
+ path can reference the defaults without paying Spark's import cost.
27
+
28
+ Author: John Grimes.
29
+ """
30
+
31
+ from pathling._version import (
32
+ __delta_version__,
33
+ __java_version__,
34
+ __scala_version__,
35
+ )
36
+
37
+ # The Spark configuration key holding the comma-separated Maven coordinates.
38
+ PACKAGES_KEY = "spark.jars.packages"
39
+
40
+ # The Spark configuration key holding the comma-separated SQL extension classes.
41
+ EXTENSIONS_KEY = "spark.sql.extensions"
42
+
43
+ # The Spark configuration key for the session catalog implementation.
44
+ CATALOG_KEY = "spark.sql.catalog.spark_catalog"
45
+
46
+ # The managed Maven coordinate (group:artifact) for the Pathling library runtime.
47
+ LIBRARY_RUNTIME_COORDINATE = "au.csiro.pathling:library-runtime"
48
+
49
+ # The managed Maven coordinate (group:artifact) for Delta Lake, which carries the
50
+ # Scala binary version in its artifact identifier.
51
+ DELTA_COORDINATE = f"io.delta:delta-spark_{__scala_version__}"
52
+
53
+ # The group:artifact identities of the coordinates Pathling manages, used to
54
+ # detect a user-supplied override at a different version.
55
+ MANAGED_COORDINATES = frozenset({LIBRARY_RUNTIME_COORDINATE, DELTA_COORDINATE})
56
+
57
+ # The Delta SQL extension class that Pathling always requires.
58
+ DELTA_EXTENSION = "io.delta.sql.DeltaSparkSessionExtension"
59
+
60
+ # The Delta catalog implementation that Pathling always requires.
61
+ DELTA_CATALOG = "org.apache.spark.sql.delta.catalog.DeltaCatalog"
62
+
63
+
64
def managed_spark_defaults() -> dict:
    """Builds the Spark configuration that every Pathling session requires.

    The packages value is the two managed coordinates, each pinned to the
    version declared in :mod:`pathling._version` and each followed by a comma,
    so the resulting string keeps the trailing comma of the historical inline
    literal. The extension and catalog values are the fixed Delta class names.

    :return: a mapping of managed Spark configuration key to value.
    """
    versioned_coordinates = (
        f"{LIBRARY_RUNTIME_COORDINATE}:{__java_version__}",
        f"{DELTA_COORDINATE}:{__delta_version__}",
    )
    # Terminate (rather than separate) each coordinate with a comma, which
    # deliberately leaves a trailing comma on the final string.
    packages = "".join(f"{coordinate}," for coordinate in versioned_coordinates)

    defaults = {}
    defaults[PACKAGES_KEY] = packages
    defaults[EXTENSIONS_KEY] = DELTA_EXTENSION
    defaults[CATALOG_KEY] = DELTA_CATALOG
    return defaults
@@ -2,8 +2,8 @@
2
2
  # Auto generated from POM project version.
3
3
  # Please do not modify.
4
4
  #
5
- __version__="9.7.1"
6
- __java_version__="9.7.1"
5
+ __version__="9.8.0.dev0"
6
+ __java_version__="9.8.0-SNAPSHOT"
7
7
  __scala_version__="2.13"
8
8
  __delta_version__="4.0.0"
9
9
  __hadoop_version__="3.4.1"
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright © 2018-2026 Commonwealth Scientific and Industrial Research
3
+ # Organisation (CSIRO) ABN 41 687 119 230.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ """Command line interface for Pathling.
19
+
20
+ This subpackage exposes the Pathling Python library through a flat, verb-based
21
+ command tree installed as the ``pathling`` console script. Modules are kept
22
+ free of eager PySpark imports so that ``--help`` and ``--version`` remain fast;
23
+ the Spark session is created lazily by :mod:`pathling.cli.session` only when a
24
+ command needs it.
25
+
26
+ Author: John Grimes.
27
+ """