batch-analytics 0.2.6__tar.gz → 0.2.8__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (28)
  1. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/PKG-INFO +1 -1
  2. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/pyproject.toml +1 -1
  3. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/job_runner.py +28 -2
  4. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/PKG-INFO +1 -1
  5. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/README.md +0 -0
  6. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/setup.cfg +0 -0
  7. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/__init__.py +0 -0
  8. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/__main__.py +0 -0
  9. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/analytics/__init__.py +0 -0
  10. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/analytics/correlation.py +0 -0
  11. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/analytics/linear_regression.py +0 -0
  12. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/analytics/pca_clustering.py +0 -0
  13. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/analytics/t_test.py +0 -0
  14. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/config.py +0 -0
  15. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/extract.py +0 -0
  16. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/log.py +0 -0
  17. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/modules.py +0 -0
  18. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/output/__init__.py +0 -0
  19. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/output/base.py +0 -0
  20. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/output/clickhouse.py +0 -0
  21. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/output/local.py +0 -0
  22. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/output/s3.py +0 -0
  23. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics/transform.py +0 -0
  24. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/SOURCES.txt +0 -0
  25. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/dependency_links.txt +0 -0
  26. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/entry_points.txt +0 -0
  27. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/requires.txt +0 -0
  28. {batch_analytics-0.2.6 → batch_analytics-0.2.8}/src/batch_analytics.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "batch-analytics"
7
- version = "0.2.6"
7
+ version = "0.2.8"
8
8
  description = "PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -67,6 +67,30 @@ def create_spark_session(
67
67
  if cfg.master.startswith("k8s://"):
68
68
  packages.append("org.apache.hadoop:hadoop-aws:3.3.4")
69
69
 
70
+ # Spark-on-K8s: absolute paths in spark.jars are re-sent to executors as ./basename.jar and
71
+ # fail there ("Unable to create executor due to ./clickhouse-spark-runtime-..."). JARs baked
72
+ # into the driver/executor image belong on the JVM classpath instead.
73
+ if cfg.master.startswith("k8s://") and jar_list:
74
+ local_cp: List[str] = []
75
+ remote_jars: List[str] = []
76
+ for p in jar_list:
77
+ p = p.strip()
78
+ if not p:
79
+ continue
80
+ if p.startswith("/") and p.endswith(".jar"):
81
+ local_cp.append(p)
82
+ else:
83
+ remote_jars.append(p)
84
+ if local_cp:
85
+ joined = ":".join(local_cp)
86
+ builder = (
87
+ builder.config("spark.driver.extraClassPath", joined)
88
+ .config("spark.executor.extraClassPath", joined)
89
+ .config("spark.kubernetes.driver.extraClassPath", joined)
90
+ .config("spark.kubernetes.executor.extraClassPath", joined)
91
+ )
92
+ jar_list = remote_jars
93
+
70
94
  if jar_list:
71
95
  builder = builder.config("spark.jars", ",".join(jar_list))
72
96
  if packages:
@@ -176,10 +200,12 @@ def run_pipeline(
176
200
  # Override: BATCH_SPARK_CLICKHOUSE_PACKAGES=maven coords / https jar URLs (comma-sep) or "" for SPARK_JARS only.
177
201
  _raw_ch = os.environ.get("BATCH_SPARK_CLICKHOUSE_PACKAGES")
178
202
  if _raw_ch is None:
203
+ # Shaded *-all.jar; pin matches Docker image / SPARK_JARS (0.9.x; no 0.10.x on Central for this artifact).
204
+ _ch_jdbc = os.environ.get("BATCH_CLICKHOUSE_JDBC_VERSION", "0.9.8").strip()
179
205
  ch_pkgs = (
180
206
  "com.clickhouse.spark:clickhouse-spark-runtime-3.5_2.12:0.8.0,"
181
- "https://repo1.maven.org/maven2/com/clickhouse/clickhouse-jdbc/0.6.2/"
182
- "clickhouse-jdbc-0.6.2-all.jar"
207
+ f"https://repo1.maven.org/maven2/com/clickhouse/clickhouse-jdbc/{_ch_jdbc}/"
208
+ f"clickhouse-jdbc-{_ch_jdbc}-all.jar"
183
209
  )
184
210
  elif not _raw_ch.strip():
185
211
  ch_pkgs = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT