batch-analytics 0.2.7__tar.gz → 0.2.9__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those versions as they appear in their respective public registries.
Files changed (29)
  1. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/PKG-INFO +7 -4
  2. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/pyproject.toml +17 -4
  3. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/job_runner.py +24 -0
  4. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics.egg-info/PKG-INFO +7 -4
  5. batch_analytics-0.2.9/src/batch_analytics.egg-info/requires.txt +38 -0
  6. batch_analytics-0.2.7/src/batch_analytics.egg-info/requires.txt +0 -23
  7. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/README.md +0 -0
  8. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/setup.cfg +0 -0
  9. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/__init__.py +0 -0
  10. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/__main__.py +0 -0
  11. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/analytics/__init__.py +0 -0
  12. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/analytics/correlation.py +0 -0
  13. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/analytics/linear_regression.py +0 -0
  14. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/analytics/pca_clustering.py +0 -0
  15. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/analytics/t_test.py +0 -0
  16. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/config.py +0 -0
  17. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/extract.py +0 -0
  18. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/log.py +0 -0
  19. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/modules.py +0 -0
  20. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/output/__init__.py +0 -0
  21. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/output/base.py +0 -0
  22. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/output/clickhouse.py +0 -0
  23. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/output/local.py +0 -0
  24. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/output/s3.py +0 -0
  25. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics/transform.py +0 -0
  26. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics.egg-info/SOURCES.txt +0 -0
  27. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics.egg-info/dependency_links.txt +0 -0
  28. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics.egg-info/entry_points.txt +0 -0
  29. {batch_analytics-0.2.7 → batch_analytics-0.2.9}/src/batch_analytics.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT
@@ -15,14 +15,17 @@ Requires-Dist: scipy>=1.5.0; extra == "ttest"
15
15
  Provides-Extra: s3
16
16
  Requires-Dist: boto3>=1.28; extra == "s3"
17
17
  Provides-Extra: clickhouse
18
- Requires-Dist: clickhouse-connect>=0.7; extra == "clickhouse"
18
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "clickhouse"
19
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "clickhouse"
19
20
  Provides-Extra: output
20
21
  Requires-Dist: boto3>=1.28; extra == "output"
21
- Requires-Dist: clickhouse-connect>=0.7; extra == "output"
22
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "output"
23
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "output"
22
24
  Provides-Extra: full
23
25
  Requires-Dist: scipy>=1.5.0; extra == "full"
24
26
  Requires-Dist: boto3>=1.28; extra == "full"
25
- Requires-Dist: clickhouse-connect>=0.7; extra == "full"
27
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "full"
28
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "full"
26
29
 
27
30
  # Batch Analytics
28
31
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "batch-analytics"
7
- version = "0.2.7"
7
+ version = "0.2.9"
8
8
  description = "PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -20,10 +20,23 @@ dev = ["pytest>=7.0"]
20
20
  # Welch t-test module (batch_analytics.analytics.t_test)
21
21
  ttest = ["scipy>=1.5.0"]
22
22
  s3 = ["boto3>=1.28"]
23
- clickhouse = ["clickhouse-connect>=0.7"]
24
- output = ["boto3>=1.28", "clickhouse-connect>=0.7"]
23
+ # 0.9+ uses list[...] etc. and breaks on Python 3.8; 3.9+ can take current clickhouse-connect.
24
+ clickhouse = [
25
+ "clickhouse-connect>=0.7,<0.9; python_version < '3.9'",
26
+ "clickhouse-connect>=0.7; python_version >= '3.9'",
27
+ ]
28
+ output = [
29
+ "boto3>=1.28",
30
+ "clickhouse-connect>=0.7,<0.9; python_version < '3.9'",
31
+ "clickhouse-connect>=0.7; python_version >= '3.9'",
32
+ ]
25
33
  # Install all optional runtime deps used anywhere in the package
26
- full = ["scipy>=1.5.0", "boto3>=1.28", "clickhouse-connect>=0.7"]
34
+ full = [
35
+ "scipy>=1.5.0",
36
+ "boto3>=1.28",
37
+ "clickhouse-connect>=0.7,<0.9; python_version < '3.9'",
38
+ "clickhouse-connect>=0.7; python_version >= '3.9'",
39
+ ]
27
40
 
28
41
  [project.scripts]
29
42
  batch-analytics = "batch_analytics.job_runner:main"
@@ -67,6 +67,30 @@ def create_spark_session(
67
67
  if cfg.master.startswith("k8s://"):
68
68
  packages.append("org.apache.hadoop:hadoop-aws:3.3.4")
69
69
 
70
+ # Spark-on-K8s: absolute paths in spark.jars are re-sent to executors as ./basename.jar and
71
+ # fail there ("Unable to create executor due to ./clickhouse-spark-runtime-..."). JARs baked
72
+ # into the driver/executor image belong on the JVM classpath instead.
73
+ if cfg.master.startswith("k8s://") and jar_list:
74
+ local_cp: List[str] = []
75
+ remote_jars: List[str] = []
76
+ for p in jar_list:
77
+ p = p.strip()
78
+ if not p:
79
+ continue
80
+ if p.startswith("/") and p.endswith(".jar"):
81
+ local_cp.append(p)
82
+ else:
83
+ remote_jars.append(p)
84
+ if local_cp:
85
+ joined = ":".join(local_cp)
86
+ builder = (
87
+ builder.config("spark.driver.extraClassPath", joined)
88
+ .config("spark.executor.extraClassPath", joined)
89
+ .config("spark.kubernetes.driver.extraClassPath", joined)
90
+ .config("spark.kubernetes.executor.extraClassPath", joined)
91
+ )
92
+ jar_list = remote_jars
93
+
70
94
  if jar_list:
71
95
  builder = builder.config("spark.jars", ",".join(jar_list))
72
96
  if packages:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT
@@ -15,14 +15,17 @@ Requires-Dist: scipy>=1.5.0; extra == "ttest"
15
15
  Provides-Extra: s3
16
16
  Requires-Dist: boto3>=1.28; extra == "s3"
17
17
  Provides-Extra: clickhouse
18
- Requires-Dist: clickhouse-connect>=0.7; extra == "clickhouse"
18
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "clickhouse"
19
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "clickhouse"
19
20
  Provides-Extra: output
20
21
  Requires-Dist: boto3>=1.28; extra == "output"
21
- Requires-Dist: clickhouse-connect>=0.7; extra == "output"
22
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "output"
23
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "output"
22
24
  Provides-Extra: full
23
25
  Requires-Dist: scipy>=1.5.0; extra == "full"
24
26
  Requires-Dist: boto3>=1.28; extra == "full"
25
- Requires-Dist: clickhouse-connect>=0.7; extra == "full"
27
+ Requires-Dist: clickhouse-connect<0.9,>=0.7; python_version < "3.9" and extra == "full"
28
+ Requires-Dist: clickhouse-connect>=0.7; python_version >= "3.9" and extra == "full"
26
29
 
27
30
  # Batch Analytics
28
31
 
@@ -0,0 +1,38 @@
1
+ pyspark<3.6,>=3.4
2
+ numpy>=1.19.0
3
+
4
+ [clickhouse]
5
+
6
+ [clickhouse:python_version < "3.9"]
7
+ clickhouse-connect<0.9,>=0.7
8
+
9
+ [clickhouse:python_version >= "3.9"]
10
+ clickhouse-connect>=0.7
11
+
12
+ [dev]
13
+ pytest>=7.0
14
+
15
+ [full]
16
+ scipy>=1.5.0
17
+ boto3>=1.28
18
+
19
+ [full:python_version < "3.9"]
20
+ clickhouse-connect<0.9,>=0.7
21
+
22
+ [full:python_version >= "3.9"]
23
+ clickhouse-connect>=0.7
24
+
25
+ [output]
26
+ boto3>=1.28
27
+
28
+ [output:python_version < "3.9"]
29
+ clickhouse-connect<0.9,>=0.7
30
+
31
+ [output:python_version >= "3.9"]
32
+ clickhouse-connect>=0.7
33
+
34
+ [s3]
35
+ boto3>=1.28
36
+
37
+ [ttest]
38
+ scipy>=1.5.0
@@ -1,23 +0,0 @@
1
- pyspark<3.6,>=3.4
2
- numpy>=1.19.0
3
-
4
- [clickhouse]
5
- clickhouse-connect>=0.7
6
-
7
- [dev]
8
- pytest>=7.0
9
-
10
- [full]
11
- scipy>=1.5.0
12
- boto3>=1.28
13
- clickhouse-connect>=0.7
14
-
15
- [output]
16
- boto3>=1.28
17
- clickhouse-connect>=0.7
18
-
19
- [s3]
20
- boto3>=1.28
21
-
22
- [ttest]
23
- scipy>=1.5.0