batch-analytics 0.3.3__tar.gz → 0.3.4__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (28)
  1. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/PKG-INFO +1 -1
  2. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/pyproject.toml +1 -1
  3. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/analytics/pca_clustering.py +3 -2
  4. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/PKG-INFO +1 -1
  5. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/README.md +0 -0
  6. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/setup.cfg +0 -0
  7. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/__init__.py +0 -0
  8. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/__main__.py +0 -0
  9. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/analytics/__init__.py +0 -0
  10. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/analytics/correlation.py +0 -0
  11. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/analytics/linear_regression.py +0 -0
  12. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/analytics/t_test.py +0 -0
  13. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/config.py +0 -0
  14. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/extract.py +0 -0
  15. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/job_runner.py +0 -0
  16. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/log.py +0 -0
  17. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/modules.py +0 -0
  18. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/output/__init__.py +0 -0
  19. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/output/base.py +0 -0
  20. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/output/clickhouse.py +0 -0
  21. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/output/local.py +0 -0
  22. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/output/s3.py +0 -0
  23. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics/transform.py +0 -0
  24. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/SOURCES.txt +0 -0
  25. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/dependency_links.txt +0 -0
  26. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/entry_points.txt +0 -0
  27. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/requires.txt +0 -0
  28. {batch_analytics-0.3.3 → batch_analytics-0.3.4}/src/batch_analytics.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "batch-analytics"
7
- version = "0.3.3"
7
+ version = "0.3.4"
8
8
  description = "PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -84,6 +84,7 @@ def run_pca_clustering(
84
84
  n_comp_max = min(len(feature_cols), 20)
85
85
  pca = PCA(k=n_comp_max, inputCol="features", outputCol="pca_features")
86
86
  pca_model = pca.fit(df_scaled)
87
+ df_pca = pca_model.transform(df_scaled)
87
88
 
88
89
  explained = pca_model.explainedVariance.toArray().tolist()
89
90
  cumsum = []
@@ -108,9 +109,9 @@ def run_pca_clustering(
108
109
 
109
110
  k = config.analytics.cluster_k
110
111
  kmeans = KMeans(k=k, seed=42, featuresCol="pca_features", predictionCol="cluster")
111
- kmeans_model = kmeans.fit(df_scaled)
112
+ kmeans_model = kmeans.fit(df_pca)
112
113
 
113
- df_clustered = kmeans_model.transform(df_scaled)
114
+ df_clustered = kmeans_model.transform(df_pca)
114
115
  cluster_sizes = (
115
116
  df_clustered.groupBy("cluster")
116
117
  .count()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batch-analytics
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
5
  Author: Litewave Analytics Team
6
6
  License: MIT