batch-analytics 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ """
2
+ Module registry for batch analytics. Maps --modules short names to run functions.
3
+
4
+ Must stay in sync with analytics_runner catalog module_arg for each Spark method.
5
+ See analytics_runner/catalog/analytics_catalog.yaml.
6
+ """
7
+
8
+ from .analytics import (
9
+ run_linear_regression,
10
+ run_correlation,
11
+ run_pca_clustering,
12
+ run_t_test,
13
+ )
14
+
15
# Each --modules short name maps to (run function, result key).
MODULE_REGISTRY = dict(
    lr=(run_linear_regression, "linear_regression"),
    corr=(run_correlation, "correlation"),
    pca=(run_pca_clustering, "pca_clustering"),
    ttest=(run_t_test, "t_test"),
)

# Accepted short names, in registry order; by default every module runs.
VALID_MODULES = list(MODULE_REGISTRY)
DEFAULT_MODULES = list(VALID_MODULES)
@@ -0,0 +1,22 @@
1
+ """
2
+ Output drivers for analytics results: local, S3, ClickHouse.
3
+
4
+ Configuration via env vars (injected by analytics_runner):
5
+ - OUTPUT_TYPE: local | s3 | clickhouse
6
+ - OUTPUT_S3_PATH: s3://bucket/prefix/ (when type=s3)
7
+ - OUTPUT_CLICKHOUSE_DATABASE, OUTPUT_CLICKHOUSE_TABLE (when type=clickhouse)
8
+ - TASK_ID: task identifier (injected by spark_runner)
9
+ """
10
+
11
+ from .base import OutputDriver, write_analytics_output
12
+ from .local import LocalOutputDriver
13
+ from .s3 import S3OutputDriver
14
+ from .clickhouse import ClickHouseOutputDriver
15
+
16
+ __all__ = [
17
+ "OutputDriver",
18
+ "write_analytics_output",
19
+ "LocalOutputDriver",
20
+ "S3OutputDriver",
21
+ "ClickHouseOutputDriver",
22
+ ]
@@ -0,0 +1,97 @@
1
+ """
2
+ Base output driver interface and write orchestration.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from typing import Any
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def _serialize_for_json(obj: Any) -> Any:
    """Recursively convert numpy/Python values to JSON-serializable forms.

    Dicts are rebuilt with converted values, lists and tuples become lists,
    numpy arrays become nested lists and numpy float/integer/bool scalars
    become the matching Python scalar. Any other value is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) analytics result.

    Returns:
        A structure built from plain dict/list/scalar values where possible.
    """
    # numpy is imported lazily and treated as optional: without it there can
    # be no numpy values to convert, so plain containers still serialize.
    try:
        import numpy as np
    except ImportError:
        np = None

    if isinstance(obj, dict):
        # json.dumps rejects numpy integer/bool keys, so numpy scalar keys are
        # unwrapped to their Python equivalent; other keys are kept as-is.
        return {
            (k.item() if np is not None and isinstance(k, np.generic) else k): _serialize_for_json(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_serialize_for_json(x) for x in obj]
    if np is None:
        return obj
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    return obj
30
+
31
+
32
class OutputDriver(ABC):
    """Abstract destination for analytics results.

    Concrete drivers subclass this and implement :meth:`write`.
    """

    @abstractmethod
    def write(
        self,
        run_id: str,
        task_id: str,
        artifacts: dict[str, Any],
    ) -> list[str]:
        """Persist analytics artifacts to the driver's destination.

        Args:
            run_id: Unique run identifier.
            task_id: Task identifier from spark_runner.
            artifacts: Mapping of module name to that module's result,
                e.g. ``{"linear_regression": {...}}``.

        Returns:
            The locations that were written (paths, keys, or identifiers).
        """
54
+
55
+
56
def write_analytics_output(
    run_id: str,
    task_id: str,
    artifacts: dict[str, Any],
    output_type: str,
    **driver_kwargs: Any,
) -> list[str]:
    """Write analytics results using the configured output driver.

    Args:
        run_id: Run identifier.
        task_id: Task identifier (from TASK_ID env or run_id fallback).
        artifacts: Analytics module outputs, keyed by module name.
        output_type: ``local`` | ``s3`` | ``clickhouse`` (case-insensitive,
            surrounding whitespace ignored). Empty/None and unknown values
            fall back to ``local``; unknown values are logged as a warning.
        **driver_kwargs: Driver-specific config (path, database, table, etc.),
            forwarded to the selected driver's constructor.

    Returns:
        List of written locations; empty when there are no artifacts.

    Raises:
        Exception: Whatever the driver's ``write`` raises, re-raised after
            being logged.
    """
    if not artifacts:
        logger.debug("No analytics artifacts to write")
        return []

    output_type = (output_type or "local").lower().strip()

    # The driver modules import OutputDriver from this module, so they cannot
    # be imported at module top without a circular import. Import the selected
    # driver here instead (the names were previously never imported at all).
    if output_type == "s3":
        from .s3 import S3OutputDriver as driver_cls
    elif output_type == "clickhouse":
        from .clickhouse import ClickHouseOutputDriver as driver_cls
    else:
        if output_type != "local":
            logger.warning("Unknown OUTPUT_TYPE=%r, falling back to local", output_type)
            # Report the destination actually used in the success log below.
            output_type = "local"
        from .local import LocalOutputDriver as driver_cls

    driver = driver_cls(**driver_kwargs)

    try:
        locations = driver.write(run_id, task_id, artifacts)
    except Exception as e:
        logger.exception("Failed to write analytics output: %s", e)
        raise

    logger.info("Wrote analytics output to %s: %s", output_type, locations)
    return locations
@@ -0,0 +1,89 @@
1
+ """
2
+ ClickHouse output driver: inserts analytics results into a ClickHouse table.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import Any
8
+
9
+ from .base import OutputDriver, _serialize_for_json
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def _create_table_sql(database: str, table: str) -> str:
    """Build the CREATE TABLE IF NOT EXISTS statement for the results table.

    The database and table names are interpolated into the statement as-is.
    """
    return f"""
    CREATE TABLE IF NOT EXISTS {database}.{table} (
        task_id String,
        run_id String,
        module String,
        result String,
        created_at DateTime DEFAULT now()
    ) ENGINE = MergeTree()
    ORDER BY (task_id, run_id, module)
    """
26
+
27
+
28
class ClickHouseOutputDriver(OutputDriver):
    """Insert analytics artifacts into a ClickHouse table as JSON strings."""

    def __init__(
        self,
        database: str,
        table: str,
        # Quoted so the module still imports on Python 3.9, where the
        # ``X | None`` syntax is not valid at runtime.
        host: "str | None" = None,
        port: int = 8123,
        user: str = "default",
        password: str = "",
        **kwargs: Any,
    ) -> None:
        """Store connection settings; no connection is opened here.

        Args:
            database: Target database name.
            table: Target table name.
            host: Server host; ``localhost`` is used when unset.
            port: Server port.
            user: User name.
            password: Password; an empty value is sent as no password.
            **kwargs: Accepted and ignored, so one kwargs dict can be shared
                between drivers.
        """
        self.database = database
        self.table = table
        self.host = host
        self.port = port
        self.user = user
        self.password = password

    def _client(self):
        """Create a clickhouse-connect client (imported lazily).

        Raises:
            ImportError: If clickhouse-connect is not installed.
        """
        try:
            import clickhouse_connect
        except ImportError as e:
            raise ImportError(
                "ClickHouse output requires clickhouse-connect. "
                "Install with: pip install batch-analytics[clickhouse]"
            ) from e

        return clickhouse_connect.get_client(
            host=self.host or "localhost",
            port=self.port,
            username=self.user,
            password=self.password if self.password else None,
        )

    def write(
        self,
        run_id: str,
        task_id: str,
        artifacts: dict[str, Any],
    ) -> list[str]:
        """Insert one row per artifact, creating the table if needed.

        Args:
            run_id: Run identifier stored in the ``run_id`` column.
            task_id: Task identifier stored in the ``task_id`` column.
            artifacts: Mapping of module name to result; each result is
                stored as a JSON string in the ``result`` column.

        Returns:
            A single-element list holding ``"<database>.<table>"``.
        """
        location = f"{self.database}.{self.table}"
        rows: list[tuple[str, str, str, str]] = [
            (task_id, run_id, module, json.dumps(_serialize_for_json(data)))
            for module, data in artifacts.items()
        ]

        client = self._client()
        try:
            # Ensure table exists
            client.command(_create_table_sql(self.database, self.table).strip())
            client.insert(
                location,
                rows,
                column_names=["task_id", "run_id", "module", "result"],
            )
        finally:
            # A new client is created on every write; close it so its
            # connection is not left open after a success or a failure.
            client.close()

        logger.info("Inserted %d analytics rows into %s", len(rows), location)
        return [location]
@@ -0,0 +1,36 @@
1
+ """
2
+ Local output driver: writes analytics results to a local directory (BATCH_LOG_PATH).
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from .base import OutputDriver, _serialize_for_json
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class LocalOutputDriver(OutputDriver):
    """Write analytics artifacts to the local filesystem as JSON files."""

    def __init__(
        self,
        # Quoted so the module still imports on Python 3.9, where the
        # ``X | Y`` syntax is not valid at runtime.
        path: "str | Path" = "/tmp/analytics_logs",
        **kwargs: Any,
    ) -> None:
        """Remember the output directory and create it if it is missing.

        Args:
            path: Directory that receives the JSON files.
            **kwargs: Accepted and ignored, so one kwargs dict can be shared
                between drivers.
        """
        self.path = Path(path)
        self.path.mkdir(parents=True, exist_ok=True)

    def write(
        self,
        run_id: str,
        task_id: str,
        artifacts: dict[str, Any],
    ) -> list[str]:
        """Write one ``<run_id>_analytics_<name>.json`` file per artifact.

        Args:
            run_id: Run identifier, used as the file name prefix.
            task_id: Task identifier; not used in the local file name.
            artifacts: Mapping of module name to result.

        Returns:
            The written file paths, as strings.
        """
        locations: list[str] = []
        for name, data in artifacts.items():
            filepath = self.path / f"{run_id}_analytics_{name}.json"
            # Explicit encoding so the result never depends on the locale.
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(_serialize_for_json(data), f, indent=2)
            locations.append(str(filepath))

        logger.info("Wrote %d analytics artifacts to %s", len(locations), self.path)
        return locations
@@ -0,0 +1,82 @@
1
+ """
2
+ S3 output driver: uploads analytics results as JSON to S3.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import Any
8
+
9
+ from .base import OutputDriver, _serialize_for_json
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class S3OutputDriver(OutputDriver):
    """Upload analytics artifacts to S3 as JSON objects."""

    def __init__(
        self,
        path: str,
        # Quoted so the module still imports on Python 3.9, where the
        # ``X | None`` syntax is not valid at runtime.
        region: "str | None" = None,
        endpoint: "str | None" = None,
        **kwargs: Any,
    ) -> None:
        """
        Args:
            path: S3 path (e.g. s3://bucket/prefix/); it is normalized to end
                with exactly one "/".
            region: AWS region; when unset, boto3's own default is used.
            endpoint: Custom endpoint for S3-compatible storage.
            **kwargs: Accepted and ignored, so one kwargs dict can be shared
                between drivers.
        """
        self.path = path.rstrip("/") + "/"
        self.region = region
        self.endpoint = endpoint

    def _client(self):
        """Create a boto3 S3 client (imported lazily, not at module load).

        Raises:
            ImportError: If boto3 is not installed.
        """
        try:
            import boto3
            from botocore.config import Config
        except ImportError as e:
            raise ImportError(
                "S3 output requires boto3. Install with: pip install batch-analytics[s3]"
            ) from e

        config = Config(signature_version="s3v4")
        client_kwargs: dict[str, Any] = {}
        if self.region:
            client_kwargs["region_name"] = self.region
        if self.endpoint:
            client_kwargs["endpoint_url"] = self.endpoint

        return boto3.client("s3", config=config, **client_kwargs)

    def _parse_s3_path(self) -> tuple[str, str]:
        """Split ``s3://bucket/prefix/`` into ``(bucket, prefix)``.

        Returns:
            The bucket name and the key prefix. The prefix ends with "/" when
            non-empty and is "" for a bucket-only path.

        Raises:
            ValueError: If the path does not start with ``s3://`` or names no
                bucket.
        """
        scheme = "s3://"
        if not self.path.startswith(scheme):
            raise ValueError(f"Invalid S3 path: {self.path}")
        bucket, _, raw_prefix = self.path[len(scheme):].partition("/")
        if not bucket:
            raise ValueError(f"Invalid S3 path: {self.path}")
        # A bucket-only path must give an empty prefix; appending "/" here
        # unconditionally would make every object key start with "/".
        prefix = raw_prefix.rstrip("/")
        return bucket, (prefix + "/" if prefix else "")

    def write(
        self,
        run_id: str,
        task_id: str,
        artifacts: dict[str, Any],
    ) -> list[str]:
        """Upload one JSON object per artifact.

        Keys have the form ``<prefix><task_id>/<run_id>_analytics_<name>.json``.

        Args:
            run_id: Run identifier, used in the object name.
            task_id: Task identifier, used as a key path segment.
            artifacts: Mapping of module name to result.

        Returns:
            The ``s3://bucket/key`` URI of every uploaded object.
        """
        bucket, prefix = self._parse_s3_path()
        client = self._client()

        # Use task_id for path: prefix/task_id/run_id_analytics_module.json
        key_prefix = f"{prefix}{task_id}/"

        locations: list[str] = []
        for name, data in artifacts.items():
            key = f"{key_prefix}{run_id}_analytics_{name}.json"
            body = json.dumps(_serialize_for_json(data), indent=2).encode("utf-8")
            client.put_object(Bucket=bucket, Key=key, Body=body, ContentType="application/json")
            locations.append(f"s3://{bucket}/{key}")

        logger.info("Uploaded %d analytics artifacts to s3://%s/%s", len(locations), bucket, key_prefix)
        return locations
@@ -0,0 +1,184 @@
1
+ """
2
+ Transform stage: extract anchor_id from the add_dimension column, clean data (remove duplicates), and stage.
3
+ """
4
+
5
+ import logging
6
+ from typing import Sequence
7
+
8
+ from pyspark.sql import DataFrame, SparkSession
9
+ from pyspark.sql.functions import coalesce, col, get_json_object, regexp_extract
10
+
11
+ from .config import BatchAnalyticsConfig
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def extract_anchor_id(
    df: DataFrame,
    config: BatchAnalyticsConfig,
) -> DataFrame:
    """
    Add a column holding the anchor_id parsed out of the add_dimension column.

    Two encodings of the source value are tried, in this order:
    JSON ({"anchor_id":"value"}) and Python-dict style ({'anchor_id':'value'}).
    Source and output column names come from config.transform. When the source
    column is absent the DataFrame is returned unchanged.

    NOTE(review): per the PySpark docs regexp_extract yields an empty string
    (not null) when nothing matches, so rows matching neither form presumably
    get "" rather than null - confirm this is intended.
    """
    source_name = config.transform.add_dimension_column
    target_name = config.transform.anchor_id_column

    if source_name not in df.columns:
        logger.debug("Column %s not found, skipping anchor_id extraction", source_name)
        return df

    # First choice - valid JSON: {"anchor_id":"GP/GPH(D)/II(W)/250019"}
    from_json = get_json_object(col(source_name), "$.anchor_id")
    # Fallback - Python-dict style: {'anchor_id':'GP/GPH(D)/II(W)/250019'}
    from_py_dict = regexp_extract(col(source_name), r"'anchor_id'\s*:\s*'([^']*)'", 1)

    return df.withColumn(target_name, coalesce(from_json, from_py_dict))
39
+
40
+
41
def remove_duplicates(
    df: DataFrame,
    # Quoted so the module still imports on Python 3.9, where the
    # ``X | None`` syntax is not valid at runtime.
    key_columns: "Sequence[str] | None" = None,
) -> DataFrame:
    """
    Remove duplicate rows.

    If key_columns is provided (and non-empty), keeps one row per key via
    dropDuplicates; otherwise drops exact row duplicates via distinct.

    NOTE(review): Spark does not appear to document which row of a duplicate
    group survives dropDuplicates, so "first occurrence" should not be relied
    on - confirm if ordering matters to callers.

    Args:
        df: Input DataFrame.
        key_columns: Column names that define a duplicate, or None.

    Returns:
        The deduplicated DataFrame.
    """
    if key_columns:
        df_cleaned = df.dropDuplicates(list(key_columns))
    else:
        df_cleaned = df.distinct()

    # Each count() is a full Spark action whose only consumer is the log line
    # below, so skip both when INFO logging is disabled.
    if logger.isEnabledFor(logging.INFO):
        before_count = df.count()
        after_count = df_cleaned.count()
        logger.info(
            "Deduplication: %d -> %d rows (removed %d duplicates)",
            before_count,
            after_count,
            before_count - after_count,
        )
    return df_cleaned
64
+
65
+
66
def transform(
    df: DataFrame,
    config: BatchAnalyticsConfig,
) -> DataFrame:
    """
    Run the transformation steps only: anchor_id extraction, then deduplication.

    Nothing is written; persist the result separately with stage_to_clickhouse().
    config.transform.dedup_columns is a comma-separated list of key columns;
    when it is empty, exact-row deduplication is used instead.
    """
    with_anchor = extract_anchor_id(df, config)

    if config.transform.dedup_columns:
        # Split on commas, trim each name, and drop blank entries.
        keys = [
            name.strip()
            for name in config.transform.dedup_columns.split(",")
            if name.strip()
        ]
    else:
        keys = None

    return remove_duplicates(with_anchor, key_columns=keys)
81
+
82
+
83
def stage_to_clickhouse(
    spark: SparkSession,
    df: DataFrame,
    config: BatchAnalyticsConfig,
) -> None:
    """
    Write transformed data to the ClickHouse staging table (overwrite mode).

    Separate job from transform; must complete before analytics can run.
    Tries the "clickhouse" data source first and falls back to JDBC if that
    write raises for any reason.

    Args:
        spark: Active Spark session (not used by this function).
        df: Transformed DataFrame to stage.
        config: Supplies the connection settings and the staging table name.
    """
    # count() is a full Spark action whose only consumer is the final log
    # line, so skip it when INFO logging is disabled.
    n = df.count() if logger.isEnabledFor(logging.INFO) else None

    try:
        writer = (
            df.write.format("clickhouse")
            .option("host", config.clickhouse.host)
            .option("database", config.clickhouse.database)
            .option("table", config.transform.staging_table)
            .option("user", config.clickhouse.user)
            .mode("overwrite")
        )
        if config.clickhouse.password:
            writer = writer.option("password", config.clickhouse.password)
        writer.save()
    except Exception as e:
        # Deliberate best-effort: any connector failure triggers the JDBC path.
        logger.warning("ClickHouse connector failed (%s), using JDBC", e)
        df.write.jdbc(
            config.clickhouse.jdbc_url,
            config.transform.staging_table,
            mode="overwrite",
            properties=config.clickhouse.jdbc_properties,
        )

    if n is not None:
        logger.info(
            "Staged data to ClickHouse %s.%s (%d rows)",
            config.clickhouse.database,
            config.transform.staging_table,
            n,
        )
120
+
121
+
122
def stage_to_path(
    spark: SparkSession,
    df: DataFrame,
    config: BatchAnalyticsConfig,
) -> None:
    """Overwrite config.transform.staging_path with df in the configured staging format.

    Intended for local dev or intermediate storage (parquet, delta, or any
    other format name the Spark writer accepts).
    """
    target = config.transform.staging_path
    staging_format = config.transform.staging_format

    if staging_format == "parquet":
        df.write.mode("overwrite").parquet(target)
        logger.info("Staged data to %s (parquet)", target)
        return

    if staging_format == "delta":
        df.write.format("delta").mode("overwrite").save(target)
        logger.info("Staged data to %s (delta)", target)
        return

    # Any other format name is handed to the generic writer unchanged.
    df.write.format(staging_format).mode("overwrite").save(target)
    logger.info("Staged data to %s (%s)", target, staging_format)
139
+
140
+
141
def transform_and_stage(
    spark: SparkSession,
    df: DataFrame,
    config: BatchAnalyticsConfig,
) -> DataFrame:
    """
    Backward-compatible wrapper: transform df, stage it to ClickHouse, return it.

    New code should call transform() and stage_to_clickhouse() separately.
    """
    result = transform(df, config)
    stage_to_clickhouse(spark, result, config)
    return result
153
+
154
+
155
def load_staged(
    spark: SparkSession,
    config: BatchAnalyticsConfig,
) -> DataFrame:
    """
    Load previously staged data (e.g. when running only analytics modules).

    The source is chosen by config.transform.staging_format:
    "parquet" and "delta" read from config.transform.staging_path,
    "clickhouse" reads the staging table (native data source first, JDBC as a
    fallback), and any other value is passed to the generic reader.

    Args:
        spark: Active Spark session used for reading.
        config: Supplies the staging format/path/table and connection settings.

    Returns:
        DataFrame over the staged data.
    """
    staging_path = config.transform.staging_path
    fmt = config.transform.staging_format

    if fmt == "parquet":
        return spark.read.parquet(staging_path)
    if fmt == "delta":
        return spark.read.format("delta").load(staging_path)
    if fmt == "clickhouse":
        try:
            # Pass the same credentials the staging write uses, so the native
            # read does not depend on connector defaults.
            reader = (
                spark.read.format("clickhouse")
                .option("host", config.clickhouse.host)
                .option("database", config.clickhouse.database)
                .option("table", config.transform.staging_table)
                .option("user", config.clickhouse.user)
            )
            if config.clickhouse.password:
                reader = reader.option("password", config.clickhouse.password)
            return reader.load()
        except Exception as e:
            # Same best-effort fallback as the write path, but no longer
            # silent: record why the native connector was skipped.
            logger.warning("ClickHouse connector failed (%s), using JDBC", e)
            return spark.read.jdbc(
                config.clickhouse.jdbc_url,
                config.transform.staging_table,
                properties=config.clickhouse.jdbc_properties,
            )
    return spark.read.format(fmt).load(staging_path)
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: batch-analytics
3
+ Version: 0.1.0
4
+ Summary: PySpark batch analytics: Extract, Transform, Stage, and analytical modules (linear regression, correlation, PCA, t-test).
5
+ Author: Analytics Team
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pyspark<3.6,>=3.4
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7.0; extra == "dev"
12
+ Provides-Extra: s3
13
+ Requires-Dist: boto3>=1.28; extra == "s3"
14
+ Provides-Extra: clickhouse
15
+ Requires-Dist: clickhouse-connect>=0.7; extra == "clickhouse"
16
+ Provides-Extra: output
17
+ Requires-Dist: boto3>=1.28; extra == "output"
18
+ Requires-Dist: clickhouse-connect>=0.7; extra == "output"
19
+
20
+ # Batch Analytics
21
+
22
+ PySpark-based analytics pipeline for ClickHouse data: **Extract** → **Transform** → **Stage** → **Analytics**. Designed to run as the main application inside a Spark driver container (invoked by `analytics_runner` via SparkApplication CRD).
23
+
24
+ ## Bundle contents
25
+
26
+ Only the files required for the batch analytics job runner:
27
+
28
+ ```
29
+ analytics/
30
+ ├── pyproject.toml
31
+ ├── requirements-batch.txt
32
+ ├── README.md
33
+ └── src/
34
+ └── batch_analytics/
35
+ ├── __init__.py
36
+ ├── __main__.py # python -m batch_analytics
37
+ ├── job_runner.py # Entry point
38
+ ├── config.py
39
+ ├── extract.py
40
+ ├── transform.py
41
+ ├── log.py
42
+ ├── README.md
43
+ └── analytics/
44
+ ├── __init__.py
45
+ ├── linear_regression.py
46
+ ├── correlation.py
47
+ ├── pca_clustering.py
48
+ └── t_test.py
49
+ ```
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install -e .
55
+ # or: pip install -r requirements-batch.txt && pip install -e .
56
+ ```
57
+
58
+ ## Run
59
+
60
+ ```bash
61
+ # Via module
62
+ python -m batch_analytics
63
+
64
+ # Via CLI (after pip install -e .)
65
+ batch-analytics
66
+
67
+ # Full pipeline
68
+ batch-analytics
69
+
70
+ # Analytics only (from staged ClickHouse table)
71
+ batch-analytics --from-stage --modules lr corr pca ttest
72
+ ```
73
+
74
+ ## Configuration
75
+
76
+ See `src/batch_analytics/README.md` for environment variables and usage.
77
+
78
+ ## Docker image
79
+
80
+ For Spark on Kubernetes, build an image that includes this package and exposes `job_runner.py` at the path used by `mainApplicationFile` (e.g. `local:///opt/analytics/job_runner.py`).
@@ -0,0 +1,23 @@
1
+ batch_analytics/__init__.py,sha256=5tdHpnDS80B_cgQN1aj1DVj28z0n-PAQyNbw_rJpFOk,1214
2
+ batch_analytics/__main__.py,sha256=lnQrJI_tQWnm1X0KjKwfwm5O9sIcg_SZNECbDanAFXM,119
3
+ batch_analytics/config.py,sha256=PW2NNGifaeOFyEkJ-dV4OXHb4fn32LUskN1MVvnUh5g,6803
4
+ batch_analytics/extract.py,sha256=qZKLebkJ9bn14wT6K04XgvNKwhjfGnDJ77MwQJckCoo,3555
5
+ batch_analytics/job_runner.py,sha256=xznaYVTwkUUs_upAaO_lT9vsiI2vjTv7sac6tFm3oIw,10673
6
+ batch_analytics/log.py,sha256=Hq3jFCdnMZzrHsFzO16lGZfKHxPYXkZOiZJpOgWxJmM,2743
7
+ batch_analytics/modules.py,sha256=ND2fZRtwwlI-HvH4xFg3Cj5YGd31qUEohHmjuejJY0o,677
8
+ batch_analytics/transform.py,sha256=KVlLUIpM2qnB877-QCm42qxPJMyhOcg7Rkd802jCsxs,5844
9
+ batch_analytics/analytics/__init__.py,sha256=wyyAXs3Owu92mhixlViK3yWfqH4KWXmopmswEqrLP70,515
10
+ batch_analytics/analytics/correlation.py,sha256=WmmZll8yfcB2rSSpoCOeTzYn4PDThupAOJKAkxsXzoo,3238
11
+ batch_analytics/analytics/linear_regression.py,sha256=wpdsjyzl29umPD4hHBCcHaBwb1cx4qWfKBNfTxCAX8I,4439
12
+ batch_analytics/analytics/pca_clustering.py,sha256=jdWwtU_G0Mkn50Lc_zOJv14clLE2a5-Gy0o-ynmLFvs,4375
13
+ batch_analytics/analytics/t_test.py,sha256=aYoRTEXSxaxsIYgievHSkRZPogxCK5tObGNIHf5RV04,5497
14
+ batch_analytics/output/__init__.py,sha256=79e5QJ9IJAHCk1e9HZi5g6WXUvnq5MKI9I0WhEaIbD4,665
15
+ batch_analytics/output/base.py,sha256=-HVj5HA4jQ7Lvk0B9WR2kyk_1wu6-wvVXGV10WzxgPg,2852
16
+ batch_analytics/output/clickhouse.py,sha256=gUI0LQqA6Lg4ZObDe3_9QJp_cguM9AbRCxdUeRgAfhY,2531
17
+ batch_analytics/output/local.py,sha256=aDGa-riSEaZStSV4qE_zLfb4Af70O1G4XvHUiXbPMGk,1072
18
+ batch_analytics/output/s3.py,sha256=oSyd8mL5nL-ryNddclpszFgfYChrogzgaNCJwdYZZYU,2695
19
+ batch_analytics-0.1.0.dist-info/METADATA,sha256=HdahIx8c2NiwtPXLQPEisMeXBIrfoBFmrKruYcrNBPM,2308
20
+ batch_analytics-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
21
+ batch_analytics-0.1.0.dist-info/entry_points.txt,sha256=v1Yx6LOlDBC0DmcdaaDM7KBROGe8GmBO7d6-Q2_z4dg,68
22
+ batch_analytics-0.1.0.dist-info/top_level.txt,sha256=wpRlC_JZ_uyGxzP1P3HlpNAjL8qS8kjXCE03DE2Dlyk,16
23
+ batch_analytics-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ batch-analytics = batch_analytics.job_runner:main
@@ -0,0 +1 @@
1
+ batch_analytics