awx-zipline-ai 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/repo/zipline_hub.py ADDED
@@ -0,0 +1,277 @@
+ import json
+ import os
+ from datetime import date, datetime, timedelta, timezone
+ from typing import Optional
+
+ import google.auth
+ import requests
+ from google.auth.transport.requests import Request
+ from google.cloud import iam_credentials_v1
+
+
+ class ZiplineHub:
+     def __init__(self, base_url, sa_name=None):
+         if not base_url:
+             raise ValueError("Base URL for ZiplineHub cannot be empty.")
+         self.base_url = base_url
+         # Define both auth attributes up front so the 401 handlers below can test
+         # them no matter which authentication path was taken.
+         self.sa = None
+         self.id_token = None
+         if self.base_url.startswith("https"):
+             print("\n 🔐 Using Google Cloud authentication for ZiplineHub.")
+
+             # First, try to get an ID token from the environment (GitHub Actions)
+             self.id_token = os.getenv("GCP_ID_TOKEN")
+             if self.id_token:
+                 print(" 🔑 Using ID token from environment")
+             elif sa_name is not None:
+                 # Fall back to signing a JWT as the named service account
+                 print(" 🔑 Generating ID token from service account credentials")
+                 credentials, project_id = google.auth.default()
+                 self.project_id = project_id
+                 credentials.refresh(Request())
+
+                 self.sa = f"{sa_name}@{project_id}.iam.gserviceaccount.com"
+             else:
+                 print(" 🔑 Generating ID token from default credentials")
+                 credentials, project_id = google.auth.default()
+                 credentials.refresh(Request())
+                 self.id_token = credentials.id_token
+
39
+     def _generate_jwt_payload(self, service_account_email: str, resource_url: str) -> str:
+         """Generates the JWT payload for a service account.
+
+         Creates a properly formatted JWT payload with the standard claims (iss, sub,
+         aud, iat, exp) needed for IAP authentication.
+
+         Args:
+             service_account_email (str): The service account the JWT is created for.
+             resource_url (str): The scope of the JWT: the URL that the JWT will be
+                 allowed to access.
+
+         Returns:
+             str: JSON string containing the JWT payload with properly formatted claims.
+         """
+         # Current time and expiration time (1 hour later) in UTC
+         iat = datetime.now(tz=timezone.utc)
+         exp = iat + timedelta(seconds=3600)
+
+         # Convert datetime objects to numeric timestamps (seconds since epoch)
+         # as required by the JWT standard (RFC 7519)
+         payload = {
+             "iss": service_account_email,
+             "sub": service_account_email,
+             "aud": resource_url,
+             "iat": int(iat.timestamp()),
+             "exp": int(exp.timestamp()),
+         }
+
+         return json.dumps(payload)
+
69
+     def _sign_jwt(self, target_sa: str, resource_url: str) -> str:
+         """Signs a JWT payload using ADC and the IAM Credentials API.
+
+         Uses Google Cloud's IAM Credentials API to sign a JWT. The caller needs the
+         `iam.serviceAccounts.signJwt` permission (roles/iam.serviceAccountTokenCreator)
+         on the target service account, and the service account itself must be allowed
+         to access the IAP-protected application.
+
+         Args:
+             target_sa (str): The service account the JWT is created for. It must be
+                 authorized to access the IAP-protected application.
+             resource_url (str): The audience and scope of the JWT. This is the URL
+                 of the IAP-protected application.
+
+         Returns:
+             str: A signed JWT that can be used to access IAP-protected apps.
+                 Use it in the Authorization header as: 'Bearer <signed_jwt>'
+         """
+         # Get default credentials from the environment or application credentials
+         source_credentials, project_id = google.auth.default()
+
+         # Initialize the IAM credentials client with the source credentials
+         iam_client = iam_credentials_v1.IAMCredentialsClient(credentials=source_credentials)
+
+         # Generate the service account resource name.
+         # Use '-' as the project placeholder, as required by the API.
+         name = iam_client.service_account_path("-", target_sa)
+
+         # Create and sign the JWT payload
+         payload = self._generate_jwt_payload(target_sa, resource_url)
+
+         request = iam_credentials_v1.SignJwtRequest(
+             name=name,
+             payload=payload,
+         )
+         # Sign the JWT using the IAM Credentials API
+         response = iam_client.sign_jwt(request=request)
+
+         return response.signed_jwt
+
108
+     def call_diff_api(self, names_to_hashes: dict[str, str]) -> Optional[list[str]]:
+         url = f"{self.base_url}/upload/v2/diff"
+
+         diff_request = {"namesToHashes": names_to_hashes}
+         headers = {"Content-Type": "application/json"}
+         if self.base_url.startswith("https") and self.sa is not None:
+             headers["Authorization"] = f"Bearer {self._sign_jwt(self.sa, url)}"
+         elif self.base_url.startswith("https"):
+             headers["Authorization"] = f"Bearer {self.id_token}"
+         try:
+             response = requests.post(url, json=diff_request, headers=headers)
+             response.raise_for_status()
+             diff_response = response.json()
+             return diff_response["diff"]
+         except requests.RequestException as e:
+             if e.response is not None and e.response.status_code == 401 and self.sa is None:
+                 print(
+                     " ❌ Error calling diff API. Unauthorized and no service account provided. Make sure the environment has default credentials set up or provide a service account name as SA_NAME in teams.py."
+                 )
+             elif e.response is not None and e.response.status_code == 401 and self.sa is not None:
+                 print(
+                     f" ❌ Error calling diff API. Unauthorized with provided service account: {self.sa}. Make sure the service account has the 'iap.webServiceVersions.accessViaIap' permission."
+                 )
+             else:
+                 print(f" ❌ Error calling diff API: {e}")
+             raise e
+
135
+     def call_upload_api(self, diff_confs, branch: str):
+         url = f"{self.base_url}/upload/v2/confs"
+
+         upload_request = {
+             "diffConfs": diff_confs,
+             "branch": branch,
+         }
+         headers = {"Content-Type": "application/json"}
+         if self.base_url.startswith("https") and self.sa is not None:
+             headers["Authorization"] = f"Bearer {self._sign_jwt(self.sa, url)}"
+         elif self.base_url.startswith("https"):
+             headers["Authorization"] = f"Bearer {self.id_token}"
+
+         try:
+             response = requests.post(url, json=upload_request, headers=headers)
+             response.raise_for_status()
+             return response.json()
+         except requests.RequestException as e:
+             if e.response is not None and e.response.status_code == 401 and self.sa is None:
+                 print(
+                     " ❌ Error calling upload API. Unauthorized and no service account provided. Make sure the environment has default credentials set up or provide a service account name as SA_NAME in teams.py."
+                 )
+             elif e.response is not None and e.response.status_code == 401 and self.sa is not None:
+                 print(
+                     f" ❌ Error calling upload API. Unauthorized with provided service account: {self.sa}. Make sure the service account has the 'iap.webServiceVersions.accessViaIap' permission."
+                 )
+             else:
+                 print(f" ❌ Error calling upload API: {e}")
+             raise e
+
165
+     def call_schedule_api(self, modes, branch, conf_name, conf_hash):
+         url = f"{self.base_url}/schedule/v2/schedules"
+
+         schedule_request = {
+             "modeSchedules": modes,
+             "branch": branch,
+             "confName": conf_name,
+             "confHash": conf_hash,
+         }
+
+         headers = {"Content-Type": "application/json"}
+         if self.base_url.startswith("https") and self.sa is not None:
+             headers["Authorization"] = f"Bearer {self._sign_jwt(self.sa, url)}"
+         elif self.base_url.startswith("https"):
+             headers["Authorization"] = f"Bearer {self.id_token}"
+
+         try:
+             response = requests.post(url, json=schedule_request, headers=headers)
+             response.raise_for_status()
+             return response.json()
+         except requests.RequestException as e:
+             if e.response is not None and e.response.status_code == 401 and self.sa is None:
+                 print(
+                     " ❌ Error deploying schedule. Unauthorized and no service account provided. Make sure the environment has default credentials set up or provide a service account name as SA_NAME in teams.py."
+                 )
+             elif e.response is not None and e.response.status_code == 401 and self.sa is not None:
+                 print(
+                     f" ❌ Error deploying schedule. Unauthorized with provided service account: {self.sa}. Make sure the service account has the 'iap.webServiceVersions.accessViaIap' permission."
+                 )
+             else:
+                 print(f" ❌ Error deploying schedule: {e}")
+             raise e
+
198
+     def call_sync_api(self, branch: str, names_to_hashes: dict[str, str]) -> Optional[list[str]]:
+         url = f"{self.base_url}/upload/v2/sync"
+
+         sync_request = {
+             "namesToHashes": names_to_hashes,
+             "branch": branch,
+         }
+         headers = {"Content-Type": "application/json"}
+         if self.base_url.startswith("https") and self.sa is not None:
+             headers["Authorization"] = f"Bearer {self._sign_jwt(self.sa, url)}"
+         elif self.base_url.startswith("https"):
+             headers["Authorization"] = f"Bearer {self.id_token}"
+
+         try:
+             response = requests.post(url, json=sync_request, headers=headers)
+             response.raise_for_status()
+             return response.json()
+         except requests.RequestException as e:
+             if e.response is not None and e.response.status_code == 401 and self.sa is None:
+                 print(
+                     " ❌ Error calling sync API. Unauthorized and no service account provided. Make sure the environment has default credentials set up or provide a service account name as SA_NAME in teams.py."
+                 )
+             elif e.response is not None and e.response.status_code == 401 and self.sa is not None:
+                 print(
+                     f" ❌ Error calling sync API. Unauthorized with provided service account: {self.sa}. Make sure the service account has the 'iap.webServiceVersions.accessViaIap' permission."
+                 )
+             else:
+                 print(f" ❌ Error calling sync API: {e}")
+             raise e
+
228
+     def call_workflow_start_api(
+         self,
+         conf_name,
+         mode,
+         branch,
+         user,
+         conf_hash,
+         start=None,
+         end=None,
+         skip_long_running=False,
+     ):
+         url = f"{self.base_url}/workflow/v2/start"
+         end_dt = end.strftime("%Y-%m-%d") if end else date.today().strftime("%Y-%m-%d")
+         start_dt = (
+             start.strftime("%Y-%m-%d")
+             if start
+             else (date.today() - timedelta(days=14)).strftime("%Y-%m-%d")
+         )
+         workflow_request = {
+             "confName": conf_name,
+             "confHash": conf_hash,
+             "mode": mode,
+             "branch": branch,
+             "user": user,
+             "start": start_dt,
+             "end": end_dt,
+             "skipLongRunningNodes": skip_long_running,
+         }
+         headers = {"Content-Type": "application/json"}
+         if self.base_url.startswith("https") and self.sa is not None:
+             headers["Authorization"] = f"Bearer {self._sign_jwt(self.sa, url)}"
+         elif self.base_url.startswith("https"):
+             headers["Authorization"] = f"Bearer {self.id_token}"
+
+         try:
+             response = requests.post(url, json=workflow_request, headers=headers)
+             response.raise_for_status()
+             return response.json()
+         except requests.RequestException as e:
+             if e.response is not None and e.response.status_code == 401 and self.sa is None:
+                 print(
+                     " ❌ Error calling workflow start API. Unauthorized and no service account provided. Make sure the environment has default credentials set up or provide a service account name as SA_NAME in teams.py."
+                 )
+             elif e.response is not None and e.response.status_code == 401 and self.sa is not None:
+                 print(
+                     f" ❌ Error calling workflow start API. Unauthorized with provided service account: {self.sa}. Make sure the service account has the 'iap.webServiceVersions.accessViaIap' permission."
+                 )
+             else:
+                 print(f" ❌ Error calling workflow start API: {e}")
+             raise e
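
A minimal usage sketch of the client above, assuming a hypothetical hub URL, service account name, branch, and conf hashes (only ZiplineHub and its methods come from this file; the payload shape passed to call_upload_api is illustrative):

    from ai.chronon.repo.zipline_hub import ZiplineHub

    # Hypothetical IAP-protected hub; sa_name is the short service account name.
    hub = ZiplineHub("https://hub.example-customer.dev", sa_name="zipline-deploy")

    # Ask the hub which confs differ from what it already has...
    names_to_hashes = {"test.data.group_by_v1": "abc123", "test.data.v1": "def456"}
    changed = hub.call_diff_api(names_to_hashes)

    # ...and upload only those confs for the current branch.
    if changed:
        diff_confs = [{"name": n, "hash": names_to_hashes[n]} for n in changed]  # illustrative shape
        hub.call_upload_api(diff_confs, branch="my-feature-branch")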
ai/chronon/resources/gcp/group_bys/test/data.py ADDED
@@ -0,0 +1,30 @@
+ from sources.test.data import source_v1
+
+ from ai.chronon.group_by import Aggregation, GroupBy, Operation, TimeUnit, Window
+
+ # Define some window sizes to use below
+ window_sizes = [Window(length=day, time_unit=TimeUnit.DAYS) for day in [3, 14, 30]]
+
+ group_by_v1 = GroupBy(
+     backfill_start_date="2023-11-01",
+     sources=[source_v1],
+     keys=["user_id"],  # We are aggregating by user
+     online=True,
+     aggregations=[
+         Aggregation(
+             input_column="purchase_price", operation=Operation.SUM, windows=window_sizes
+         ),  # The sum of purchase prices in various windows
+         Aggregation(
+             input_column="purchase_price", operation=Operation.COUNT, windows=window_sizes
+         ),  # The count of purchases in various windows
+         Aggregation(
+             input_column="purchase_price", operation=Operation.AVERAGE, windows=window_sizes
+         ),  # The average purchase price by user in various windows
+         Aggregation(
+             input_column="purchase_price",
+             operation=Operation.LAST_K(10),
+         ),
+     ],
+     version=0,
+ )
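
The three windowed aggregations above expand into one feature per (operation, window) pair, nine windowed columns in all, plus the unwindowed LAST_K. A sketch of the expected output column names, assuming Chronon's usual {input_column}_{operation}_{window} naming (the compiled GroupBy output is the source of truth):

    # Assumed naming convention; verify against the compiled output.
    expected_columns = [
        f"purchase_price_{op}_{days}d"
        for op in ("sum", "count", "average")
        for days in (3, 14, 30)
    ] + ["purchase_price_last10"]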
ai/chronon/resources/gcp/joins/test/data.py ADDED
@@ -0,0 +1,26 @@
+ from gen_thrift.api.ttypes import EventSource, Source
+ from group_bys.test.data import group_by_v1
+
+ from ai.chronon.join import Join, JoinPart
+ from ai.chronon.query import Query, selects
+
+ """
+ This is the "left side" of the join that will comprise our training set. It is
+ responsible for providing the primary keys and timestamps for which features
+ will be computed.
+ """
+ source = Source(
+     events=EventSource(
+         table="data.checkouts",
+         query=Query(
+             selects=selects("user_id"),  # The primary key used to join various GroupBys together
+             time_column="ts",  # The event time used to compute feature values as-of
+         ),
+     )
+ )
+
+ v1 = Join(
+     left=source,
+     right_parts=[JoinPart(group_by=group_by_v1)],
+     row_ids="user_id",
+     version=0,
+ )
ai/chronon/resources/gcp/sources/test/data.py ADDED
@@ -0,0 +1,26 @@
+ from gen_thrift.api.ttypes import EventSource, Source
+
+ from ai.chronon.query import Query, selects
+
+ """
+ Example: Defining a Chronon Source from a Batch Table
+
+ This example demonstrates how to configure a Chronon `Source` from a BigQuery or
+ Hive table, with a clear event time column and selected fields for downstream
+ feature computation.
+ """
+
+ # Define the EventSource using the batch table and query,
+ # then wrap the EventSource in a Source object.
+ source_v1 = Source(
+     events=EventSource(
+         # The log table in the warehouse with historical purchase events, updated in batch daily
+         table="data.purchases",
+         # The streaming source topic that could be listened to for realtime events;
+         # see the 'returns' GroupBy for an example with a streaming source configured
+         topic=None,
+         query=Query(
+             selects=selects("user_id", "purchase_price"),  # Select the fields we care about
+             time_column="ts",  # The event time
+         ),
+     )
+ )
+
+ # The `source_v1` object can now be used in a Chronon join or pipeline definition
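
Since `topic=None` keeps this source batch-only, here is a hedged sketch of the realtime variant the comment above alludes to, with an illustrative topic name (the query logic stays identical):

    from gen_thrift.api.ttypes import EventSource, Source
    from ai.chronon.query import Query, selects

    source_streaming = Source(
        events=EventSource(
            table="data.purchases",   # batch history of the same events
            topic="purchase-events",  # illustrative topic carrying the realtime feed
            query=Query(
                selects=selects("user_id", "purchase_price"),
                time_column="ts",
            ),
        )
    )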
ai/chronon/resources/gcp/teams.py ADDED
@@ -0,0 +1,58 @@
+ from gen_thrift.api.ttypes import Team
+
+ from ai.chronon.repo.constants import RunMode
+ from ai.chronon.types import ConfigProperties, EnvironmentVariables
+
+ default = Team(
+     description="Default team",
+     email="<responsible-team-email>",
+     outputNamespace="default",
+     conf=ConfigProperties(
+         common={
+             "spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
+             "spark.chronon.table_write.format": "iceberg",
+             "spark.sql.defaultCatalog": "bigquery_catalog",
+             "spark.sql.catalog.bigquery_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
+             "spark.sql.catalog.bigquery_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
+             "spark.sql.catalog.bigquery_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
+             "spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
+             "spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
+             "spark.chronon.coalesce.factor": "10",
+             "spark.default.parallelism": "10",
+             "spark.sql.shuffle.partitions": "10",
+             # TODO: Please fill in the following values
+             "spark.sql.catalog.bigquery_catalog.warehouse": "gs://zipline-warehouse-<customer_id>/data/tables/",
+             "spark.sql.catalog.bigquery_catalog.gcp.bigquery.location": "<region>",
+             "spark.sql.catalog.bigquery_catalog.gcp.bigquery.project-id": "<project-id>",
+             "spark.chronon.partition.format": "<date-format>",  # ex: "yyyy-MM-dd"
+             "spark.chronon.partition.column": "<partition-column-name>",  # ex: "ds"
+         },
+     ),
+     env=EnvironmentVariables(
+         common={
+             # TODO: Please fill in the following values
+             "CUSTOMER_ID": "<customer_id>",
+             "GCP_PROJECT_ID": "<project-id>",
+             "GCP_REGION": "<region>",
+             "GCP_DATAPROC_CLUSTER_NAME": "<dataproc-cluster-name>",
+             "GCP_BIGTABLE_INSTANCE_ID": "<bigtable-instance-id>",
+             "ARTIFACT_PREFIX": "<customer-artifact-bucket>",
+             "CLOUD_PROVIDER": "<gcp | aws>",
+         },
+     ),
+ )
+
+
+ test = Team(
+     outputNamespace="data",
+     env=EnvironmentVariables(
+         common={}, modeEnvironments={RunMode.BACKFILL: {}, RunMode.UPLOAD: {}}
+     ),
+ )
+
+ team_conf = Team(
+     outputNamespace="test",
+     env=EnvironmentVariables(
+         common={},
+     ),
+ )
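
The empty `modeEnvironments` on the `test` team shows where per-mode overrides go. A hypothetical team that points backfills and uploads at different clusters, using the same placeholder convention as this file (all values illustrative):

    from gen_thrift.api.ttypes import Team
    from ai.chronon.repo.constants import RunMode
    from ai.chronon.types import EnvironmentVariables

    batch_team = Team(
        outputNamespace="batch",
        env=EnvironmentVariables(
            common={"GCP_REGION": "<region>"},
            modeEnvironments={
                RunMode.BACKFILL: {"GCP_DATAPROC_CLUSTER_NAME": "<backfill-cluster>"},
                RunMode.UPLOAD: {"GCP_DATAPROC_CLUSTER_NAME": "<upload-cluster>"},
            },
        ),
    )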
ai/chronon/source.py ADDED
@@ -0,0 +1,86 @@
+ """
+ Wrappers to directly create Source objects.
+ """
+
+ import gen_thrift.api.ttypes as ttypes
+
+
+ def EventSource(
+     table: str,
+     query: ttypes.Query,
+     topic: str = None,
+     is_cumulative: bool = None,
+ ) -> ttypes.Source:
+     """
+     Event Sources represent data that gets generated over time.
+     Typically, but not necessarily, events are logged to message buses like kafka,
+     kinesis or google pub/sub. Fact (fct) tables also make good event sources.
+
+     Attributes:
+
+     - table: Table currently needs to be a 'ds' (date string - yyyy-MM-dd) partitioned hive table.
+       Table names can contain subpartition specs, example db.table/system=mobile/currency=USD
+     - topic: The kafka topic the events flow through. The table should contain all the events
+       that historically came through this topic.
+     - query: The logic used to scan both the table and the topic. Contains row level
+       transformations and filtering expressed as Spark SQL statements.
+     - isCumulative: Whether each new hive partition contains not just the current day's events
+       but the entire set of events since the beginning. The key property is that the events
+       are not mutated across partitions.
+     """
+     return ttypes.Source(
+         events=ttypes.EventSource(table=table, topic=topic, query=query, isCumulative=is_cumulative)
+     )
+
+
+ def EntitySource(
+     snapshot_table: str,
+     query: ttypes.Query,
+     mutation_table: str = None,
+     mutation_topic: str = None,
+ ) -> ttypes.Source:
+     """
+     Entity Sources represent data that gets mutated over time - at row level. This is a group
+     of three data elements: snapshotTable, mutationTable and mutationTopic. mutationTable and
+     mutationTopic are only necessary if we are trying to create realtime or point-in-time
+     aggregations over these sources. Entity sources usually map 1:1 with database tables in
+     your OLTP store that typically serve live application traffic. When mutation data is
+     absent they map 1:1 to `dim` tables in a star schema.
+
+     Attributes:
+     - snapshotTable: Snapshot table currently needs to be a 'ds' (date string - yyyy-MM-dd) partitioned hive table.
+     - mutationTable: A table containing all the mutations that historically came through the
+       mutation topic. It needs all the fields present in the snapshot table, PLUS two
+       additional fields:
+       `mutation_time` - milliseconds since epoch of type Long that represents the time of the mutation
+       `is_before` - a boolean flag that represents whether this row contains values before or after the mutation.
+     - mutationTopic: The topic the realtime mutations flow through, used for streaming and
+       point-in-time aggregations.
+     - query: The logic used to scan the snapshot table, mutation table and topic. Contains
+       row level transformations and filtering expressed as Spark SQL statements.
+     """
+     return ttypes.Source(
+         entities=ttypes.EntitySource(
+             snapshotTable=snapshot_table,
+             mutationTable=mutation_table,
+             mutationTopic=mutation_topic,
+             query=query,
+         )
+     )
+
+
+ def JoinSource(join: ttypes.Join, query: ttypes.Query) -> ttypes.Source:
+     """
+     The output of a join can be used as a source for a `GroupBy`.
+     Useful for expressing complex computation in chronon.
+
+     Offline, this simply means that we will compute the necessary date ranges of the join
+     before we start computing the `GroupBy`.
+
+     Online, we will:
+     1. enrich the stream/topic of `join.left` with all the columns defined by the join
+     2. apply the selects & wheres defined in the `query`
+     3. perform aggregations defined in the *downstream* `GroupBy`
+     4. write the result to the kv store.
+     """
+     return ttypes.Source(joinSource=ttypes.JoinSource(join=join, query=query))
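
A short sketch of the wrappers above in use, mirroring the resources/gcp examples (table, topic, and column names are illustrative):

    from ai.chronon.query import Query, selects
    from ai.chronon.source import EntitySource, EventSource

    # Append-only events: a batch table plus an optional realtime topic.
    purchases = EventSource(
        table="data.purchases",   # 'ds'-partitioned table of historical events
        topic="purchase-events",  # illustrative realtime topic
        query=Query(selects=selects("user_id", "purchase_price"), time_column="ts"),
    )

    # Row-level mutating data: daily snapshots plus an optional mutation log.
    users = EntitySource(
        snapshot_table="data.users_snapshot",    # illustrative snapshot table
        mutation_table="data.users_mutations",   # snapshot fields + mutation_time + is_before
        query=Query(selects=selects("user_id", "account_tier")),
    )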