PyPI - omniload - Versions diffs - 0.0.0.dev0__py3-none-any.whl - Mend

omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (218) hide show

omniload/conftest.py +72 -0
omniload/main.py +810 -0
omniload/src/.gitignore +10 -0
omniload/src/adjust/__init__.py +108 -0
omniload/src/adjust/adjust_helpers.py +122 -0
omniload/src/airtable/__init__.py +84 -0
omniload/src/allium/__init__.py +128 -0
omniload/src/anthropic/__init__.py +277 -0
omniload/src/anthropic/helpers.py +525 -0
omniload/src/applovin/__init__.py +316 -0
omniload/src/applovin_max/__init__.py +117 -0
omniload/src/appsflyer/__init__.py +325 -0
omniload/src/appsflyer/client.py +110 -0
omniload/src/appstore/__init__.py +142 -0
omniload/src/appstore/client.py +126 -0
omniload/src/appstore/errors.py +15 -0
omniload/src/appstore/models.py +117 -0
omniload/src/appstore/resources.py +179 -0
omniload/src/arrow/__init__.py +81 -0
omniload/src/asana_source/__init__.py +281 -0
omniload/src/asana_source/helpers.py +30 -0
omniload/src/asana_source/settings.py +158 -0
omniload/src/attio/__init__.py +102 -0
omniload/src/attio/helpers.py +65 -0
omniload/src/blob.py +95 -0
omniload/src/bruin/__init__.py +76 -0
omniload/src/chess/__init__.py +180 -0
omniload/src/chess/helpers.py +35 -0
omniload/src/chess/settings.py +18 -0
omniload/src/clickup/__init__.py +85 -0
omniload/src/clickup/helpers.py +47 -0
omniload/src/collector/spinner.py +43 -0
omniload/src/couchbase_source/__init__.py +118 -0
omniload/src/couchbase_source/helpers.py +135 -0
omniload/src/cursor/__init__.py +83 -0
omniload/src/cursor/helpers.py +188 -0
omniload/src/customer_io/__init__.py +486 -0
omniload/src/customer_io/helpers.py +530 -0
omniload/src/destinations.py +982 -0
omniload/src/docebo/__init__.py +589 -0
omniload/src/docebo/client.py +435 -0
omniload/src/docebo/helpers.py +97 -0
omniload/src/dune/__init__.py +104 -0
omniload/src/dune/helpers.py +108 -0
omniload/src/dynamodb/__init__.py +86 -0
omniload/src/elasticsearch/__init__.py +80 -0
omniload/src/elasticsearch/helpers.py +141 -0
omniload/src/errors.py +26 -0
omniload/src/facebook_ads/__init__.py +403 -0
omniload/src/facebook_ads/exceptions.py +19 -0
omniload/src/facebook_ads/helpers.py +296 -0
omniload/src/facebook_ads/settings.py +224 -0
omniload/src/facebook_ads/utils.py +53 -0
omniload/src/factory.py +305 -0
omniload/src/filesystem/__init__.py +133 -0
omniload/src/filesystem/helpers.py +114 -0
omniload/src/filesystem/readers.py +187 -0
omniload/src/filters.py +62 -0
omniload/src/fireflies/__init__.py +151 -0
omniload/src/fireflies/helpers.py +753 -0
omniload/src/fluxx/__init__.py +10013 -0
omniload/src/fluxx/helpers.py +233 -0
omniload/src/frankfurter/__init__.py +157 -0
omniload/src/frankfurter/helpers.py +48 -0
omniload/src/freshdesk/__init__.py +103 -0
omniload/src/freshdesk/freshdesk_client.py +151 -0
omniload/src/freshdesk/settings.py +23 -0
omniload/src/fundraiseup/__init__.py +95 -0
omniload/src/fundraiseup/client.py +81 -0
omniload/src/github/__init__.py +202 -0
omniload/src/github/helpers.py +207 -0
omniload/src/github/queries.py +129 -0
omniload/src/github/settings.py +24 -0
omniload/src/google_ads/__init__.py +198 -0
omniload/src/google_ads/field.py +17 -0
omniload/src/google_ads/metrics.py +254 -0
omniload/src/google_ads/predicates.py +37 -0
omniload/src/google_ads/reports.py +411 -0
omniload/src/google_ads/test_google_ads.py +184 -0
omniload/src/google_analytics/__init__.py +144 -0
omniload/src/google_analytics/helpers.py +312 -0
omniload/src/google_sheets/README.md +95 -0
omniload/src/google_sheets/__init__.py +166 -0
omniload/src/google_sheets/helpers/__init__.py +15 -0
omniload/src/google_sheets/helpers/api_calls.py +160 -0
omniload/src/google_sheets/helpers/data_processing.py +316 -0
omniload/src/gorgias/__init__.py +595 -0
omniload/src/gorgias/helpers.py +166 -0
omniload/src/hostaway/__init__.py +302 -0
omniload/src/hostaway/client.py +288 -0
omniload/src/http/__init__.py +38 -0
omniload/src/http/readers.py +146 -0
omniload/src/http_client.py +24 -0
omniload/src/hubspot/__init__.py +800 -0
omniload/src/hubspot/helpers.py +417 -0
omniload/src/hubspot/settings.py +329 -0
omniload/src/indeed/__init__.py +153 -0
omniload/src/indeed/helpers.py +228 -0
omniload/src/influxdb/__init__.py +46 -0
omniload/src/influxdb/client.py +34 -0
omniload/src/intercom/__init__.py +142 -0
omniload/src/intercom/helpers.py +674 -0
omniload/src/intercom/settings.py +279 -0
omniload/src/isoc_pulse/__init__.py +159 -0
omniload/src/jira_source/__init__.py +377 -0
omniload/src/jira_source/helpers.py +510 -0
omniload/src/jira_source/settings.py +184 -0
omniload/src/kafka/__init__.py +120 -0
omniload/src/kafka/helpers.py +241 -0
omniload/src/kinesis/__init__.py +153 -0
omniload/src/kinesis/helpers.py +96 -0
omniload/src/klaviyo/__init__.py +237 -0
omniload/src/klaviyo/client.py +212 -0
omniload/src/klaviyo/helpers.py +19 -0
omniload/src/linear/__init__.py +634 -0
omniload/src/linear/helpers.py +111 -0
omniload/src/linkedin_ads/__init__.py +266 -0
omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
omniload/src/linkedin_ads/helpers.py +246 -0
omniload/src/loader.py +69 -0
omniload/src/mailchimp/__init__.py +126 -0
omniload/src/mailchimp/helpers.py +226 -0
omniload/src/mailchimp/settings.py +164 -0
omniload/src/masking.py +344 -0
omniload/src/mixpanel/__init__.py +62 -0
omniload/src/mixpanel/client.py +104 -0
omniload/src/monday/__init__.py +246 -0
omniload/src/monday/helpers.py +392 -0
omniload/src/monday/settings.py +325 -0
omniload/src/mongodb/__init__.py +281 -0
omniload/src/mongodb/helpers.py +975 -0
omniload/src/notion/__init__.py +69 -0
omniload/src/notion/helpers/__init__.py +14 -0
omniload/src/notion/helpers/client.py +178 -0
omniload/src/notion/helpers/database.py +92 -0
omniload/src/notion/settings.py +17 -0
omniload/src/partition.py +32 -0
omniload/src/personio/__init__.py +345 -0
omniload/src/personio/helpers.py +100 -0
omniload/src/phantombuster/__init__.py +65 -0
omniload/src/phantombuster/client.py +87 -0
omniload/src/pinterest/__init__.py +82 -0
omniload/src/pipedrive/__init__.py +212 -0
omniload/src/pipedrive/helpers/__init__.py +37 -0
omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
omniload/src/pipedrive/helpers/pages.py +129 -0
omniload/src/pipedrive/settings.py +41 -0
omniload/src/pipedrive/typing.py +17 -0
omniload/src/plusvibeai/__init__.py +335 -0
omniload/src/plusvibeai/helpers.py +544 -0
omniload/src/plusvibeai/settings.py +252 -0
omniload/src/primer/__init__.py +45 -0
omniload/src/primer/helpers.py +79 -0
omniload/src/quickbooks/__init__.py +117 -0
omniload/src/reddit_ads/__init__.py +183 -0
omniload/src/reddit_ads/helpers.py +232 -0
omniload/src/resource.py +40 -0
omniload/src/revenuecat/__init__.py +83 -0
omniload/src/revenuecat/helpers.py +237 -0
omniload/src/salesforce/__init__.py +170 -0
omniload/src/salesforce/helpers.py +78 -0
omniload/src/shopify/__init__.py +1953 -0
omniload/src/shopify/exceptions.py +17 -0
omniload/src/shopify/helpers.py +202 -0
omniload/src/shopify/settings.py +19 -0
omniload/src/slack/__init__.py +290 -0
omniload/src/slack/helpers.py +218 -0
omniload/src/slack/settings.py +36 -0
omniload/src/smartsheets/__init__.py +82 -0
omniload/src/snapchat_ads/__init__.py +455 -0
omniload/src/snapchat_ads/client.py +72 -0
omniload/src/snapchat_ads/helpers.py +630 -0
omniload/src/snapchat_ads/settings.py +130 -0
omniload/src/socrata_source/__init__.py +83 -0
omniload/src/socrata_source/helpers.py +85 -0
omniload/src/socrata_source/settings.py +8 -0
omniload/src/solidgate/__init__.py +219 -0
omniload/src/solidgate/helpers.py +154 -0
omniload/src/sources.py +5408 -0
omniload/src/sql_database/__init__.py +0 -0
omniload/src/sql_database/callbacks.py +66 -0
omniload/src/stripe_analytics/__init__.py +183 -0
omniload/src/stripe_analytics/helpers.py +386 -0
omniload/src/stripe_analytics/settings.py +80 -0
omniload/src/table_definition.py +15 -0
omniload/src/testdata/fakebqcredentials.json +14 -0
omniload/src/tiktok_ads/__init__.py +150 -0
omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
omniload/src/time.py +11 -0
omniload/src/trustpilot/__init__.py +48 -0
omniload/src/trustpilot/client.py +48 -0
omniload/src/version.py +6 -0
omniload/src/wise/__init__.py +68 -0
omniload/src/wise/client.py +63 -0
omniload/src/zendesk/__init__.py +480 -0
omniload/src/zendesk/helpers/__init__.py +39 -0
omniload/src/zendesk/helpers/api_helpers.py +119 -0
omniload/src/zendesk/helpers/credentials.py +68 -0
omniload/src/zendesk/helpers/talk_api.py +132 -0
omniload/src/zendesk/settings.py +71 -0
omniload/src/zoom/__init__.py +99 -0
omniload/src/zoom/helpers.py +102 -0
omniload/testdata/.gitignore +2 -0
omniload/testdata/create_replace.csv +21 -0
omniload/testdata/delete_insert_expected.csv +6 -0
omniload/testdata/delete_insert_part1.csv +5 -0
omniload/testdata/delete_insert_part2.csv +6 -0
omniload/testdata/merge_expected.csv +5 -0
omniload/testdata/merge_part1.csv +4 -0
omniload/testdata/merge_part2.csv +5 -0
omniload/tests/unit/test_smartsheets.py +133 -0
omniload-0.0.0.dev0.dist-info/METADATA +439 -0
omniload-0.0.0.dev0.dist-info/RECORD +218 -0
omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0

omniload/src/asana_source/settings.py ADDED Viewed

@@ -0,0 +1,158 @@
+# Copyright 2022-2025 ScaleVector
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Asana source settings and constants"""
+# Default start date for Asana API requests, only tasks started after this date will be collected.
+# Stored as an ISO 8601 string with millisecond precision and a `Z` suffix.
+DEFAULT_START_DATE = "2010-01-01T00:00:00.000Z"
+# Asana API request timeout.
+# NOTE(review): the unit is not stated here; presumably seconds - confirm at the call site.
+REQUEST_TIMEOUT = 300
+# List of workspace fields to be retrieved from Asana API.
+# NOTE(review): presumably sent as the `opt_fields` request option - confirm in asana_source/__init__.py.
+WORKSPACE_FIELDS = ("gid", "name", "is_organization", "resource_type", "email_domains")
+# List of project fields to be retrieved from Asana API.
+# Kept as a tuple so the selection cannot be mutated at runtime.
+PROJECT_FIELDS = (
+    "name",
+    "gid",
+    "owner",
+    "current_status",
+    "custom_fields",
+    "default_view",
+    "due_date",
+    "due_on",
+    "is_template",
+    "created_at",
+    "modified_at",
+    "start_on",
+    "archived",
+    "public",
+    "members",
+    "followers",
+    "color",
+    "notes",
+    "icon",
+    "permalink_url",
+    "workspace",
+    "team",
+    "resource_type",
+    "current_status_update",
+    "custom_field_settings",
+    "completed",
+    "completed_at",
+    "completed_by",
+    "created_from_template",
+    "project_brief",
+)
+# List of section fields to be retrieved from Asana API.
+# Both the singular "project" and the plural "projects" keys are requested.
+SECTION_FIELDS = (
+    "gid",
+    "resource_type",
+    "name",
+    "created_at",
+    "project",
+    "projects",
+)
+# List of tag fields to be retrieved from Asana API.
+TAG_FIELDS = (
+    "gid",
+    "resource_type",
+    "created_at",
+    "followers",
+    "name",
+    "color",
+    "notes",
+    "permalink_url",
+    "workspace",
+)
+# List of task fields to be retrieved from Asana API.
+# The dotted entries ("memberships.project.name", "memberships.section.name")
+# request nested attributes of each membership in addition to "memberships" itself.
+# NOTE(review): only the singular "project" is listed here; Asana's task schema
+# appears to expose "projects" (plural) - confirm against the Asana API reference.
+TASK_FIELDS = (
+    "gid",
+    "resource_type",
+    "name",
+    "approval_status",
+    "assignee_status",
+    "created_at",
+    "assignee",
+    "start_on",
+    "start_at",
+    "due_on",
+    "due_at",
+    "completed",
+    "completed_at",
+    "completed_by",
+    "modified_at",
+    "dependencies",
+    "dependents",
+    "external",
+    "notes",
+    "num_subtasks",
+    "resource_subtype",
+    "followers",
+    "parent",
+    "permalink_url",
+    "tags",
+    "workspace",
+    "custom_fields",
+    "project",
+    "memberships",
+    "memberships.project.name",
+    "memberships.section.name",
+)
+# List of story fields to be retrieved from Asana API.
+# Includes paired old/new attributes ("old_section"/"new_section",
+# "old_text_value"/"new_text_value") alongside the story's own metadata.
+STORY_FIELDS = (
+    "gid",
+    "resource_type",
+    "created_at",
+    "created_by",
+    "resource_subtype",
+    "text",
+    "is_pinned",
+    "assignee",
+    "dependency",
+    "follower",
+    "new_section",
+    "old_section",
+    "new_text_value",
+    "old_text_value",
+    "preview",
+    "project",
+    "source",
+    "story",
+    "tag",
+    "target",
+    "task",
+    "sticker_name",
+    "custom_field",
+    "type",
+)
+# List of team fields to be retrieved from Asana API.
+# NOTE: this constant is named TEAMS_FIELD, unlike its *_FIELDS siblings in this
+# module; the name is part of the module's public surface, so it is left unchanged.
+TEAMS_FIELD = (
+    "gid",
+    "resource_type",
+    "name",
+    "description",
+    "organization",
+    "permalink_url",
+    "visibility",
+)
+# List of user fields to be retrieved from Asana API.
+USER_FIELDS = ("gid", "resource_type", "name", "email", "photo", "workspaces")

omniload/src/attio/__init__.py ADDED Viewed

@@ -0,0 +1,102 @@
+from typing import Iterable, Iterator
+import dlt
+from dlt.sources import DltResource
+from .helpers import AttioClient
+@dlt.source(max_table_nesting=0)
+def attio_source(
+    api_key: str,
+    params: list[str],
+) -> Iterable[DltResource]:
+    """Build the dlt resources for Attio objects, records, lists and list entries.
+    Args:
+        api_key: Attio API key; passed straight to `AttioClient`.
+        params: Extra positional arguments for the selected resource (judging by
+            the error messages, the parts after the resource name in the table
+            string, e.g. the slug in `records:{object_api_slug}`). The count is
+            validated lazily, when a resource is iterated.
+    Returns:
+        A tuple of five resources, all using the `replace` write disposition and
+        hinting `created_at` as a partitioned timestamp column.
+    """
+    client = AttioClient(api_key)
+    def created_at_hint() -> dict:
+        # A fresh dict per resource, so the resources never share a hints object.
+        return {"created_at": {"data_type": "timestamp", "partition": True}}
+    def single_param(error_message: str) -> str:
+        # Resources addressed as `name:{value}` need exactly one extra argument.
+        if len(params) != 1:
+            raise ValueError(error_message)
+        return params[0]
+    # https://docs.attio.com/rest-api/endpoint-reference/objects/list-objects - does not support pagination
+    @dlt.resource(
+        name="objects",
+        write_disposition="replace",
+        columns=created_at_hint(),
+    )
+    def fetch_objects() -> Iterator[dict]:
+        if len(params) > 0:
+            raise ValueError("Objects table must be in the format `objects`")
+        yield client.fetch_all("objects", "get")
+    # https://docs.attio.com/rest-api/endpoint-reference/records/list-records
+    @dlt.resource(
+        name="records",
+        write_disposition="replace",
+        columns=created_at_hint(),
+    )
+    def fetch_records() -> Iterator[dict]:
+        object_slug = single_param(
+            "Records table must be in the format `records:{object_api_slug}`"
+        )
+        yield client.fetch_paginated(f"objects/{object_slug}/records/query", "post")
+    # https://docs.attio.com/rest-api/endpoint-reference/lists/list-all-lists -- does not support pagination
+    @dlt.resource(
+        name="lists",
+        write_disposition="replace",
+        columns=created_at_hint(),
+    )
+    def fetch_lists() -> Iterator[dict]:
+        # Unlike the other resources, this one does not validate `params`.
+        yield client.fetch_all("lists", "get")
+    # https://docs.attio.com/rest-api/endpoint-reference/entries/list-entries
+    @dlt.resource(
+        name="list_entries",
+        write_disposition="replace",
+        columns=created_at_hint(),
+    )
+    def fetch_list_entries() -> Iterator[dict]:
+        list_id = single_param(
+            "List entries table must be in the format `list_entries:{list_id}`"
+        )
+        yield client.fetch_paginated(f"lists/{list_id}/entries/query", "post")
+    @dlt.resource(
+        name="all_list_entries",
+        write_disposition="replace",
+        columns=created_at_hint(),
+    )
+    def fetch_all_list_entries() -> Iterator[dict]:
+        parent_slug = single_param(
+            "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
+        )
+        # Walk every list and stream the entries of those whose parent object matches.
+        for attio_list in client.fetch_all("lists", "get"):
+            if parent_slug not in attio_list["parent_object"]:
+                continue
+            entries_path = f"lists/{attio_list['id']['list_id']}/entries/query"
+            yield from client.fetch_paginated(entries_path, "post")
+    return (
+        fetch_objects,
+        fetch_records,
+        fetch_lists,
+        fetch_list_entries,
+        fetch_all_list_entries,
+    )

omniload/src/attio/helpers.py ADDED Viewed

@@ -0,0 +1,65 @@
+from omniload.src.http_client import create_client
+class AttioClient:
+    """Small client for the Attio v2 REST API.
+    Every item yielded by the fetch methods is passed through `flatten_item`.
+    """
+    def __init__(self, api_key: str):
+        """Store the base URL and bearer-token headers and create the HTTP client."""
+        self.base_url = "https://api.attio.com/v2"
+        self.headers = {
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        }
+        self.client = create_client()
+    def fetch_paginated(self, path: str, method: str, limit: int = 1000, params=None):
+        """Yield every item of a limit/offset-paginated endpoint.
+        Args:
+            path: Endpoint path relative to `base_url`.
+            method: "get" sends the arguments as query parameters; any other
+                value sends them as a JSON body via POST.
+            limit: Page size requested from the API.
+            params: Extra query/body arguments merged into every request.
+        Yields:
+            Flattened items from the `data` array of each page.
+        Raises:
+            Exception: On a non-200 status or a response without a `data` key.
+        """
+        url = f"{self.base_url}/{path}"
+        if params is None:
+            params = {}
+        offset = 0
+        while True:
+            # The pagination keys are applied last for BOTH methods. If a caller's
+            # "limit" won (as it used to for GET), the page size sent to the API
+            # would disagree with the `len(data) < limit` stop check below and the
+            # loop could stop after a short first page; a caller "offset" would
+            # also prevent the cursor from ever advancing.
+            page_args = {**params, "limit": limit, "offset": offset}
+            if method == "get":
+                response = self.client.get(
+                    url, headers=self.headers, params=page_args
+                )
+            else:
+                response = self.client.post(url, headers=self.headers, json=page_args)
+            if response.status_code != 200:
+                raise Exception(f"HTTP {response.status_code} error: {response.text}")
+            response_data = response.json()
+            if "data" not in response_data:
+                raise Exception(
+                    "Attio API returned a response without the expected data"
+                )
+            data = response_data["data"]
+            for item in data:
+                yield flatten_item(item)
+            # A short (or empty) page means there is nothing left to fetch.
+            if len(data) < limit:
+                break
+            offset += limit
+    def fetch_all(self, path: str, method: str = "get", params=None):
+        """Yield every item of a non-paginated endpoint from a single request.
+        Args:
+            path: Endpoint path relative to `base_url`.
+            method: "get" sends `params` as query parameters; any other value
+                sends them as a JSON body via POST.
+            params: Optional query/body arguments.
+        Yields:
+            Flattened items from the response's `data` array (nothing when the
+            key is absent).
+        Raises:
+            Whatever `response.raise_for_status()` raises on an error status.
+        """
+        url = f"{self.base_url}/{path}"
+        params = params or {}
+        if method == "get":
+            response = self.client.get(url, headers=self.headers, params=params)
+        else:
+            response = self.client.post(url, headers=self.headers, json=params)
+        response.raise_for_status()
+        data = response.json().get("data", [])
+        for item in data:
+            yield flatten_item(item)
+def flatten_item(item: dict) -> dict:
+    """Copy the parts of an item's composite `id` onto the item's top level.
+    The item is modified in place and also returned. The original `id` entry is
+    kept unless the id mapping itself contains an `id` key. Items whose `id` is
+    missing or is not a mapping are returned untouched instead of raising.
+    """
+    identifiers = item.get("id")
+    if isinstance(identifiers, dict):
+        item.update(identifiers)
+    return item

omniload/src/blob.py ADDED Viewed

@@ -0,0 +1,95 @@
+import warnings
+from typing import Tuple, TypeAlias
+from urllib.parse import ParseResult, urlparse
+# Name of the bucket: first element of the tuple returned by `parse_uri`.
+BucketName: TypeAlias = str
+# File glob inside the bucket: second element of the tuple returned by `parse_uri`.
+FileGlob: TypeAlias = str
+class UnsupportedEndpointError(Exception):
+    """Raised when a file format or endpoint hint has no matching reader."""
+def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
+    """
+    Parse the URI of a blob storage and
+    return the bucket name and the file glob.
+    Supports the following forms:
+    - uri: "gs://"
+      table: "bucket-name/file-glob"
+    - uri: "gs://uri-bucket-name" (uri-bucket-name is preferred)
+      table: "gs://table-bucket-name/file-glob"
+    - uri: "gs://"
+      table: "gs://bucket-name/file-glob"
+    - uri: gs://bucket-name/file-glob
+      table: None
+    - uri: "gs://bucket-name"
+      table: "file-glob"
+    The first form is the preferred method. Other forms are supported but discouraged.
+    Args:
+        uri: The already-parsed source URI.
+        table: The table string; None is treated like an empty string.
+    Returns:
+        A (bucket name, file glob) tuple. The bucket name is "" when neither
+        the URI nor the table provides one.
+    """
+    # The documented "table: None" form used to crash on `None.strip()`.
+    table = (table or "").strip()
+    host = uri.netloc.strip()
+    # Legacy form: the glob lives in the URI path, so the table is ignored.
+    if table == "" or uri.path.strip() != "":
+        warnings.warn(
+            f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return host, uri.path.lstrip("/")
+    table_uri = urlparse(table)
+    # A bucket in the URI wins over any bucket named in the table.
+    if host != "":
+        return host, table_uri.path.lstrip("/")
+    # Table written as a full URI, e.g. "gs://bucket-name/file-glob".
+    if table_uri.hostname:
+        return table_uri.hostname, table_uri.path.lstrip("/")
+    # Plain "bucket-name/file-glob": the first path segment is the bucket.
+    parts = table_uri.path.lstrip("/").split("/", maxsplit=1)
+    if len(parts) != 2:
+        return "", parts[0]
+    return parts[0], parts[1]
+def parse_endpoint(path: str) -> str:
+    """
+    Parse the endpoint kind from the file path.
+    The kind is derived from the file extension, one of [csv, jsonl, parquet];
+    a trailing ".gz" is skipped so "data.csv.gz" resolves like "data.csv".
+    Args:
+        path: File path or glob whose extension selects the reader.
+    Returns:
+        The reader name: "read_csv", "read_jsonl" or "read_parquet".
+    Raises:
+        UnsupportedEndpointError: If the extension is not a supported format.
+    """
+    segments = path.split(".")
+    file_extension = segments[-1]
+    # Look past ".gz" only when something precedes it; a bare "gz" used to
+    # raise IndexError here instead of the documented error below.
+    if file_extension == "gz" and len(segments) > 1:
+        file_extension = segments[-2]
+    readers = {
+        "csv": "read_csv",
+        "jsonl": "read_jsonl",
+        "parquet": "read_parquet",
+    }
+    if file_extension not in readers:
+        raise UnsupportedEndpointError(f"Unsupported file format: {file_extension}")
+    return readers[file_extension]
+def determine_endpoint(table: str, path: str) -> str:
+    """
+    Determine the endpoint/method to use for reading data from a blob source.
+    An explicit "#<format>" suffix on the table wins; otherwise the format is
+    derived from the extension of `path`.
+    Args:
+        table: Table string, optionally ending in "#csv", "#csv_headless",
+            "#jsonl" or "#parquet".
+        path: File path or glob, used only when the table carries no hint.
+    Returns:
+        The reader name, e.g. "read_csv" or "read_csv_headless".
+    Raises:
+        UnsupportedEndpointError: If the "#" hint is not a supported format.
+        ValueError: If no hint is given and the path extension cannot be resolved.
+    """
+    if "#" in table:
+        # Split on the LAST "#", so a "#" earlier in the table (e.g. inside a
+        # file name) no longer fails with an opaque unpacking ValueError.
+        _, endpoint = table.rsplit("#", 1)
+        if endpoint not in ["csv", "csv_headless", "jsonl", "parquet"]:
+            raise UnsupportedEndpointError(f"Unsupported file format: {endpoint}")
+        endpoint = f"read_{endpoint}"
+    else:
+        try:
+            endpoint = parse_endpoint(path)
+        except Exception as e:
+            # Deliberately broad: every parsing failure surfaces as one ValueError.
+            raise ValueError(f"Failed to parse endpoint from path: {path}") from e
+    return endpoint

omniload/src/bruin/__init__.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""Bruin source for fetching pipeline and asset data from Bruin Cloud API"""
+from typing import Iterator
+import dlt
+from dlt.sources.helpers import requests
+BASE_URL = "https://cloud.getbruin.com/api/v1"
+def _fetch_pipelines(headers: dict) -> list:
+    """GET `{BASE_URL}/pipelines` and return the decoded JSON body.
+    Raises whatever `raise_for_status()` raises on an HTTP error status.
+    """
+    pipelines_url = f"{BASE_URL}/pipelines"
+    resp = requests.get(pipelines_url, headers=headers)
+    resp.raise_for_status()
+    return resp.json()
+@dlt.source(name="bruin", max_table_nesting=0)
+def bruin_source(api_token: str):
+    """
+    A dlt source for the Bruin Cloud API.
+    Args:
+        api_token (str): The API token, sent as a bearer token in the `Authorization` header.
+    Returns:
+        tuple: The `pipelines` and `assets` resources, in that order.
+    """
+    auth_headers = {"Authorization": f"Bearer {api_token}"}
+    # Keys projected out of each payload; a key missing from the payload becomes None.
+    pipeline_keys = (
+        "name",
+        "description",
+        "project",
+        "owner",
+        "default_connections",
+        "schedule",
+        "commit",
+        "start_date",
+    )
+    asset_keys = (
+        "name",
+        "type",
+        "pipeline",
+        "project",
+        "uri",
+        "description",
+        "upstreams",
+        "downstream",
+        "owner",
+        "content",
+        "columns",
+        "materialization",
+        "parameters",
+    )
+    @dlt.resource(write_disposition="replace")
+    def pipelines() -> Iterator[dict]:
+        """
+        Fetches all pipelines and yields one row per pipeline with the `pipeline_keys` fields.
+        """
+        for entry in _fetch_pipelines(auth_headers):
+            yield {key: entry.get(key) for key in pipeline_keys}
+    @dlt.resource(write_disposition="replace")
+    def assets() -> Iterator[dict]:
+        """
+        Fetches all pipelines (same endpoint as `pipelines`) and yields one row
+        per entry of each pipeline's `assets` list.
+        """
+        for entry in _fetch_pipelines(auth_headers):
+            for asset in entry.get("assets", []):
+                yield {key: asset.get(key) for key in asset_keys}
+    return pipelines, assets

omniload/src/chess/__init__.py ADDED Viewed

@@ -0,0 +1,180 @@
+# Copyright 2022-2025 ScaleVector
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A source loading player profiles and games from chess.com api"""
+from typing import Any, Callable, Dict, Iterator, List, Sequence
+import dlt
+from dlt.common import pendulum
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+from dlt.sources.helpers import requests
+from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
+from .settings import UNOFFICIAL_CHESS_API_URL
+@dlt.source(name="chess", max_table_nesting=0)
+def source(
+    players: List[str], start_month: str = None, end_month: str = None
+) -> Sequence[DltResource]:
+    """
+    A dlt source for the chess.com api, bundling the player-profile, archive,
+    game and online-status resources for the given players.
+    Args:
+        players (List[str]): A list of the player usernames for which to get the data.
+        start_month (str, optional): Filters out all the matches happening before `start_month`. Defaults to None.
+        end_month (str, optional): Filters out all the matches happening after `end_month`. Defaults to None.
+    Returns:
+        Sequence[DltResource]: players_profiles, players_archives, players_games and
+        players_online_status, in that order.
+    """
+    profiles = players_profiles(players)
+    archives = players_archives(players)
+    # Only the games resource takes the month window.
+    games = players_games(players, start_month=start_month, end_month=end_month)
+    online_status = players_online_status(players)
+    return (profiles, archives, games, online_status)
+@dlt.resource(
+    write_disposition="replace",
+    columns={
+        "last_online": {"data_type": "timestamp"},
+        "joined": {"data_type": "timestamp"},
+    },
+)
+def players_profiles(players: List[str]) -> Iterator[TDataItem]:
+    """
+    Yields one deferred profile lookup per player username.
+    Args:
+        players (List[str]): List of player usernames to retrieve profiles for.
+    Yields:
+        Iterator[TDataItem]: An iterator over player profiles data.
+    """
+    # fetch profiles in parallel by decorating the http request with defer
+    @dlt.defer
+    def _profile_for(username: str) -> TDataItem:
+        return get_path_with_retry(f"player/{username}")
+    yield from map(_profile_for, players)
+@dlt.resource(write_disposition="replace", selected=False)
+def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
+    """
+    Yields, per player, the list found under the `archives` key of that
+    player's games-archives response (an empty list when the key is absent).
+    Args:
+        players (List[str]): List of player usernames to retrieve archives for.
+    Yields:
+        Iterator[List[TDataItem]]: An iterator over list of player archive data.
+    """
+    for username in players:
+        archives_path = f"player/{username}/games/archives"
+        yield get_path_with_retry(archives_path).get("archives", [])
+@dlt.resource(
+    write_disposition="replace", columns={"end_time": {"data_type": "timestamp"}}
+)
+def players_games(
+    players: List[str], start_month: str = None, end_month: str = None
+) -> Iterator[Callable[[], List[TDataItem]]]:
+    """
+    Yields `players` games that happened between `start_month` and `end_month`.
+    Archive URLs already recorded in this resource's state are skipped, so each
+    archive is requested at most once for as long as that state is preserved.
+    Args:
+        players (List[str]): List of player usernames to retrieve games for.
+        start_month (str, optional): The starting month in the format "YYYY/MM". Defaults to None.
+        end_month (str, optional): The ending month in the format "YYYY/MM". Defaults to None.
+    Yields:
+        Iterator[Callable[[], List[TDataItem]]]: An iterator over callables that return a list of games for each player.
+    """
+    # NOTE(review): this resource uses write_disposition="replace" while also
+    # skipping archives remembered in resource state; if state survives between
+    # runs, a later run would replace the table with only the newly seen
+    # archives - confirm this is intended.
+    # do a simple validation to prevent common mistakes in month format
+    validate_month_string(start_month)
+    validate_month_string(end_month)
+    # get a list of already checked archives
+    # from your point of view, the state is python dictionary that will have the same content the next time this function is called
+    checked_archives = dlt.current.resource_state().setdefault("archives", [])
+    # get player archives, note that you can call the resource like any other function and just iterate it like a list
+    archives = players_archives(players)
+    # get archives in parallel by decorating the http request with defer
+    @dlt.defer
+    def _get_archive(url: str) -> List[TDataItem]:
+        try:
+            games = get_url_with_retry(url).get("games", [])
+            return games  # type: ignore
+        except requests.HTTPError as http_err:
+            # sometimes archives are not available and the error seems to be permanent
+            if http_err.response.status_code == 404:
+                return []
+            raise
+    # enumerate the archives
+    for url in archives:
+        # the `url` format is https://api.chess.com/pub/player/{username}/games/{YYYY}/{MM}
+        # so the last 7 characters are "YYYY/MM", which compares correctly as a plain string
+        if start_month and url[-7:] < start_month:
+            continue
+        if end_month and url[-7:] > end_month:
+            continue
+        # do not download archive again
+        if url in checked_archives:
+            continue
+        # the url is recorded before the deferred download actually runs
+        checked_archives.append(url)
+        # get the filtered archive
+        yield _get_archive(url)
+@dlt.resource(write_disposition="append")
+def players_online_status(players: List[str]) -> Iterator[TDataItem]:
+    """
+    Yields the current online status of each player, stamped with the check time.
+    Args:
+        players (List[str]): List of player usernames to check online status for.
+    Yields:
+        Iterator[TDataItem]: An iterator over the online status of each player.
+    """
+    # we'll use unofficial endpoint to get online status, the official seems to be removed
+    for player in players:
+        popup_url = f"{UNOFFICIAL_CHESS_API_URL}user/popup/{player}"
+        status = get_url_with_retry(popup_url)
+        # keep just the relevant selection of the response
+        row = {
+            "username": player,
+            "onlineStatus": status["onlineStatus"],
+            "lastLoginDate": status["lastLoginDate"],
+            "check_time": pendulum.now(),  # dlt can deal with native python dates
+        }
+        yield row
+@dlt.source
+def chess_dlt_config_example(
+    secret_str: str = dlt.secrets.value,
+    secret_dict: Dict[str, Any] = dlt.secrets.value,
+    config_int: int = dlt.config.value,
+) -> DltResource:
+    """
+    An example source whose arguments default to dlt-provided secrets and config values.
+    Args:
+        secret_str (str, optional): Secret string provided by dlt.secrets.value. Defaults to dlt.secrets.value.
+        secret_dict (Dict[str, Any], optional): Secret dictionary provided by dlt.secrets.value. Defaults to dlt.secrets.value.
+        config_int (int, optional): Config integer provided by dlt.config.value. Defaults to dlt.config.value.
+    Returns:
+        DltResource: A resource named `config_values` yielding the three received values.
+    """
+    # it is just a test: echo the configured values back as a resource
+    configured_values = [secret_str, secret_dict, config_int]
+    return dlt.resource(configured_values, name="config_values")