PyPI - ingestr - Versions diffs - 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl - Mend

ingestr 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic. Click here for more details.

Files changed (19) hide show

ingestr/src/buildinfo.py +1 -1
ingestr/src/destinations.py +1 -24
ingestr/src/elasticsearch/helpers.py +35 -9
ingestr/src/factory.py +4 -0
ingestr/src/fluxx/__init__.py +9 -0
ingestr/src/hubspot/__init__.py +6 -12
ingestr/src/intercom/__init__.py +142 -0
ingestr/src/intercom/helpers.py +674 -0
ingestr/src/intercom/settings.py +279 -0
ingestr/src/jira_source/__init__.py +314 -0
ingestr/src/jira_source/helpers.py +452 -0
ingestr/src/jira_source/settings.py +170 -0
ingestr/src/mongodb/helpers.py +34 -6
ingestr/src/sources.py +120 -0
{ingestr-0.14.1.dist-info → ingestr-0.14.3.dist-info}/METADATA +1 -1
{ingestr-0.14.1.dist-info → ingestr-0.14.3.dist-info}/RECORD +19 -13
{ingestr-0.14.1.dist-info → ingestr-0.14.3.dist-info}/WHEEL +0 -0
{ingestr-0.14.1.dist-info → ingestr-0.14.3.dist-info}/entry_points.txt +0 -0
{ingestr-0.14.1.dist-info → ingestr-0.14.3.dist-info}/licenses/LICENSE.md +0 -0

ingestr/src/intercom/settings.py ADDED Viewed

@@ -0,0 +1,279 @@
+"""
+Configuration settings and constants for Intercom API integration.
+"""
+from datetime import datetime
+from typing import Dict, List, Tuple
+# API Version - REQUIRED for all requests
+# NOTE(review): how this string is attached to requests is not visible in this
+# module - confirm against the client code in helpers.py.
+API_VERSION = "2.14"
+# Default start date for incremental loading
+# NOTE(review): this is a naive datetime (no tzinfo) - confirm that consumers
+# interpret it in the timezone they expect.
+DEFAULT_START_DATE = datetime(2020, 1, 1)
+# Pagination settings
+# DEFAULT_PAGE_SIZE and MAX_PAGE_SIZE currently hold the same value.
+DEFAULT_PAGE_SIZE = 150
+MAX_PAGE_SIZE = 150  # Intercom's maximum
+SCROLL_EXPIRY_SECONDS = 60  # Scroll sessions expire after 1 minute
+# Rate limiting settings
+# Request budget per 10-second window, as the constant name states.
+RATE_LIMIT_PER_10_SECONDS = 166
+# Fallback retry delay - presumably seconds, used when the API gives no
+# explicit retry hint; TODO confirm against the caller.
+RATE_LIMIT_RETRY_AFTER_DEFAULT = 10
+# Regional API endpoints
+# Maps a lowercase region code to the API base URL (no trailing slash).
+REGIONAL_ENDPOINTS = {
+    "us": "https://api.intercom.io",
+    "eu": "https://api.eu.intercom.io",
+    "au": "https://api.au.intercom.io",
+}
+# Resource configuration for automatic generation
+# Format: resource_name -> config dict
+# Keys that appear in the entries below:
+#   "type"            - one of "search", "pagination", "tickets", "simple"
+#   "incremental"     - bool; True for the search/pagination/tickets entries,
+#                       False for the "simple" ones
+#   "transform_func"  - a function name as a string, or None
+#                       (presumably resolved to a helper elsewhere - confirm)
+#   "columns"         - column name -> hints such as {"data_type": "json"}
+#   "endpoint", "data_key", "pagination_type", "params"
+#                     - only present on "pagination"/"simple" entries
+# NOTE(review): how each key is consumed is not visible in this module.
+RESOURCE_CONFIGS = {
+    # Search-based incremental resources
+    "contacts": {
+        "type": "search",
+        "incremental": True,
+        "transform_func": "transform_contact",
+        "columns": {
+            "custom_attributes": {"data_type": "json"},
+            "tags": {"data_type": "json"},
+        },
+    },
+    "conversations": {
+        "type": "search",
+        "incremental": True,
+        "transform_func": "transform_conversation",
+        "columns": {
+            "custom_attributes": {"data_type": "json"},
+            "tags": {"data_type": "json"},
+        },
+    },
+    # Pagination-based incremental resources
+    "companies": {
+        "type": "pagination",
+        "endpoint": "/companies",
+        "data_key": "data",
+        "pagination_type": "cursor",
+        "incremental": True,
+        "transform_func": "transform_company",
+        # Explicit page size, smaller than DEFAULT_PAGE_SIZE.
+        "params": {"per_page": 50},
+        "columns": {
+            "custom_attributes": {"data_type": "json"},
+            "tags": {"data_type": "json"},
+        },
+    },
+    "articles": {
+        "type": "pagination",
+        "endpoint": "/articles",
+        "data_key": "data",
+        "pagination_type": "cursor",
+        "incremental": True,
+        "transform_func": None,
+        # No extra query parameters; consumers must tolerate None here.
+        "params": None,
+        "columns": {},
+    },
+    # Special case - tickets
+    "tickets": {
+        "type": "tickets",
+        "incremental": True,
+        "transform_func": None,
+        "columns": {
+            "ticket_attributes": {"data_type": "json"},
+        },
+    },
+    # Simple replace resources (non-incremental)
+    "tags": {
+        "type": "simple",
+        "endpoint": "/tags",
+        "data_key": "data",
+        "pagination_type": "simple",
+        "incremental": False,
+        "transform_func": None,
+        "columns": {},
+    },
+    "segments": {
+        "type": "simple",
+        "endpoint": "/segments",
+        "data_key": "segments",
+        "pagination_type": "cursor",
+        "incremental": False,
+        "transform_func": None,
+        "columns": {},
+    },
+    "teams": {
+        "type": "simple",
+        "endpoint": "/teams",
+        "data_key": "teams",
+        "pagination_type": "simple",
+        "incremental": False,
+        "transform_func": None,
+        "columns": {},
+    },
+    "admins": {
+        "type": "simple",
+        "endpoint": "/admins",
+        "data_key": "admins",
+        "pagination_type": "simple",
+        "incremental": False,
+        "transform_func": None,
+        "columns": {},
+    },
+    "data_attributes": {
+        "type": "simple",
+        "endpoint": "/data_attributes",
+        "data_key": "data",
+        "pagination_type": "cursor",
+        "incremental": False,
+        "transform_func": None,
+        "columns": {
+            # The only entry that pins "id" to a nullable bigint column.
+            "id": {"data_type": "bigint", "nullable": True},
+        },
+    },
+}
+# Core endpoints with their configuration (kept for backwards compatibility)
+# Format: (endpoint_path, data_key, supports_incremental, pagination_type)
+# Note: "collections" is listed here but has no entry in RESOURCE_CONFIGS.
+CORE_ENDPOINTS: Dict[str, Tuple[str, str, bool, str]] = {
+    "contacts": ("/contacts", "data", True, "cursor"),
+    "companies": ("/companies", "data", True, "cursor"),
+    "conversations": ("/conversations", "conversations", True, "cursor"),
+    "tickets": ("/tickets", "tickets", True, "cursor"),
+    "admins": ("/admins", "admins", False, "simple"),
+    "teams": ("/teams", "teams", False, "simple"),
+    "tags": ("/tags", "data", False, "simple"),
+    "segments": ("/segments", "segments", False, "cursor"),
+    "articles": ("/articles", "data", True, "cursor"),
+    "collections": ("/help_center/collections", "data", False, "cursor"),
+    "data_attributes": ("/data_attributes", "data", False, "cursor"),
+}
+# Incremental endpoints using search API
+# Keys carry a "_search" suffix, so they do not match the plain resource names
+# used as keys in RESOURCE_CONFIGS and CORE_ENDPOINTS.
+SEARCH_ENDPOINTS: Dict[str, str] = {
+    "contacts_search": "/contacts/search",
+    "companies_search": "/companies/search",
+    "conversations_search": "/conversations/search",
+}
+# Special endpoints requiring different handling
+SCROLL_ENDPOINTS: List[str] = [
+    "companies",  # Can use scroll for large exports
+]
+# Event tracking endpoint
+EVENTS_ENDPOINT = "/events"
+# Ticket fields endpoint for custom field mapping
+# Path template: the "{ticket_type_id}" placeholder must be substituted
+# (e.g. with str.format) before the path is usable.
+TICKET_FIELDS_ENDPOINT = "/ticket_types/{ticket_type_id}/attributes"
+# Default fields to retrieve for each resource type
+# Each list holds top-level field names as plain strings.
+# NOTE(review): whether these are sent to the API or used to filter responses
+# locally is not visible in this module - confirm against the callers.
+# Contact fields.
+DEFAULT_CONTACT_FIELDS = [
+    "id",
+    "type",
+    "external_id",
+    "email",
+    "phone",
+    "name",
+    "created_at",
+    "updated_at",
+    "signed_up_at",
+    "last_seen_at",
+    "last_contacted_at",
+    "last_email_opened_at",
+    "last_email_clicked_at",
+    "browser",
+    "browser_language",
+    "browser_version",
+    "location",
+    "os",
+    "role",
+    "custom_attributes",
+    "tags",
+    "companies",
+]
+# Company fields.
+DEFAULT_COMPANY_FIELDS = [
+    "id",
+    "type",
+    "company_id",
+    "name",
+    "plan",
+    "size",
+    "website",
+    "industry",
+    "created_at",
+    "updated_at",
+    "monthly_spend",
+    "session_count",
+    "user_count",
+    "custom_attributes",
+    "tags",
+]
+# Conversation fields.
+DEFAULT_CONVERSATION_FIELDS = [
+    "id",
+    "type",
+    "created_at",
+    "updated_at",
+    "waiting_since",
+    "snoozed_until",
+    "state",
+    "open",
+    "read",
+    "priority",
+    "admin_assignee_id",
+    "team_assignee_id",
+    "tags",
+    "conversation_rating",
+    "source",
+    "contacts",
+    "teammates",
+    "custom_attributes",
+    "first_contact_reply",
+    "sla_applied",
+    "statistics",
+    "conversation_parts",
+]
+# Ticket fields.
+DEFAULT_TICKET_FIELDS = [
+    "id",
+    "type",
+    "ticket_id",
+    "category",
+    "ticket_attributes",
+    "ticket_state",
+    "ticket_type",
+    "created_at",
+    "updated_at",
+    "ticket_parts",
+    "contacts",
+    "admin_assignee_id",
+    "team_assignee_id",
+    "open",
+    "snoozed_until",
+]
+# Resources that support custom attributes
+# These are the same three resources whose RESOURCE_CONFIGS entries declare a
+# "custom_attributes" JSON column.
+SUPPORTS_CUSTOM_ATTRIBUTES = [
+    "contacts",
+    "companies",
+    "conversations",
+]
+# Maximum limits
+# NOTE(review): whether these mirror limits enforced by Intercom or are local
+# safety caps is not visible here - confirm before relying on them.
+MAX_CUSTOM_ATTRIBUTES_PER_RESOURCE = 100
+MAX_EVENT_TYPES_PER_WORKSPACE = 120
+MAX_CONVERSATION_PARTS = 500
+MAX_SEARCH_RESULTS = 10000
+# Field type mapping for custom attributes
+# Keys are Intercom attribute type names; values are the data type names used
+# for column hints (the same "json"/"bigint" spelling as in RESOURCE_CONFIGS).
+# Both "date" and "datetime" map to "timestamp"; "object" and "list" to "json".
+INTERCOM_TO_DLT_TYPE_MAPPING = {
+    "string": "text",
+    "integer": "bigint",
+    "float": "double",
+    "boolean": "bool",
+    "date": "timestamp",
+    "datetime": "timestamp",
+    "object": "json",
+    "list": "json",
+}

ingestr/src/jira_source/__init__.py ADDED Viewed

@@ -0,0 +1,314 @@
+"""
+This source provides data extraction from Jira Cloud via the REST API v3.
+It defines several functions to fetch data from different parts of Jira including
+projects, issues, users, boards, sprints, and various configuration objects like
+issue types, statuses, and priorities.
+"""
+from typing import Any, Iterable, Optional
+import dlt
+from dlt.common.typing import TDataItem
+from .helpers import get_client
+from .settings import (
+    DEFAULT_PAGE_SIZE,
+    DEFAULT_START_DATE,
+    ISSUE_FIELDS,
+)
+@dlt.source
+def jira_source() -> Any:
+    """
+    Bundle the Jira resources and transformers defined in this module into one dlt source.
+    The resource objects are returned as-is; none of them is called here, so no
+    request is issued by this function itself.
+    Returns:
+        list: The resources `projects`, `issues`, `users`, `issue_types`, `statuses`,
+        `priorities`, `resolutions` and the transformers `project_versions` and
+        `project_components`, in that order.
+    """
+    return [
+        projects,
+        issues,
+        users,
+        issue_types,
+        statuses,
+        priorities,
+        resolutions,
+        project_versions,
+        project_components,
+    ]
+@dlt.resource(write_disposition="replace")
+def projects(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    expand: Optional[str] = None,
+    recent: Optional[int] = None,
+) -> Iterable[TDataItem]:
+    """
+    Fetches and returns a list of projects from Jira.
+    The three credential arguments default to `dlt.secrets.value`; `expand` and
+    `recent` are forwarded unchanged to `client.get_projects`.
+    Args:
+        base_url (str): Jira instance URL (e.g., https://your-domain.atlassian.net)
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        expand (str): Comma-separated list of fields to expand
+        recent (int): Number of recent projects to return
+    Yields:
+        dict: The project data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_projects(expand=expand, recent=recent)
+@dlt.resource(write_disposition="merge", primary_key="id")
+def issues(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    jql: str = "order by updated DESC",
+    fields: Optional[str] = None,
+    expand: Optional[str] = None,
+    max_results: Optional[int] = None,
+    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "fields.updated",
+        initial_value=DEFAULT_START_DATE,
+        range_end="closed",
+        range_start="closed",
+    ),
+) -> Iterable[TDataItem]:
+    """
+    Fetches issues from Jira using JQL search.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        jql (str): JQL query string
+        fields (str): Comma-separated list of fields to return
+        expand (str): Comma-separated list of fields to expand
+        max_results (int): Maximum number of results to return
+        updated (str): The date from which to fetch updated issues
+    Yields:
+        dict: The issue data.
+    """
+    client = get_client(base_url, email, api_token)
+    # Build JQL with incremental filter
+    incremental_jql = jql
+    if updated.start_value:
+        date_filter = f"updated >= '{updated.start_value}'"
+        # Check if JQL has ORDER BY clause and handle it properly
+        jql_upper = jql.upper()
+        if "ORDER BY" in jql_upper:
+            # Split at ORDER BY and add filter before it
+            order_by_index = jql_upper.find("ORDER BY")
+            main_query = jql[:order_by_index].strip()
+            order_clause = jql[order_by_index:].strip()
+            if main_query and (
+                "WHERE" in main_query.upper()
+                or "AND" in main_query.upper()
+                or "OR" in main_query.upper()
+            ):
+                incremental_jql = f"({main_query}) AND {date_filter} {order_clause}"
+            else:
+                if main_query:
+                    incremental_jql = f"{main_query} AND {date_filter} {order_clause}"
+                else:
+                    incremental_jql = f"{date_filter} {order_clause}"
+        else:
+            # No ORDER BY clause, use original logic
+            if "WHERE" in jql_upper or "AND" in jql_upper or "OR" in jql_upper:
+                incremental_jql = f"({jql}) AND {date_filter}"
+            else:
+                incremental_jql = f"{jql} AND {date_filter}"
+    # Use default fields if not specified
+    if fields is None:
+        fields = ",".join(ISSUE_FIELDS)
+    yield from client.search_issues(
+        jql=incremental_jql, fields=fields, expand=expand, max_results=max_results
+    )
+@dlt.resource(write_disposition="replace")
+def users(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    username: Optional[str] = None,
+    account_id: Optional[str] = None,
+    max_results: int = DEFAULT_PAGE_SIZE,
+) -> Iterable[TDataItem]:
+    """
+    Fetches users from Jira.
+    `username`, `account_id` and `max_results` are forwarded unchanged to
+    `client.get_users`.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        username (str): Username to search for
+        account_id (str): Account ID to search for
+        max_results (int): Maximum results per page; defaults to DEFAULT_PAGE_SIZE
+    Yields:
+        dict: The user data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_users(
+        username=username, account_id=account_id, max_results=max_results
+    )
+@dlt.resource(write_disposition="replace")
+def issue_types(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all issue types from Jira.
+    Yields whatever `client.get_issue_types()` produces, without filtering.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The issue type data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_issue_types()
+@dlt.resource(write_disposition="replace")
+def statuses(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all statuses from Jira.
+    Yields whatever `client.get_statuses()` produces, without filtering.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The status data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_statuses()
+@dlt.resource(write_disposition="replace")
+def priorities(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all priorities from Jira.
+    Yields whatever `client.get_priorities()` produces, without filtering.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The priority data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_priorities()
+@dlt.resource(write_disposition="replace")
+def resolutions(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all resolutions from Jira.
+    Yields whatever `client.get_resolutions()` produces, without filtering.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The resolution data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_resolutions()
+@dlt.transformer(
+    data_from=projects,
+    write_disposition="replace",
+)
+@dlt.defer
+def project_versions(
+    project: TDataItem,
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches versions for each project from Jira.
+    Declared as a transformer over `projects` and wrapped in `dlt.defer`; it
+    receives one project item per call.
+    Args:
+        project (dict): The project data.
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Returns:
+        list[dict]: The version data for the given project, or an empty list
+        when the project has no (or an empty) "key".
+    """
+    # The client is built before the key check, so it is created even for
+    # projects that end up being skipped.
+    client = get_client(base_url, email, api_token)
+    project_key = project.get("key")
+    if not project_key:
+        return []
+    # Materialised into a list so the function returns a complete value.
+    return list(client.get_project_versions(project_key))
+@dlt.transformer(
+    data_from=projects,
+    write_disposition="replace",
+)
+@dlt.defer
+def project_components(
+    project: TDataItem,
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches components for each project from Jira.
+    Declared as a transformer over `projects` and wrapped in `dlt.defer`; it
+    receives one project item per call.
+    Args:
+        project (dict): The project data.
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Returns:
+        list[dict]: The component data for the given project, or an empty list
+        when the project has no (or an empty) "key".
+    """
+    # The client is built before the key check, so it is created even for
+    # projects that end up being skipped.
+    client = get_client(base_url, email, api_token)
+    project_key = project.get("key")
+    if not project_key:
+        return []
+    # Materialised into a list so the function returns a complete value.
+    return list(client.get_project_components(project_key))

ingestr 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

Potentially problematic release.

ingestr 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl