PyPI - ingestr - Versions diffs - 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl - Mend

ingestr 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic. Click here for more details.

Files changed (19)

ingestr/src/buildinfo.py +1 -1
ingestr/src/destinations.py +1 -24
ingestr/src/elasticsearch/helpers.py +35 -9
ingestr/src/factory.py +2 -0
ingestr/src/fluxx/__init__.py +9 -0
ingestr/src/freshdesk/__init__.py +2 -0
ingestr/src/freshdesk/freshdesk_client.py +15 -1
ingestr/src/hubspot/__init__.py +6 -12
ingestr/src/intercom/settings.py +3 -1
ingestr/src/jira_source/__init__.py +314 -0
ingestr/src/jira_source/helpers.py +452 -0
ingestr/src/jira_source/settings.py +170 -0
ingestr/src/mongodb/helpers.py +34 -6
ingestr/src/sources.py +55 -0
{ingestr-0.14.2.dist-info → ingestr-0.14.4.dist-info}/METADATA +1 -1
{ingestr-0.14.2.dist-info → ingestr-0.14.4.dist-info}/RECORD +19 -16
{ingestr-0.14.2.dist-info → ingestr-0.14.4.dist-info}/WHEEL +0 -0
{ingestr-0.14.2.dist-info → ingestr-0.14.4.dist-info}/entry_points.txt +0 -0
{ingestr-0.14.2.dist-info → ingestr-0.14.4.dist-info}/licenses/LICENSE.md +0 -0

ingestr/src/buildinfo.py CHANGED Viewed

	@@ -1 +1 @@
1	- version = "v0.14.2"
1	+ version = "v0.14.4"

ingestr/src/destinations.py CHANGED Viewed

@@ -813,30 +813,7 @@ class ElasticsearchDestination:
 class MongoDBDestination:
     def dlt_dest(self, uri: str, **kwargs):
-        from urllib.parse import urlparse
-        parsed_uri = urlparse(uri)
-        # Extract connection details from URI
-        host = parsed_uri.hostname or "localhost"
-        port = parsed_uri.port or 27017
-        username = parsed_uri.username
-        password = parsed_uri.password
-        database = (
-            parsed_uri.path.lstrip("/") if parsed_uri.path.lstrip("/") else "ingestr_db"
-        )
-        # Build connection string
-        if username and password:
-            connection_string = f"mongodb://{username}:{password}@{host}:{port}"
-        else:
-            connection_string = f"mongodb://{host}:{port}"
-        # Add query parameters if any
-        if parsed_uri.query:
-            connection_string += f"?{parsed_uri.query}"
-        return mongodb_insert(connection_string, database)
+        return mongodb_insert(uri)
     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
         return {

ingestr/src/elasticsearch/helpers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Elasticsearch destination helpers"""
 import json
+import logging
 from typing import Any, Dict, Iterator
 from urllib.parse import urlparse
@@ -9,6 +10,10 @@ import dlt
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
+# Suppress Elasticsearch transport logging
+logging.getLogger("elasticsearch.transport").setLevel(logging.WARNING)
+logging.getLogger("elastic_transport.transport").setLevel(logging.WARNING)
 def process_file_items(file_path: str) -> Iterator[Dict[str, Any]]:
     """Process items from a file path (JSONL format)."""
@@ -52,15 +57,36 @@ def elasticsearch_insert(
     parsed = urlparse(connection_string)
     # Build Elasticsearch client configuration
-    hosts = [
-        {
-            "host": parsed.hostname or "localhost",
-            "port": parsed.port or 9200,
-            "scheme": parsed.scheme or "http",
-        }
-    ]
-    es_config: Dict[str, Any] = {"hosts": hosts}
+    actual_url = connection_string
+    secure = True  # Default to HTTPS (secure by default)
+    if connection_string.startswith("elasticsearch://"):
+        actual_url = connection_string.replace("elasticsearch://", "")
+        # Parse to check for query parameters
+        temp_parsed = urlparse("http://" + actual_url)
+        from urllib.parse import parse_qs
+        query_params = parse_qs(temp_parsed.query)
+        # Check ?secure parameter (defaults to true)
+        if "secure" in query_params:
+            secure = query_params["secure"][0].lower() in ["true", "1", "yes"]
+        # Remove query params from URL for ES client
+        actual_url = actual_url.split("?")[0]
+        # Add scheme
+        scheme = "https" if secure else "http"
+        actual_url = f"{scheme}://{actual_url}"
+        parsed = urlparse(actual_url)
+    es_config: Dict[str, Any] = {
+        "hosts": [actual_url],
+        "verify_certs": secure,
+        "ssl_show_warn": False,
+    }
     # Add authentication if present
     if parsed.username and parsed.password:

ingestr/src/factory.py CHANGED Viewed

@@ -56,6 +56,7 @@ from ingestr.src.sources import (
     InfluxDBSource,
     IntercomSource,
     IsocPulseSource,
+    JiraSource,
     KafkaSource,
     KinesisSource,
     KlaviyoSource,
@@ -168,6 +169,7 @@ class SourceDestinationFactory:
         "slack": SlackSource,
         "hubspot": HubspotSource,
         "intercom": IntercomSource,
+        "jira": JiraSource,
         "airtable": AirtableSource,
         "klaviyo": KlaviyoSource,
         "mixpanel": MixpanelSource,

ingestr/src/fluxx/__init__.py CHANGED Viewed

@@ -96,6 +96,15 @@ FLUXX_RESOURCES = {
             "workflow_events": {"data_type": "json", "field_type": "relation"},
         },
     },
+    "alert_email": {
+        "endpoint": "alert_email",
+        "fields": {
+            "alert_id": {"data_type": "bigint", "field_type": "column"},
+            "created_at": {"data_type": "timestamp", "field_type": "column"},
+            "id": {"data_type": "bigint", "field_type": "column"},
+            "updated_at": {"data_type": "timestamp", "field_type": "column"},
+        },
+    },
     "affiliate": {
         "endpoint": "affiliate",
         "fields": {

ingestr/src/freshdesk/__init__.py CHANGED Viewed

@@ -20,6 +20,7 @@ def freshdesk_source(
     end_date: Optional[pendulum.DateTime] = None,
     per_page: int = 100,
     endpoints: Optional[List[str]] = None,
+    query: Optional[str] = None,
 ) -> Iterable[DltResource]:
     """
     Retrieves data from specified Freshdesk API endpoints.
@@ -72,6 +73,7 @@ def freshdesk_source(
             per_page=per_page,
             start_date=start_date,
             end_date=end_date,
+            query=query,
         )
     # Set default endpoints if not provided

ingestr/src/freshdesk/freshdesk_client.py CHANGED Viewed

@@ -2,7 +2,7 @@
 import logging
 import time
-from typing import Any, Dict, Iterable
+from typing import Any, Dict, Iterable, Optional
 import pendulum
 from dlt.common.typing import TDataItem
@@ -70,6 +70,7 @@ class FreshdeskClient:
         per_page: int,
         start_date: pendulum.DateTime,
         end_date: pendulum.DateTime,
+        query: Optional[str] = None,
     ) -> Iterable[TDataItem]:
         """
         Fetches a paginated response from a specified endpoint.
@@ -79,6 +80,9 @@ class FreshdeskClient:
         updated at the specified timestamp.
         """
         page = 1
+        if query is not None:
+            query = query.replace('"', "").strip()
         while True:
             # Construct the URL for the specific endpoint
             url = f"{self.base_url}/{endpoint}"
@@ -93,11 +97,21 @@ class FreshdeskClient:
                 params[param_key] = start_date.to_iso8601_string()
+            if query and endpoint == "tickets":
+                url = f"{self.base_url}/search/tickets"
+                params = {
+                    "query": f'"{query}"',
+                    "page": page,
+                }
             # Handle requests with rate-limiting
             # A maximum of 300 pages (30000 tickets) will be returned.
             response = self._request_with_rate_limit(url, params=params)
             data = response.json()
+            if query and endpoint == "tickets":
+                data = data["results"]
             if not data:
                 break  # Stop if no data or max page limit reached

ingestr/src/hubspot/__init__.py CHANGED Viewed

@@ -93,7 +93,6 @@ def hubspot(
     def companies(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_COMPANY_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot companies resource"""
@@ -101,7 +100,7 @@ def hubspot(
             "company",
             api_key,
             include_history=include_history,
-            props=props,
+            props=DEFAULT_COMPANY_PROPS,
             include_custom_props=include_custom_props,
         )
@@ -109,7 +108,6 @@ def hubspot(
     def contacts(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_CONTACT_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot contacts resource"""
@@ -117,7 +115,7 @@ def hubspot(
             "contact",
             api_key,
             include_history,
-            props,
+            DEFAULT_CONTACT_PROPS,
             include_custom_props,
         )
@@ -125,7 +123,6 @@ def hubspot(
     def deals(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_DEAL_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot deals resource"""
@@ -133,7 +130,7 @@ def hubspot(
             "deal",
             api_key,
             include_history,
-            props,
+            DEFAULT_DEAL_PROPS,
             include_custom_props,
         )
@@ -141,7 +138,6 @@ def hubspot(
     def tickets(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_TICKET_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot tickets resource"""
@@ -149,7 +145,7 @@ def hubspot(
             "ticket",
             api_key,
             include_history,
-            props,
+            DEFAULT_TICKET_PROPS,
             include_custom_props,
         )
@@ -157,7 +153,6 @@ def hubspot(
     def products(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot products resource"""
@@ -165,7 +160,7 @@ def hubspot(
             "product",
             api_key,
             include_history,
-            props,
+            DEFAULT_PRODUCT_PROPS,
             include_custom_props,
         )
@@ -180,7 +175,6 @@ def hubspot(
     def quotes(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_QUOTE_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot quotes resource"""
@@ -188,7 +182,7 @@ def hubspot(
             "quote",
             api_key,
             include_history,
-            props,
+            DEFAULT_QUOTE_PROPS,
             include_custom_props,
         )

ingestr/src/intercom/settings.py CHANGED Viewed

@@ -126,7 +126,9 @@ RESOURCE_CONFIGS = {
         "pagination_type": "cursor",
         "incremental": False,
         "transform_func": None,
-        "columns": {},
+        "columns": {
+            "id": {"data_type": "bigint", "nullable": True},
+        },
     },
 }

ingestr/src/jira_source/__init__.py ADDED Viewed

@@ -0,0 +1,314 @@
+"""
+This source provides data extraction from Jira Cloud via the REST API v3.
+It defines several functions to fetch data from different parts of Jira including
+projects, issues, users, boards, sprints, and various configuration objects like
+issue types, statuses, and priorities.
+"""
+from typing import Any, Iterable, Optional
+import dlt
+from dlt.common.typing import TDataItem
+from .helpers import get_client
+from .settings import (
+    DEFAULT_PAGE_SIZE,
+    DEFAULT_START_DATE,
+    ISSUE_FIELDS,
+)
+@dlt.source
+def jira_source() -> Any:
+    """
+    The main function that runs all the other functions to fetch data from Jira.
+    Returns:
+        Sequence[DltResource]: A sequence of DltResource objects containing the fetched data.
+    """
+    return [
+        projects,
+        issues,
+        users,
+        issue_types,
+        statuses,
+        priorities,
+        resolutions,
+        project_versions,
+        project_components,
+    ]
+@dlt.resource(write_disposition="replace")
+def projects(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    expand: Optional[str] = None,
+    recent: Optional[int] = None,
+) -> Iterable[TDataItem]:
+    """
+    Fetches and returns a list of projects from Jira.
+    Args:
+        base_url (str): Jira instance URL (e.g., https://your-domain.atlassian.net)
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        expand (str): Comma-separated list of fields to expand
+        recent (int): Number of recent projects to return
+    Yields:
+        dict: The project data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_projects(expand=expand, recent=recent)
+@dlt.resource(write_disposition="merge", primary_key="id")
+def issues(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    jql: str = "order by updated DESC",
+    fields: Optional[str] = None,
+    expand: Optional[str] = None,
+    max_results: Optional[int] = None,
+    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "fields.updated",
+        initial_value=DEFAULT_START_DATE,
+        range_end="closed",
+        range_start="closed",
+    ),
+) -> Iterable[TDataItem]:
+    """
+    Fetches issues from Jira using JQL search.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        jql (str): JQL query string
+        fields (str): Comma-separated list of fields to return
+        expand (str): Comma-separated list of fields to expand
+        max_results (int): Maximum number of results to return
+        updated (str): The date from which to fetch updated issues
+    Yields:
+        dict: The issue data.
+    """
+    client = get_client(base_url, email, api_token)
+    # Build JQL with incremental filter
+    incremental_jql = jql
+    if updated.start_value:
+        date_filter = f"updated >= '{updated.start_value}'"
+        # Check if JQL has ORDER BY clause and handle it properly
+        jql_upper = jql.upper()
+        if "ORDER BY" in jql_upper:
+            # Split at ORDER BY and add filter before it
+            order_by_index = jql_upper.find("ORDER BY")
+            main_query = jql[:order_by_index].strip()
+            order_clause = jql[order_by_index:].strip()
+            if main_query and (
+                "WHERE" in main_query.upper()
+                or "AND" in main_query.upper()
+                or "OR" in main_query.upper()
+            ):
+                incremental_jql = f"({main_query}) AND {date_filter} {order_clause}"
+            else:
+                if main_query:
+                    incremental_jql = f"{main_query} AND {date_filter} {order_clause}"
+                else:
+                    incremental_jql = f"{date_filter} {order_clause}"
+        else:
+            # No ORDER BY clause, use original logic
+            if "WHERE" in jql_upper or "AND" in jql_upper or "OR" in jql_upper:
+                incremental_jql = f"({jql}) AND {date_filter}"
+            else:
+                incremental_jql = f"{jql} AND {date_filter}"
+    # Use default fields if not specified
+    if fields is None:
+        fields = ",".join(ISSUE_FIELDS)
+    yield from client.search_issues(
+        jql=incremental_jql, fields=fields, expand=expand, max_results=max_results
+    )
+@dlt.resource(write_disposition="replace")
+def users(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+    username: Optional[str] = None,
+    account_id: Optional[str] = None,
+    max_results: int = DEFAULT_PAGE_SIZE,
+) -> Iterable[TDataItem]:
+    """
+    Fetches users from Jira.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+        username (str): Username to search for
+        account_id (str): Account ID to search for
+        max_results (int): Maximum results per page
+    Yields:
+        dict: The user data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_users(
+        username=username, account_id=account_id, max_results=max_results
+    )
+@dlt.resource(write_disposition="replace")
+def issue_types(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all issue types from Jira.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The issue type data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_issue_types()
+@dlt.resource(write_disposition="replace")
+def statuses(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all statuses from Jira.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The status data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_statuses()
+@dlt.resource(write_disposition="replace")
+def priorities(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all priorities from Jira.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The priority data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_priorities()
+@dlt.resource(write_disposition="replace")
+def resolutions(
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all resolutions from Jira.
+    Args:
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Yields:
+        dict: The resolution data.
+    """
+    client = get_client(base_url, email, api_token)
+    yield from client.get_resolutions()
+@dlt.transformer(
+    data_from=projects,
+    write_disposition="replace",
+)
+@dlt.defer
+def project_versions(
+    project: TDataItem,
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches versions for each project from Jira.
+    Args:
+        project (dict): The project data.
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Returns:
+        list[dict]: The version data for the given project.
+    """
+    client = get_client(base_url, email, api_token)
+    project_key = project.get("key")
+    if not project_key:
+        return []
+    return list(client.get_project_versions(project_key))
+@dlt.transformer(
+    data_from=projects,
+    write_disposition="replace",
+)
+@dlt.defer
+def project_components(
+    project: TDataItem,
+    base_url: str = dlt.secrets.value,
+    email: str = dlt.secrets.value,
+    api_token: str = dlt.secrets.value,
+) -> Iterable[TDataItem]:
+    """
+    Fetches components for each project from Jira.
+    Args:
+        project (dict): The project data.
+        base_url (str): Jira instance URL
+        email (str): User email for authentication
+        api_token (str): API token for authentication
+    Returns:
+        list[dict]: The component data for the given project.
+    """
+    client = get_client(base_url, email, api_token)
+    project_key = project.get("key")
+    if not project_key:
+        return []
+    return list(client.get_project_components(project_key))

ingestr 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

Potentially problematic release.

ingestr 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl