ingestr 0.13.75__py3-none-any.whl → 0.14.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ingestr might be problematic.

Files changed (79)
  1. ingestr/main.py +22 -3
  2. ingestr/src/adjust/__init__.py +4 -4
  3. ingestr/src/allium/__init__.py +128 -0
  4. ingestr/src/anthropic/__init__.py +277 -0
  5. ingestr/src/anthropic/helpers.py +525 -0
  6. ingestr/src/appstore/__init__.py +1 -0
  7. ingestr/src/asana_source/__init__.py +1 -1
  8. ingestr/src/buildinfo.py +1 -1
  9. ingestr/src/chess/__init__.py +1 -1
  10. ingestr/src/couchbase_source/__init__.py +118 -0
  11. ingestr/src/couchbase_source/helpers.py +135 -0
  12. ingestr/src/cursor/__init__.py +83 -0
  13. ingestr/src/cursor/helpers.py +188 -0
  14. ingestr/src/destinations.py +169 -1
  15. ingestr/src/docebo/__init__.py +589 -0
  16. ingestr/src/docebo/client.py +435 -0
  17. ingestr/src/docebo/helpers.py +97 -0
  18. ingestr/src/elasticsearch/helpers.py +138 -0
  19. ingestr/src/errors.py +8 -0
  20. ingestr/src/facebook_ads/__init__.py +26 -23
  21. ingestr/src/facebook_ads/helpers.py +47 -1
  22. ingestr/src/factory.py +48 -0
  23. ingestr/src/filesystem/__init__.py +8 -3
  24. ingestr/src/filters.py +9 -0
  25. ingestr/src/fluxx/__init__.py +9906 -0
  26. ingestr/src/fluxx/helpers.py +209 -0
  27. ingestr/src/frankfurter/__init__.py +157 -163
  28. ingestr/src/frankfurter/helpers.py +3 -3
  29. ingestr/src/freshdesk/__init__.py +25 -8
  30. ingestr/src/freshdesk/freshdesk_client.py +40 -5
  31. ingestr/src/fundraiseup/__init__.py +49 -0
  32. ingestr/src/fundraiseup/client.py +81 -0
  33. ingestr/src/github/__init__.py +6 -4
  34. ingestr/src/google_analytics/__init__.py +1 -1
  35. ingestr/src/hostaway/__init__.py +302 -0
  36. ingestr/src/hostaway/client.py +288 -0
  37. ingestr/src/http/__init__.py +35 -0
  38. ingestr/src/http/readers.py +114 -0
  39. ingestr/src/hubspot/__init__.py +6 -12
  40. ingestr/src/influxdb/__init__.py +1 -0
  41. ingestr/src/intercom/__init__.py +142 -0
  42. ingestr/src/intercom/helpers.py +674 -0
  43. ingestr/src/intercom/settings.py +279 -0
  44. ingestr/src/jira_source/__init__.py +340 -0
  45. ingestr/src/jira_source/helpers.py +439 -0
  46. ingestr/src/jira_source/settings.py +170 -0
  47. ingestr/src/klaviyo/__init__.py +5 -5
  48. ingestr/src/linear/__init__.py +553 -116
  49. ingestr/src/linear/helpers.py +77 -38
  50. ingestr/src/mailchimp/__init__.py +126 -0
  51. ingestr/src/mailchimp/helpers.py +226 -0
  52. ingestr/src/mailchimp/settings.py +164 -0
  53. ingestr/src/masking.py +344 -0
  54. ingestr/src/monday/__init__.py +246 -0
  55. ingestr/src/monday/helpers.py +392 -0
  56. ingestr/src/monday/settings.py +328 -0
  57. ingestr/src/mongodb/__init__.py +5 -2
  58. ingestr/src/mongodb/helpers.py +384 -10
  59. ingestr/src/plusvibeai/__init__.py +335 -0
  60. ingestr/src/plusvibeai/helpers.py +544 -0
  61. ingestr/src/plusvibeai/settings.py +252 -0
  62. ingestr/src/revenuecat/__init__.py +83 -0
  63. ingestr/src/revenuecat/helpers.py +237 -0
  64. ingestr/src/salesforce/__init__.py +15 -8
  65. ingestr/src/shopify/__init__.py +1 -1
  66. ingestr/src/smartsheets/__init__.py +33 -5
  67. ingestr/src/socrata_source/__init__.py +83 -0
  68. ingestr/src/socrata_source/helpers.py +85 -0
  69. ingestr/src/socrata_source/settings.py +8 -0
  70. ingestr/src/sources.py +1418 -54
  71. ingestr/src/stripe_analytics/__init__.py +2 -19
  72. ingestr/src/wise/__init__.py +68 -0
  73. ingestr/src/wise/client.py +63 -0
  74. ingestr/tests/unit/test_smartsheets.py +6 -9
  75. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/METADATA +24 -12
  76. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/RECORD +79 -37
  77. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/WHEEL +0 -0
  78. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/entry_points.txt +0 -0
  79. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/sources.py CHANGED
@@ -73,6 +73,20 @@ class SqlSource:
 
         engine_adapter_callback = None
 
+        if uri.startswith("md://") or uri.startswith("motherduck://"):
+            parsed_uri = urlparse(uri)
+            query_params = parse_qs(parsed_uri.query)
+            # Convert md:// URI to duckdb:///md: format
+            if parsed_uri.path:
+                db_path = parsed_uri.path
+            else:
+                db_path = ""
+
+            token = query_params.get("token", [""])[0]
+            if not token:
+                raise ValueError("Token is required for MotherDuck connection")
+            uri = f"duckdb:///md:{db_path}?motherduck_token={token}"
+
         if uri.startswith("mysql://"):
             uri = uri.replace("mysql://", "mysql+pymysql://")
 
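Note: as a minimal illustration (not part of the diff), the rewrite above turns a MotherDuck URI into a DuckDB one; the database name and token below are hypothetical:

    from urllib.parse import parse_qs, urlparse

    uri = "md:///my_db?token=SECRET"  # hypothetical MotherDuck URI
    parsed = urlparse(uri)
    token = parse_qs(parsed.query).get("token", [""])[0]
    print(f"duckdb:///md:{parsed.path}?motherduck_token={token}")
    # duckdb:///md:/my_db?motherduck_token=SECRET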
@@ -223,6 +237,9 @@ class SqlSource:
         backend_kwargs: Dict[str, Any] = None,  # type: ignore
         type_adapter_callback: Optional[TTypeAdapter] = None,
         included_columns: Optional[List[str]] = None,
+        excluded_columns: Optional[
+            List[str]
+        ] = None,  # Added for dlt 1.16.0 compatibility
         query_adapter_callback: Optional[TQueryAdapter] = None,
         resolve_foreign_keys: bool = False,
     ) -> Iterator[TDataItem]:
@@ -409,31 +426,187 @@ class MongoDbSource:
         return False
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-        table_fields = table_string_to_dataclass(table)
+        # Check if this is a custom query format (collection:query)
+        if ":" in table:
+            collection_name, query_json = table.split(":", 1)
 
-        incremental = None
-        if kwargs.get("incremental_key"):
-            start_value = kwargs.get("interval_start")
-            end_value = kwargs.get("interval_end")
+            # Parse the query using MongoDB's extended JSON parser
+            # First, convert MongoDB shell syntax to Extended JSON format
+            from bson import json_util
 
-            incremental = dlt_incremental(
-                kwargs.get("incremental_key", ""),
-                initial_value=start_value,
-                end_value=end_value,
-                range_end="closed",
-                range_start="closed",
+            from ingestr.src.mongodb.helpers import convert_mongo_shell_to_extended_json
+
+            # Convert MongoDB shell constructs to Extended JSON v2 format
+            converted_query = convert_mongo_shell_to_extended_json(query_json)
+
+            try:
+                query = json_util.loads(converted_query)
+            except Exception as e:
+                raise ValueError(f"Invalid MongoDB query format: {e}")
+
+            # Validate that it's a list for aggregation pipeline
+            if not isinstance(query, list):
+                raise ValueError(
+                    "Query must be a JSON array representing a MongoDB aggregation pipeline"
+                )
+
+            # Check for incremental load requirements
+            incremental = None
+            if kwargs.get("incremental_key"):
+                start_value = kwargs.get("interval_start")
+                end_value = kwargs.get("interval_end")
+
+                # Validate that incremental key is present in the pipeline
+                incremental_key = kwargs.get("incremental_key")
+                self._validate_incremental_query(query, str(incremental_key))
+
+                incremental = dlt_incremental(
+                    str(incremental_key),
+                    initial_value=start_value,
+                    end_value=end_value,
+                )
+
+            # Substitute interval parameters in the query
+            query = self._substitute_interval_params(query, kwargs)
+
+            # Parse collection name to get database and collection
+            if "." in collection_name:
+                # Handle database.collection format
+                table_fields = table_string_to_dataclass(collection_name)
+                database = table_fields.dataset
+                collection = table_fields.table
+            else:
+                # Single collection name, use default database
+                database = None
+                collection = collection_name
+
+            table_instance = self.table_builder(
+                connection_url=uri,
+                database=database,
+                collection=collection,
+                parallel=False,
+                incremental=incremental,
+                custom_query=query,
+            )
+            table_instance.max_table_nesting = 1
+            return table_instance
+        else:
+            # Default behavior for simple collection names
+            table_fields = table_string_to_dataclass(table)
+
+            incremental = None
+            if kwargs.get("incremental_key"):
+                start_value = kwargs.get("interval_start")
+                end_value = kwargs.get("interval_end")
+
+                incremental = dlt_incremental(
+                    kwargs.get("incremental_key", ""),
+                    initial_value=start_value,
+                    end_value=end_value,
+                )
+
+            table_instance = self.table_builder(
+                connection_url=uri,
+                database=table_fields.dataset,
+                collection=table_fields.table,
+                parallel=False,
+                incremental=incremental,
             )
+            table_instance.max_table_nesting = 1
+
+            return table_instance
+
+    def _validate_incremental_query(self, query: list, incremental_key: str):
+        """Validate that incremental key is projected in the aggregation pipeline"""
+        # Check if there's a $project stage and if incremental_key is included
+        has_project = False
+        incremental_key_projected = False
+
+        for stage in query:
+            if "$project" in stage:
+                has_project = True
+                project_stage = stage["$project"]
+                if isinstance(project_stage, dict):
+                    # Check if incremental_key is explicitly included
+                    if incremental_key in project_stage:
+                        if project_stage[incremental_key] not in [0, False]:
+                            incremental_key_projected = True
+                    # If there are only inclusions (1 or True values) and incremental_key is not included
+                    elif any(v in [1, True] for v in project_stage.values()):
+                        # This is an inclusion projection, incremental_key must be explicitly included
+                        incremental_key_projected = False
+                    # If there are only exclusions (0 or False values) and incremental_key is not excluded
+                    elif all(
+                        v in [0, False]
+                        for v in project_stage.values()
+                        if v in [0, False, 1, True]
+                    ):
+                        # This is an exclusion projection, incremental_key is included by default
+                        if incremental_key not in project_stage:
+                            incremental_key_projected = True
+                        else:
+                            incremental_key_projected = project_stage[
+                                incremental_key
+                            ] not in [0, False]
+                    else:
+                        # Mixed or unclear projection, assume incremental_key needs to be explicit
+                        incremental_key_projected = False
 
-        table_instance = self.table_builder(
-            connection_url=uri,
-            database=table_fields.dataset,
-            collection=table_fields.table,
-            parallel=True,
-            incremental=incremental,
-        )
-        table_instance.max_table_nesting = 1
+        # If there's a $project stage but incremental_key is not projected, raise error
+        if has_project and not incremental_key_projected:
+            raise ValueError(
+                f"Incremental key '{incremental_key}' must be included in the projected fields of the aggregation pipeline"
+            )
 
-        return table_instance
+    def _substitute_interval_params(self, query: list, kwargs: dict):
+        """Substitute :interval_start and :interval_end placeholders with actual datetime values"""
+        from dlt.common.time import ensure_pendulum_datetime
+
+        # Get interval values and convert them to datetime objects
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+
+        # Convert string dates to datetime objects if needed
+        if interval_start is not None:
+            if isinstance(interval_start, str):
+                pendulum_dt = ensure_pendulum_datetime(interval_start)
+                interval_start = (
+                    pendulum_dt.to_datetime()
+                    if hasattr(pendulum_dt, "to_datetime")
+                    else pendulum_dt
+                )
+            elif hasattr(interval_start, "to_datetime"):
+                interval_start = interval_start.to_datetime()
+
+        if interval_end is not None:
+            if isinstance(interval_end, str):
+                pendulum_dt = ensure_pendulum_datetime(interval_end)
+                interval_end = (
+                    pendulum_dt.to_datetime()
+                    if hasattr(pendulum_dt, "to_datetime")
+                    else pendulum_dt
+                )
+            elif hasattr(interval_end, "to_datetime"):
+                interval_end = interval_end.to_datetime()
+
+        # Deep copy the query and replace placeholders with actual datetime objects
+        def replace_placeholders(obj):
+            if isinstance(obj, dict):
+                result = {}
+                for key, value in obj.items():
+                    if value == ":interval_start" and interval_start is not None:
+                        result[key] = interval_start
+                    elif value == ":interval_end" and interval_end is not None:
+                        result[key] = interval_end
+                    else:
+                        result[key] = replace_placeholders(value)
+                return result
+            elif isinstance(obj, list):
+                return [replace_placeholders(item) for item in obj]
+            else:
+                return obj
+
+        return replace_placeholders(query)
 
 
 class LocalCsvSource:
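Note: a usage sketch (values hypothetical) of the new collection:query table format; the JSON after the first colon must be an aggregation pipeline, and ":interval_start"/":interval_end" appear as string placeholders that _substitute_interval_params later replaces:

    from bson import json_util  # ships with pymongo

    table = 'mydb.events:[{"$match": {"ts": {"$gte": ":interval_start"}}}, {"$project": {"ts": 1, "value": 1}}]'
    collection_name, query_json = table.split(":", 1)
    pipeline = json_util.loads(query_json)
    assert isinstance(pipeline, list)  # a bare document would be rejected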
@@ -538,6 +711,11 @@ class ShopifySource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Shopify takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         source_fields = urlparse(uri)
         source_params = parse_qs(source_fields.query)
         api_key = source_params.get("api_key")
@@ -839,6 +1017,16 @@ class FacebookAdsSource:
             facebook_insights_source,
         )
 
+        insights_max_wait_to_finish_seconds = source_params.get(
+            "insights_max_wait_to_finish_seconds", [60 * 60 * 4]
+        )
+        insights_max_wait_to_start_seconds = source_params.get(
+            "insights_max_wait_to_start_seconds", [60 * 30]
+        )
+        insights_max_async_sleep_seconds = source_params.get(
+            "insights_max_async_sleep_seconds", [20]
+        )
+
         endpoint = None
         if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
             endpoint = table
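Note: a small sketch (hypothetical query string) of how these tuning knobs arrive from the source URI; parse_qs always yields strings, while the fallback values above are ints:

    from urllib.parse import parse_qs

    params = parse_qs("access_token=TOKEN&insights_max_wait_to_finish_seconds=7200")
    wait = params.get("insights_max_wait_to_finish_seconds", [60 * 60 * 4])[0]
    print(wait, type(wait))  # 7200 <class 'str'>; the fallback would be the int 14400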
@@ -848,6 +1036,13 @@ class FacebookAdsSource:
                 account_id=account_id[0],
                 start_date=kwargs.get("interval_start"),
                 end_date=kwargs.get("interval_end"),
+                insights_max_wait_to_finish_seconds=insights_max_wait_to_finish_seconds[
+                    0
+                ],
+                insights_max_wait_to_start_seconds=insights_max_wait_to_start_seconds[
+                    0
+                ],
+                insights_max_async_sleep_seconds=insights_max_async_sleep_seconds[0],
             ).with_resources("facebook_insights")
         elif table.startswith("facebook_insights:"):
             # Parse custom breakdowns and metrics from table name
@@ -868,35 +1063,19 @@ class FacebookAdsSource:
             )
 
             # Validate breakdown type against available options from settings
-            import typing
-
-            from ingestr.src.facebook_ads.settings import TInsightsBreakdownOptions
 
-            # Get valid breakdown options from the type definition
-            valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
-
-            if breakdown_type not in valid_breakdowns:
-                raise ValueError(
-                    f"Invalid breakdown type '{breakdown_type}'. Valid options: {', '.join(valid_breakdowns)}"
-                )
+            from ingestr.src.facebook_ads.helpers import (
+                parse_insights_table_to_source_kwargs,
+            )
 
             source_kwargs = {
                 "access_token": access_token[0],
                 "account_id": account_id[0],
                 "start_date": kwargs.get("interval_start"),
                 "end_date": kwargs.get("interval_end"),
-                "breakdowns": breakdown_type,
             }
 
-            # If custom metrics are provided, parse them
-            if len(parts) == 3:
-                fields = [f.strip() for f in parts[2].split(",") if f.strip()]
-                if not fields:
-                    raise ValueError(
-                        "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
-                    )
-                source_kwargs["fields"] = fields
-
+            source_kwargs.update(parse_insights_table_to_source_kwargs(table))
             return facebook_insights_source(**source_kwargs).with_resources(
                 "facebook_insights"
             )
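Note: based on the removed inline parser, the table string that parse_insights_table_to_source_kwargs now handles looks like facebook_insights:<breakdown>:<metric1,metric2,...>; a hypothetical example of the same split:

    table = "facebook_insights:ads_insights_age_and_gender:impressions,clicks"
    parts = table.split(":")
    breakdown_type = parts[1]
    fields = [f.strip() for f in parts[2].split(",") if f.strip()]
    print(breakdown_type, fields)  # ads_insights_age_and_gender ['impressions', 'clicks']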
@@ -961,7 +1140,7 @@ class SlackSource:
 
 class HubspotSource:
     def handles_incrementality(self) -> bool:
-        return True
+        return False
 
     # hubspot://?api_key=<api_key>
     def dlt_source(self, uri: str, table: str, **kwargs):
@@ -1488,6 +1667,11 @@ class TikTokSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "TikTok takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         endpoint = "custom_reports"
 
         parsed_uri = urlparse(uri)
@@ -1634,6 +1818,64 @@ class AsanaSource:
         return src.with_resources(table)
 
 
+class JiraSource:
+    resources = [
+        "projects",
+        "issues",
+        "users",
+        "issue_types",
+        "statuses",
+        "priorities",
+        "resolutions",
+        "project_versions",
+        "project_components",
+        "events",
+    ]
+
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        base_url = f"https://{parsed_uri.netloc}"
+        email = params.get("email")
+        api_token = params.get("api_token")
+
+        if not email:
+            raise ValueError("email must be specified in the URI query parameters")
+
+        if not api_token:
+            raise ValueError("api_token is required for connecting to Jira")
+
+        flags = {
+            "skip_archived": False,
+        }
+        if ":" in table:
+            table, rest = table.split(":", 1)  # type: ignore
+            for k in rest.split(":"):
+                flags[k] = True
+
+        if table not in self.resources:
+            raise ValueError(
+                f"Resource '{table}' is not supported for Jira source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+            )
+
+        import dlt
+
+        from ingestr.src.jira_source import jira_source
+
+        dlt.secrets["sources.jira_source.base_url"] = base_url
+        dlt.secrets["sources.jira_source.email"] = email[0]
+        dlt.secrets["sources.jira_source.api_token"] = api_token[0]
+
+        src = jira_source()
+        if flags["skip_archived"]:
+            src.projects.add_filter(lambda p: not p.get("archived", False))
+        return src.with_resources(table)
+
+
 class DynamoDBSource:
     AWS_ENDPOINT_PATTERN = re.compile(".*\.(.+)\.amazonaws\.com")
 
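Note: a quick sketch of the flag syntax the new Jira table parameter accepts (resource name plus colon-separated flags; skip_archived is the only flag this hunk defines):

    table = "projects:skip_archived"
    flags = {"skip_archived": False}
    if ":" in table:
        table, rest = table.split(":", 1)
        for k in rest.split(":"):
            flags[k] = True
    print(table, flags)  # projects {'skip_archived': True}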
@@ -1703,6 +1945,72 @@ class DynamoDBSource:
         return dynamodb(table, creds, incremental)
 
 
+class DoceboSource:
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        # docebo://?base_url=https://yourcompany.docebosaas.com&client_id=xxx&client_secret=xxx
+        # Optional: &username=xxx&password=xxx for password grant type
+
+        if kwargs.get("incremental_key"):
+            raise ValueError("Incremental loads are not yet supported for Docebo")
+
+        parsed_uri = urlparse(uri)
+        source_params = parse_qs(parsed_uri.query)
+
+        base_url = source_params.get("base_url")
+        if not base_url:
+            raise ValueError("base_url is required to connect to Docebo")
+
+        client_id = source_params.get("client_id")
+        if not client_id:
+            raise ValueError("client_id is required to connect to Docebo")
+
+        client_secret = source_params.get("client_secret")
+        if not client_secret:
+            raise ValueError("client_secret is required to connect to Docebo")
+
+        # Username and password are optional (uses client_credentials grant if not provided)
+        username = source_params.get("username", [None])[0]
+        password = source_params.get("password", [None])[0]
+
+        # Supported tables
+        supported_tables = [
+            "users",
+            "courses",
+            "user_fields",
+            "branches",
+            "groups",
+            "group_members",
+            "course_fields",
+            "learning_objects",
+            "learning_plans",
+            "learning_plan_enrollments",
+            "learning_plan_course_enrollments",
+            "course_enrollments",
+            "sessions",
+            "categories",
+            "certifications",
+            "external_training",
+            "survey_answers",
+        ]
+        if table not in supported_tables:
+            raise ValueError(
+                f"Resource '{table}' is not supported for Docebo source. Supported tables: {', '.join(supported_tables)}"
+            )
+
+        from ingestr.src.docebo import docebo_source
+
+        return docebo_source(
+            base_url=base_url[0],
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            username=username,
+            password=password,
+        ).with_resources(table)
+
+
 class GoogleAnalyticsSource:
     def handles_incrementality(self) -> bool:
         return True
@@ -1710,6 +2018,11 @@ class GoogleAnalyticsSource:
     def dlt_source(self, uri: str, table: str, **kwargs):
         import ingestr.src.google_analytics.helpers as helpers
 
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Google Analytics takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         result = helpers.parse_google_analytics_uri(uri)
         credentials = result["credentials"]
         property_id = result["property_id"]
@@ -1817,7 +2130,7 @@ class GitHubSource:
         start_date = kwargs.get("interval_start") or pendulum.now().subtract(
             days=30
         )
-        end_date = kwargs.get("interval_end") or pendulum.now()
+        end_date = kwargs.get("interval_end") or None
 
         if isinstance(start_date, str):
             start_date = pendulum.parse(start_date)
@@ -2082,6 +2395,11 @@ class LinkedInAdsSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "LinkedIn Ads takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         source_fields = parse_qs(parsed_uri.query)
 
@@ -2165,6 +2483,11 @@ class ClickupSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "ClickUp takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         api_token = params.get("api_token")
@@ -2249,6 +2572,11 @@ class ApplovinMaxSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "AppLovin Max takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
 
@@ -2320,6 +2648,7 @@ class SalesforceSource:
             "username": params.get("username", [None])[0],
             "password": params.get("password", [None])[0],
             "token": params.get("token", [None])[0],
+            "domain": params.get("domain", [None])[0],
         }
         for k, v in creds.items():
             if v is None:
@@ -2329,6 +2658,11 @@ class SalesforceSource:
 
         src = salesforce_source(**creds)  # type: ignore
 
+        if table.startswith("custom:"):
+            custom_object = table.split(":")[1]
+            src = salesforce_source(**creds, custom_object=custom_object)
+            return src.with_resources("custom")
+
         if table not in src.resources:
             raise UnsupportedResourceError(table, "Salesforce")
 
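Note: a sketch of the new custom: table prefix for Salesforce (the object name is hypothetical); everything after the colon is forwarded as custom_object:

    table = "custom:Invoice__c"
    if table.startswith("custom:"):
        custom_object = table.split(":")[1]
        print(custom_object)  # Invoice__c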
@@ -2341,6 +2675,11 @@ class PersonioSource:
 
     # applovin://?client_id=123&client_secret=123
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Personio takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
 
@@ -2431,6 +2770,11 @@ class PipedriveSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Pipedrive takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         api_key = params.get("api_token")
@@ -2485,13 +2829,13 @@ class FrankfurterSource:
 
         if kwargs.get("interval_start"):
             start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
-            if kwargs.get("interval_end"):
-                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
-            else:
-                end_date = pendulum.now()
         else:
-            start_date = pendulum.now()
-            end_date = pendulum.now()
+            start_date = pendulum.yesterday()
+
+        if kwargs.get("interval_end"):
+            end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
+        else:
+            end_date = None
 
         validate_dates(start_date=start_date, end_date=end_date)
 
@@ -2513,6 +2857,11 @@ class FreshdeskSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Freshdesk takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         domain = parsed_uri.netloc
         query = parsed_uri.query
@@ -2528,6 +2877,22 @@ class FreshdeskSource:
         if api_key is None:
             raise MissingValueError("api_key", "Freshdesk")
 
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date).in_tz("UTC")
+        else:
+            start_date = ensure_pendulum_datetime("2022-01-01T00:00:00Z")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+        else:
+            end_date = None
+
+        custom_query: Optional[str] = None
+        if ":" in table:
+            table, custom_query = table.split(":", 1)
+
         if table not in [
             "agents",
             "companies",
@@ -2538,10 +2903,17 @@ class FreshdeskSource:
         ]:
             raise UnsupportedResourceError(table, "Freshdesk")
 
+        if custom_query and table != "tickets":
+            raise ValueError(f"Custom query is not supported for {table}")
+
         from ingestr.src.freshdesk import freshdesk_source
 
         return freshdesk_source(
-            api_secret_key=api_key[0], domain=domain
+            api_secret_key=api_key[0],
+            domain=domain,
+            start_date=start_date,
+            end_date=end_date,
+            query=custom_query,
         ).with_resources(table)
 
 
@@ -2551,6 +2923,11 @@ class TrustpilotSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Trustpilot takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         business_unit_id = parsed_uri.netloc
         params = parse_qs(parsed_uri.query)
@@ -2591,6 +2968,11 @@ class PhantombusterSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Phantombuster takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         # phantombuster://?api_key=<api_key>
         # source table = phantom_results:agent_id
         parsed_uri = urlparse(uri)
@@ -2684,7 +3066,7 @@ class ElasticsearchSource:
 
 class AttioSource:
     def handles_incrementality(self) -> bool:
-        return True
+        return False
 
     def dlt_source(self, uri: str, table: str, **kwargs):
         parsed_uri = urlparse(uri)
@@ -2744,6 +3126,11 @@ class SolidgateSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Solidgate takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         query_params = parse_qs(parsed_uri.query)
         public_key = query_params.get("public_key")
@@ -2837,6 +3224,11 @@ class QuickBooksSource:
 
     # quickbooks://?company_id=<company_id>&client_id=<client_id>&client_secret=<client_secret>&refresh_token=<refresh>&access_token=<access_token>&environment=<env>&minor_version=<version>
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "QuickBooks takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
 
         params = parse_qs(parsed_uri.query)
@@ -2906,6 +3298,11 @@ class IsocPulseSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Internet Society Pulse takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         token = params.get("token")
@@ -2941,6 +3338,11 @@ class PinterestSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Pinterest takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed = urlparse(uri)
         params = parse_qs(parsed.query)
         access_token = params.get("access_token")
@@ -2970,18 +3372,113 @@ class PinterestSource:
         ).with_resources(table)
 
 
+class FluxxSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Fluxx takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        # Parse URI: fluxx://instance?client_id=xxx&client_secret=xxx
+        parsed_uri = urlparse(uri)
+        source_params = parse_qs(parsed_uri.query)
+
+        instance = parsed_uri.hostname
+        if not instance:
+            raise ValueError(
+                "Instance is required in the URI (e.g., fluxx://mycompany.preprod)"
+            )
+
+        client_id = source_params.get("client_id")
+        if not client_id:
+            raise ValueError("client_id in the URI is required to connect to Fluxx")
+
+        client_secret = source_params.get("client_secret")
+        if not client_secret:
+            raise ValueError("client_secret in the URI is required to connect to Fluxx")
+
+        # Parse date parameters
+        start_date = kwargs.get("interval_start")
+        if start_date:
+            start_date = ensure_pendulum_datetime(start_date)
+
+        end_date = kwargs.get("interval_end")
+        if end_date:
+            end_date = ensure_pendulum_datetime(end_date)
+
+        # Import Fluxx source
+        from ingestr.src.fluxx import fluxx_source
+
+        # Parse table specification for custom column selection
+        # Format: "resource_name:field1,field2,field3" or "resource_name"
+        resources = None
+        custom_fields = {}
+
+        if table:
+            # Handle single resource with custom fields or multiple resources
+            if ":" in table and table.count(":") == 1:
+                # Single resource with custom fields: "grant_request:id,name,amount"
+                resource_name, field_list = table.split(":", 1)
+                resource_name = resource_name.strip()
+                fields = [f.strip() for f in field_list.split(",")]
+                resources = [resource_name]
+                custom_fields[resource_name] = fields
+            else:
+                # Multiple resources or single resource without custom fields
+                # Support comma-separated list: "grant_request,user"
+                resources = [r.strip() for r in table.split(",")]
+
+        return fluxx_source(
+            instance=instance,
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            start_date=start_date,
+            end_date=end_date,
+            resources=resources,
+            custom_fields=custom_fields,
+        )
+
+
 class LinearSource:
     def handles_incrementality(self) -> bool:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Linear takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         api_key = params.get("api_key")
         if api_key is None:
             raise MissingValueError("api_key", "Linear")
 
-        if table not in ["issues", "projects", "teams", "users"]:
+        if table not in [
+            "issues",
+            "projects",
+            "teams",
+            "users",
+            "workflow_states",
+            "cycles",
+            "attachments",
+            "comments",
+            "documents",
+            "external_users",
+            "initiative",
+            "integrations",
+            "labels",
+            "organization",
+            "project_updates",
+            "team_memberships",
+            "initiative_to_project",
+            "project_milestone",
+            "project_status",
+        ]:
             raise UnsupportedResourceError(table, "Linear")
 
         start_date = kwargs.get("interval_start")
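Note: a sketch of the two table-spec shapes the new Fluxx source distinguishes (resource and field names hypothetical): exactly one colon selects custom fields for a single resource, anything else is treated as a comma-separated resource list:

    def parse_fluxx_table(table):
        if ":" in table and table.count(":") == 1:
            resource, field_list = table.split(":", 1)
            resource = resource.strip()
            return [resource], {resource: [f.strip() for f in field_list.split(",")]}
        return [r.strip() for r in table.split(",")], {}

    print(parse_fluxx_table("grant_request:id,name,amount"))
    # (['grant_request'], {'grant_request': ['id', 'name', 'amount']})
    print(parse_fluxx_table("grant_request,user"))
    # (['grant_request', 'user'], {})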
@@ -3003,12 +3500,67 @@ class LinearSource:
         ).with_resources(table)
 
 
-class ZoomSource:
+class RevenueCatSource:
     def handles_incrementality(self) -> bool:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-        parsed = urlparse(uri)
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "RevenueCat takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "RevenueCat")
+
+        project_id = params.get("project_id")
+        if project_id is None and table != "projects":
+            raise MissingValueError("project_id", "RevenueCat")
+
+        if table not in [
+            "customers",
+            "products",
+            "entitlements",
+            "offerings",
+            "subscriptions",
+            "purchases",
+            "projects",
+        ]:
+            raise UnsupportedResourceError(table, "RevenueCat")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+        from ingestr.src.revenuecat import revenuecat_source
+
+        return revenuecat_source(
+            api_key=api_key[0],
+            project_id=project_id[0] if project_id is not None else None,
+        ).with_resources(table)
+
+
+class ZoomSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Zoom takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed = urlparse(uri)
         params = parse_qs(parsed.query)
         client_id = params.get("client_id")
         client_secret = params.get("client_secret")
@@ -3049,6 +3601,11 @@ class InfluxDBSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "InfluxDB takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         host = parsed_uri.hostname
@@ -3056,7 +3613,7 @@ class InfluxDBSource:
 
         secure = params.get("secure", ["true"])[0].lower() != "false"
         scheme = "https" if secure else "http"
-
+
         if port:
             host_url = f"{scheme}://{host}:{port}"
         else:
@@ -3097,3 +3654,810 @@ class InfluxDBSource:
3097
3654
  start_date=start_date,
3098
3655
  end_date=end_date,
3099
3656
  ).with_resources(table)
3657
+
3658
+
3659
+ class WiseSource:
3660
+ def handles_incrementality(self) -> bool:
3661
+ return True
3662
+
3663
+ def dlt_source(self, uri: str, table: str, **kwargs):
3664
+ parsed = urlparse(uri)
3665
+ params = parse_qs(parsed.query)
3666
+ api_key = params.get("api_key")
3667
+
3668
+ if not api_key:
3669
+ raise MissingValueError("api_key", "Wise")
3670
+
3671
+ if table not in ["profiles", "transfers", "balances"]:
3672
+ raise ValueError(
3673
+ f"Resource '{table}' is not supported for Wise source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
3674
+ )
3675
+
3676
+ start_date = kwargs.get("interval_start")
3677
+ if start_date:
3678
+ start_date = ensure_pendulum_datetime(start_date).in_timezone("UTC")
3679
+ else:
3680
+ start_date = pendulum.datetime(2020, 1, 1).in_timezone("UTC")
3681
+
3682
+ end_date = kwargs.get("interval_end")
3683
+ if end_date:
3684
+ end_date = ensure_pendulum_datetime(end_date).in_timezone("UTC")
3685
+ else:
3686
+ end_date = None
3687
+
3688
+ from ingestr.src.wise import wise_source
3689
+
3690
+ return wise_source(
3691
+ api_key=api_key[0],
3692
+ start_date=start_date,
3693
+ end_date=end_date,
3694
+ ).with_resources(table)
3695
+
3696
+
3697
+ class FundraiseupSource:
3698
+ def handles_incrementality(self) -> bool:
3699
+ return False
3700
+
3701
+ def dlt_source(self, uri: str, table: str, **kwargs):
3702
+ parsed_uri = urlparse(uri)
3703
+ params = parse_qs(parsed_uri.query)
3704
+
3705
+ api_key = params.get("api_key")
3706
+ if api_key is None:
3707
+ raise MissingValueError("api_key", "Fundraiseup")
3708
+
3709
+ if table not in [
3710
+ "donations",
3711
+ "events",
3712
+ "fundraisers",
3713
+ "recurring_plans",
3714
+ "supporters",
3715
+ ]:
3716
+ raise UnsupportedResourceError(table, "Fundraiseup")
3717
+
3718
+ from ingestr.src.fundraiseup import fundraiseup_source
3719
+
3720
+ return fundraiseup_source(
3721
+ api_key=api_key[0],
3722
+ ).with_resources(table)
3723
+
3724
+
3725
+ class AnthropicSource:
3726
+ def handles_incrementality(self) -> bool:
3727
+ return True
3728
+
3729
+ def dlt_source(self, uri: str, table: str, **kwargs):
3730
+ # anthropic://?api_key=<admin_api_key>
3731
+ parsed_uri = urlparse(uri)
3732
+ params = parse_qs(parsed_uri.query)
3733
+
3734
+ api_key = params.get("api_key")
3735
+ if api_key is None:
3736
+ raise MissingValueError("api_key", "Anthropic")
3737
+
3738
+ if table not in [
3739
+ "claude_code_usage",
3740
+ "usage_report",
3741
+ "cost_report",
3742
+ "organization",
3743
+ "workspaces",
3744
+ "api_keys",
3745
+ "invites",
3746
+ "users",
3747
+ "workspace_members",
3748
+ ]:
3749
+ raise UnsupportedResourceError(table, "Anthropic")
3750
+
3751
+ # Get start and end dates from kwargs
3752
+ start_date = kwargs.get("interval_start")
3753
+ if start_date:
3754
+ start_date = ensure_pendulum_datetime(start_date)
3755
+ else:
3756
+ # Default to 2023-01-01
3757
+ start_date = pendulum.datetime(2023, 1, 1)
3758
+
3759
+ end_date = kwargs.get("interval_end")
3760
+ if end_date:
3761
+ end_date = ensure_pendulum_datetime(end_date)
3762
+ else:
3763
+ end_date = None
3764
+
3765
+ from ingestr.src.anthropic import anthropic_source
3766
+
3767
+ return anthropic_source(
3768
+ api_key=api_key[0],
3769
+ initial_start_date=start_date,
3770
+ end_date=end_date,
3771
+ ).with_resources(table)
3772
+
3773
+
3774
+ class PlusVibeAISource:
3775
+ resources = [
3776
+ "campaigns",
3777
+ "leads",
3778
+ "email_accounts",
3779
+ "emails",
3780
+ "blocklist",
3781
+ "webhooks",
3782
+ "tags",
3783
+ ]
3784
+
3785
+ def handles_incrementality(self) -> bool:
3786
+ return True
3787
+
3788
+ def dlt_source(self, uri: str, table: str, **kwargs):
3789
+ # plusvibeai://?api_key=<key>&workspace_id=<id>
3790
+ parsed_uri = urlparse(uri)
3791
+ params = parse_qs(parsed_uri.query)
3792
+
3793
+ api_key = params.get("api_key")
3794
+ workspace_id = params.get("workspace_id")
3795
+
3796
+ if not api_key:
3797
+ raise MissingValueError("api_key", "PlusVibeAI")
3798
+
3799
+ if not workspace_id:
3800
+ raise MissingValueError("workspace_id", "PlusVibeAI")
3801
+
3802
+ if table not in self.resources:
3803
+ raise UnsupportedResourceError(table, "PlusVibeAI")
3804
+
3805
+ import dlt
3806
+
3807
+ from ingestr.src.plusvibeai import plusvibeai_source
3808
+
3809
+ dlt.secrets["sources.plusvibeai.api_key"] = api_key[0]
3810
+ dlt.secrets["sources.plusvibeai.workspace_id"] = workspace_id[0]
3811
+
3812
+ # Handle custom base URL if provided
3813
+ base_url = params.get("base_url", ["https://api.plusvibe.ai"])[0]
3814
+ dlt.secrets["sources.plusvibeai.base_url"] = base_url
3815
+
3816
+ src = plusvibeai_source()
3817
+ return src.with_resources(table)
3818
+
3819
+
3820
+ class IntercomSource:
3821
+ def handles_incrementality(self) -> bool:
3822
+ return True
3823
+
3824
+ def dlt_source(self, uri: str, table: str, **kwargs):
3825
+ # intercom://?access_token=<token>&region=<us|eu|au>
3826
+ # OR intercom://?oauth_token=<token>&region=<us|eu|au>
3827
+ parsed_uri = urlparse(uri)
3828
+ params = parse_qs(parsed_uri.query)
3829
+
3830
+ # Check for authentication
3831
+ access_token = params.get("access_token")
3832
+ oauth_token = params.get("oauth_token")
3833
+ region = params.get("region", ["us"])[0]
3834
+
3835
+ if not access_token and not oauth_token:
3836
+ raise MissingValueError("access_token or oauth_token", "Intercom")
3837
+
3838
+ # Validate table/resource
3839
+ supported_tables = [
3840
+ "contacts",
3841
+ "companies",
3842
+ "conversations",
3843
+ "tickets",
3844
+ "tags",
3845
+ "segments",
3846
+ "teams",
3847
+ "admins",
3848
+ "articles",
3849
+ "data_attributes",
3850
+ ]
3851
+
3852
+ if table not in supported_tables:
3853
+ raise UnsupportedResourceError(table, "Intercom")
3854
+
3855
+ # Get date parameters
3856
+ start_date = kwargs.get("interval_start")
3857
+ if start_date:
3858
+ start_date = ensure_pendulum_datetime(start_date)
3859
+ else:
3860
+ start_date = pendulum.datetime(2020, 1, 1)
3861
+
3862
+ end_date = kwargs.get("interval_end")
3863
+ if end_date:
3864
+ end_date = ensure_pendulum_datetime(end_date)
3865
+
3866
+ # Import and initialize the source
3867
+ from ingestr.src.intercom import (
3868
+ IntercomCredentialsAccessToken,
3869
+ IntercomCredentialsOAuth,
3870
+ TIntercomCredentials,
3871
+ intercom_source,
3872
+ )
3873
+
3874
+ credentials: TIntercomCredentials
3875
+ if access_token:
3876
+ credentials = IntercomCredentialsAccessToken(
3877
+ access_token=access_token[0], region=region
3878
+ )
3879
+ else:
3880
+ if not oauth_token:
3881
+ raise MissingValueError("oauth_token", "Intercom")
3882
+ credentials = IntercomCredentialsOAuth(
3883
+ oauth_token=oauth_token[0], region=region
3884
+ )
3885
+
3886
+ return intercom_source(
3887
+ credentials=credentials,
3888
+ start_date=start_date,
3889
+ end_date=end_date,
3890
+ ).with_resources(table)
3891
+
3892
+
3893
+ class HttpSource:
3894
+ """Source for reading CSV, JSON, and Parquet files from HTTP URLs"""
3895
+
3896
+ def handles_incrementality(self) -> bool:
3897
+ return False
3898
+
3899
+ def dlt_source(self, uri: str, table: str, **kwargs):
3900
+ """
3901
+ Create a dlt source for reading files from HTTP URLs.
3902
+
3903
+ URI format: http://example.com/file.csv or https://example.com/file.json
3904
+
3905
+ Args:
3906
+ uri: HTTP(S) URL to the file
3907
+ table: Not used for HTTP source (files are read directly)
3908
+ **kwargs: Additional arguments:
3909
+ - file_format: Optional file format override ('csv', 'json', 'parquet')
3910
+ - chunksize: Number of records to process at once (default varies by format)
3911
+ - merge_key: Merge key for the resource
3912
+
3913
+ Returns:
3914
+ DltResource for the HTTP file
3915
+ """
3916
+ from ingestr.src.http import http_source
3917
+
3918
+ # Extract the actual URL (remove the http:// or https:// scheme if duplicated)
3919
+ url = uri
3920
+ if uri.startswith("http://http://") or uri.startswith("https://https://"):
3921
+ url = uri.split("://", 1)[1]
3922
+
3923
+ file_format = kwargs.get("file_format")
3924
+ chunksize = kwargs.get("chunksize")
3925
+ merge_key = kwargs.get("merge_key")
3926
+
3927
+ reader_kwargs = {}
3928
+ if chunksize is not None:
3929
+ reader_kwargs["chunksize"] = chunksize
3930
+
3931
+ source = http_source(url=url, file_format=file_format, **reader_kwargs)
3932
+
3933
+ if merge_key:
3934
+ source.apply_hints(merge_key=merge_key)
3935
+
3936
+ return source
3937
+
3938
+
3939
+ class MondaySource:
3940
+ def handles_incrementality(self) -> bool:
3941
+ return False
3942
+
3943
+ def dlt_source(self, uri: str, table: str, **kwargs):
3944
+ parsed_uri = urlparse(uri)
3945
+ query_params = parse_qs(parsed_uri.query)
3946
+ api_token = query_params.get("api_token")
3947
+
3948
+ if api_token is None:
3949
+ raise MissingValueError("api_token", "Monday")
3950
+
3951
+ parts = table.replace(" ", "").split(":")
3952
+ table_name = parts[0]
3953
+ params = parts[1:]
3954
+
3955
+ # Get interval_start and interval_end from kwargs (command line args)
3956
+ interval_start = kwargs.get("interval_start")
3957
+ interval_end = kwargs.get("interval_end")
3958
+
3959
+ # Convert datetime to string format YYYY-MM-DD
3960
+ start_date = interval_start.strftime("%Y-%m-%d") if interval_start else None
3961
+ end_date = interval_end.strftime("%Y-%m-%d") if interval_end else None
3962
+
3963
+ from ingestr.src.monday import monday_source
3964
+
3965
+ try:
3966
+ return monday_source(
3967
+ api_token=api_token[0],
3968
+ params=params,
3969
+ start_date=start_date,
3970
+ end_date=end_date,
3971
+ ).with_resources(table_name)
3972
+ except ResourcesNotFoundError:
3973
+ raise UnsupportedResourceError(table_name, "Monday")
3974
+
3975
+
3976
+ class MailchimpSource:
3977
+ def handles_incrementality(self) -> bool:
3978
+ return False
3979
+
3980
+ def dlt_source(self, uri: str, table: str, **kwargs):
3981
+ parsed_uri = urlparse(uri)
3982
+ query_params = parse_qs(parsed_uri.query)
3983
+ api_key = query_params.get("api_key")
3984
+ server = query_params.get("server")
3985
+
3986
+ if api_key is None:
3987
+ raise MissingValueError("api_key", "Mailchimp")
3988
+ if server is None:
3989
+ raise MissingValueError("server", "Mailchimp")
3990
+
3991
+ from ingestr.src.mailchimp import mailchimp_source
3992
+
3993
+ try:
3994
+ return mailchimp_source(
3995
+ api_key=api_key[0],
3996
+ server=server[0],
3997
+ ).with_resources(table)
3998
+ except ResourcesNotFoundError:
3999
+ raise UnsupportedResourceError(table, "Mailchimp")
4000
+
4001
+
4002
+ class AlliumSource:
4003
+ def handles_incrementality(self) -> bool:
4004
+ return False
4005
+
4006
+ def dlt_source(self, uri: str, table: str, **kwargs):
4007
+ parsed_uri = urlparse(uri)
4008
+ query_params = parse_qs(parsed_uri.query)
4009
+ api_key = query_params.get("api_key")
4010
+
4011
+ if api_key is None:
4012
+ raise MissingValueError("api_key", "Allium")
4013
+
4014
+ # Extract query_id and custom parameters from table parameter
4015
+ # Format: query_id or query:query_id or query:query_id:param1=value1&param2=value2
4016
+ query_id = table
4017
+ custom_params = {}
4018
+ limit = None
4019
+ compute_profile = None
4020
+
4021
+ if ":" in table:
4022
+ parts = table.split(":", 2) # Split into max 3 parts
4023
+ if len(parts) >= 2:
4024
+ query_id = parts[1]
4025
+ if len(parts) == 3:
4026
+ # Parse custom parameters from query string format
4027
+ param_string = parts[2]
4028
+ for param in param_string.split("&"):
4029
+ if "=" in param:
4030
+ key, value = param.split("=", 1)
4031
+ # Extract run_config parameters
4032
+ if key == "limit":
4033
+ limit = int(value)
4034
+ elif key == "compute_profile":
4035
+ compute_profile = value
4036
+ else:
4037
+ custom_params[key] = value
4038
+
4039
+ # Extract parameters from interval_start and interval_end
4040
+ # Default: 2 days ago 00:00 to yesterday 00:00
4041
+ now = pendulum.now()
4042
+ default_start = now.subtract(days=2).start_of("day")
4043
+ default_end = now.subtract(days=1).start_of("day")
4044
+
4045
+ parameters = {}
4046
+ interval_start = kwargs.get("interval_start")
4047
+ interval_end = kwargs.get("interval_end")
4048
+
4049
+ start_date = interval_start if interval_start is not None else default_start
4050
+ end_date = interval_end if interval_end is not None else default_end
4051
+
4052
+ parameters["start_date"] = start_date.strftime("%Y-%m-%d")
4053
+ parameters["end_date"] = end_date.strftime("%Y-%m-%d")
4054
+ parameters["start_timestamp"] = str(int(start_date.timestamp()))
4055
+ parameters["end_timestamp"] = str(int(end_date.timestamp()))
4056
+
4057
+ # Merge custom parameters (they override default parameters)
4058
+ parameters.update(custom_params)
4059
+
4060
+ from ingestr.src.allium import allium_source
4061
+
4062
+ return allium_source(
4063
+ api_key=api_key[0],
4064
+ query_id=query_id,
4065
+ parameters=parameters if parameters else None,
4066
+ limit=limit,
4067
+ compute_profile=compute_profile,
4068
+ )
4069
+
4070
+
4071
+ class CouchbaseSource:
4072
+ table_builder: Callable
4073
+
4074
+ def __init__(self, table_builder=None) -> None:
4075
+ if table_builder is None:
4076
+ from ingestr.src.couchbase_source import couchbase_collection
4077
+
4078
+ table_builder = couchbase_collection
4079
+
4080
+ self.table_builder = table_builder
4081
+
4082
+ def handles_incrementality(self) -> bool:
4083
+ return False
4084
+
4085
+ def dlt_source(self, uri: str, table: str, **kwargs):
4086
+ """
4087
+ Create a dlt source for reading data from Couchbase.
4088
+
4089
+ URI formats:
4090
+ - couchbase://username:password@host
4091
+ - couchbase://username:password@host/bucket
4092
+ - couchbase://username:password@host?ssl=true
4093
+ - couchbases://username:password@host (SSL enabled)
4094
+
4095
+ Table formats:
4096
+ - bucket.scope.collection (when bucket not in URI)
4097
+ - scope.collection (when bucket specified in URI path)
4098
+
4099
+ Note: If password contains special characters (@, :, /, etc.), they must be URL-encoded.
4100
+
4101
+ Examples:
4102
+ Local/Self-hosted:
4103
+ - couchbase://admin:password123@localhost with table "mybucket.myscope.mycollection"
4104
+ - couchbase://admin:password123@localhost/mybucket with table "myscope.mycollection"
4105
+ - couchbase://admin:password123@localhost?ssl=true with table "mybucket._default._default"
4106
+
4107
+ Capella (Cloud):
4108
+ - couchbases://user:pass@cb.xxx.cloud.couchbase.com with table "travel-sample.inventory.airport"
4109
+ - couchbase://user:pass@cb.xxx.cloud.couchbase.com/travel-sample?ssl=true with table "inventory.airport"
4110
+
4111
+ To encode password in Python:
4112
+ from urllib.parse import quote
4113
+ encoded_pwd = quote("MyPass@123!", safe='')
4114
+ uri = f"couchbase://admin:{encoded_pwd}@localhost?ssl=true"
4115
+
4116
+ Args:
4117
+ uri: Couchbase connection URI (can include /bucket path and ?ssl=true query parameter)
4118
+ table: Format depends on URI:
4119
+ - bucket.scope.collection (if bucket not in URI)
4120
+ - scope.collection (if bucket in URI path)
4121
+ **kwargs: Additional arguments:
4122
+ - limit: Maximum number of documents to fetch
4123
+ - incremental_key: Field to use for incremental loading
4124
+ - interval_start: Start value for incremental loading
4125
+ - interval_end: End value for incremental loading
4126
+
4127
+ Returns:
4128
+ DltResource for the Couchbase collection
4129
+ """
4130
+ # Parse the URI to extract connection details
4131
+ # urlparse automatically decodes URL-encoded credentials
4132
+
4133
+ parsed = urlparse(uri)
4134
+
4135
+ # Extract username and password from URI
4136
+ # Note: urlparse automatically decodes URL-encoded characters in username/password
4137
+ from urllib.parse import unquote
4138
+
4139
+ username = parsed.username
4140
+ password = unquote(parsed.password) if parsed.password else None
4141
+
4142
+ if not username or not password:
4143
+ raise ValueError(
4144
+ "Username and password must be provided in the URI.\n"
4145
+ "Format: couchbase://username:password@host\n"
4146
+ "If password has special characters (@, :, /), URL-encode them.\n"
4147
+ "Example: couchbase://admin:MyPass%40123@localhost for password 'MyPass@123'"
4148
+ )
4149
+
4150
+ # Reconstruct connection string without credentials
4151
+ scheme = parsed.scheme
4152
+ netloc = parsed.netloc
4153
+
4154
+ # Remove username:password@ from netloc if present
4155
+ if "@" in netloc:
4156
+ netloc = netloc.split("@", 1)[1]
4157
+
4158
+ # Parse query parameters from URI
4159
+ from urllib.parse import parse_qs
4160
+
4161
+ query_params = parse_qs(parsed.query)
4162
+
4163
+ # Check if SSL is requested via URI query parameter (?ssl=true)
4164
+ if "ssl" in query_params:
4165
+ ssl_value = query_params["ssl"][0].lower()
4166
+ use_ssl = ssl_value in ("true", "1", "yes")
4167
+
4168
+ # Apply SSL scheme based on parameter
4169
+ if use_ssl and scheme == "couchbase":
4170
+ scheme = "couchbases"
4171
+
4172
+ connection_string = f"{scheme}://{netloc}"
4173
+
4174
+ # Extract bucket from URI path if present (e.g., couchbase://host/bucket)
4175
+ bucket_from_uri = None
4176
+ if parsed.path and parsed.path.strip("/"):
4177
+ bucket_from_uri = parsed.path.strip("/").split("/")[0]
4178
+
4179
+ # Parse table format: can be "scope.collection" or "bucket.scope.collection"
4180
+ table_parts = table.split(".")
4181
+
4182
+ if len(table_parts) == 3:
4183
+ # Format: bucket.scope.collection
4184
+ bucket, scope, collection = table_parts
4185
+ elif len(table_parts) == 2:
4186
+ # Format: scope.collection (bucket from URI)
4187
+ if bucket_from_uri:
4188
+ bucket = bucket_from_uri
4189
+ scope, collection = table_parts
4190
+ else:
4191
+ raise ValueError(
4192
+ "Table format is 'scope.collection' but no bucket specified in URI.\n"
4193
+ f"Either use URI format: couchbase://user:pass@host/bucket\n"
4194
+ f"Or use table format: bucket.scope.collection\n"
4195
+ f"Got table: {table}"
4196
+ )
4197
+ else:
4198
+ raise ValueError(
4199
+ "Table format must be 'bucket.scope.collection' or 'scope.collection' (with bucket in URI). "
4200
+ f"Got: {table}\n"
4201
+ "Examples:\n"
4202
+ " - URI: couchbase://user:pass@host, Table: travel-sample.inventory.airport\n"
4203
+ " - URI: couchbase://user:pass@host/travel-sample, Table: inventory.airport"
4204
+ )
4205
+
+         # Handle incremental loading.
+         incremental = None
+         if kwargs.get("incremental_key"):
+             incremental = dlt_incremental(
+                 kwargs["incremental_key"],
+                 initial_value=kwargs.get("interval_start"),
+                 end_value=kwargs.get("interval_end"),
+                 range_end="closed",
+                 range_start="closed",
+             )
+
+         # Optional cap on the number of documents to fetch.
+         limit = kwargs.get("limit")
+
+         table_instance = self.table_builder(
+             connection_string=connection_string,
+             username=username,
+             password=password,
+             bucket=bucket,
+             scope=scope,
+             collection=collection,
+             incremental=incremental,
+             limit=limit,
+         )
+         table_instance.max_table_nesting = 1
+
+         return table_instance
+
+
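Taken together, the parsing above decomposes a URI/table pair into a credential-free connection string plus bucket, scope, and collection names. A minimal standalone sketch of that decomposition (host, credentials, and dataset names are hypothetical, echoing the examples in the error messages):

```python
from urllib.parse import parse_qs, unquote, urlparse

uri = "couchbase://admin:MyPass%40123@localhost/travel-sample?ssl=true"
table = "inventory.airport"

parsed = urlparse(uri)
username = unquote(parsed.username)    # "admin"
password = unquote(parsed.password)    # "MyPass@123"
host = parsed.netloc.split("@", 1)[1]  # "localhost"
bucket = parsed.path.strip("/")        # "travel-sample" (bucket came from the URI path)
use_ssl = parse_qs(parsed.query).get("ssl", ["false"])[0].lower() in ("true", "1", "yes")
scheme = "couchbases" if use_ssl else "couchbase"
scope, collection = table.split(".")   # "inventory", "airport"

print(f"{scheme}://{host}", bucket, scope, collection)
# couchbases://localhost travel-sample inventory airport
```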
+ class CursorSource:
+     resources = [
+         "team_members",
+         "daily_usage_data",
+         "team_spend",
+         "filtered_usage_events",
+     ]
+
+     def handles_incrementality(self) -> bool:
+         return True
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         # Expected URI format: cursor://?api_key=<api_key>
+         parsed_uri = urlparse(uri)
+         params = parse_qs(parsed_uri.query)
+
+         api_key = params.get("api_key")
+         if not api_key:
+             raise MissingValueError("api_key", "Cursor")
+
+         if table not in self.resources:
+             raise UnsupportedResourceError(table, "Cursor")
+
+         import dlt
+
+         from ingestr.src.cursor import cursor_source
+
+         dlt.secrets["sources.cursor.api_key"] = api_key[0]
+
+         # interval_start/interval_end apply only to daily_usage_data and
+         # filtered_usage_events. Both are optional, but they only take
+         # effect when provided together; a single bound is ignored.
+         if table in ["daily_usage_data", "filtered_usage_events"]:
+             interval_start = kwargs.get("interval_start")
+             interval_end = kwargs.get("interval_end")
+
+             if interval_start is not None and interval_end is not None:
+                 # Convert the datetimes to epoch milliseconds.
+                 dlt.config["sources.cursor.start_date"] = int(
+                     interval_start.timestamp() * 1000
+                 )
+                 dlt.config["sources.cursor.end_date"] = int(
+                     interval_end.timestamp() * 1000
+                 )
+
+         src = cursor_source()
+         return src.with_resources(table)
+
+
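The epoch-millisecond conversion above is easy to sanity-check in isolation; a small sketch with hypothetical interval values:

```python
from datetime import datetime, timezone

interval_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
interval_end = datetime(2024, 1, 31, tzinfo=timezone.utc)

start_ms = int(interval_start.timestamp() * 1000)  # 1704067200000
end_ms = int(interval_end.timestamp() * 1000)      # 1706659200000
```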
+ class SocrataSource:
+     def handles_incrementality(self) -> bool:
+         return False
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         """
+         Creates a DLT source for the Socrata open data platform.
+
+         URI format: socrata://domain?app_token=TOKEN
+         Table: dataset_id (e.g., "6udu-fhnu")
+
+         Args:
+             uri: Socrata connection URI with the domain and optional auth params
+             table: Dataset ID (e.g., "6udu-fhnu")
+             **kwargs: Additional arguments:
+                 - incremental_key: Field to use for incremental loading (e.g., ":updated_at")
+                 - interval_start: Start date for the initial load
+                 - interval_end: End date for the load
+                 - primary_key: Primary key field for merge operations
+
+         Returns:
+             DltResource for the Socrata dataset
+         """
+         parsed = urlparse(uri)
+
+         domain = parsed.netloc
+         if not domain:
+             raise ValueError(
+                 "Domain must be provided in the URI.\n"
+                 "Format: socrata://domain?app_token=TOKEN\n"
+                 "Example: socrata://evergreen.data.socrata.com?app_token=mytoken"
+             )
+
+         query_params = parse_qs(parsed.query)
+
+         dataset_id = table
+         if not dataset_id:
+             raise ValueError(
+                 "Dataset ID must be provided as the table parameter.\n"
+                 "Example: --source-table 6udu-fhnu"
+             )
+
+         app_token = query_params.get("app_token", [None])[0]
+         username = query_params.get("username", [None])[0]
+         password = query_params.get("password", [None])[0]
+
+         incremental = None
+         if kwargs.get("incremental_key"):
+             # Normalize the interval bounds to ISO-8601 strings for the
+             # incremental cursor.
+             start_value = kwargs.get("interval_start")
+             end_value = kwargs.get("interval_end")
+
+             if start_value:
+                 start_value = (
+                     start_value.isoformat()
+                     if hasattr(start_value, "isoformat")
+                     else str(start_value)
+                 )
+
+             if end_value:
+                 end_value = (
+                     end_value.isoformat()
+                     if hasattr(end_value, "isoformat")
+                     else str(end_value)
+                 )
+
+             incremental = dlt_incremental(
+                 kwargs.get("incremental_key", ""),
+                 initial_value=start_value,
+                 end_value=end_value,
+                 range_end="open",
+                 range_start="closed",
+             )
+
+         primary_key = kwargs.get("primary_key")
+
+         from ingestr.src.socrata_source import source
+
+         return source(
+             domain=domain,
+             dataset_id=dataset_id,
+             app_token=app_token,
+             username=username,
+             password=password,
+             incremental=incremental,
+             primary_key=primary_key,
+         ).with_resources("dataset")
+
+
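The isoformat normalization above accepts datetimes, dates, or plain strings alike; a quick sketch with hypothetical bounds showing each branch:

```python
from datetime import date, datetime

for bound in (datetime(2024, 1, 1, 12, 30), date(2024, 2, 1), "2024-03-01"):
    # Objects with .isoformat() are converted; anything else falls back to str().
    value = bound.isoformat() if hasattr(bound, "isoformat") else str(bound)
    print(value)
# 2024-01-01T12:30:00
# 2024-02-01
# 2024-03-01
```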
+ class HostawaySource:
+     def handles_incrementality(self) -> bool:
+         return True
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         if kwargs.get("incremental_key"):
+             raise ValueError(
+                 "Hostaway takes care of incrementality on its own; you should not provide incremental_key"
+             )
+
+         source_parts = urlparse(uri)
+         source_params = parse_qs(source_parts.query)
+         api_key = source_params.get("api_key")
+
+         if not api_key:
+             raise ValueError("api_key in the URI is required to connect to Hostaway")
+
+         # Every supported table maps 1:1 to a resource of the same name.
+         supported_tables = {
+             "listings",
+             "listing_fee_settings",
+             "listing_agreements",
+             "listing_pricing_settings",
+             "cancellation_policies",
+             "cancellation_policies_airbnb",
+             "cancellation_policies_marriott",
+             "cancellation_policies_vrbo",
+             "reservations",
+             "finance_fields",
+             "reservation_payment_methods",
+             "reservation_rental_agreements",
+             "listing_calendars",
+             "conversations",
+             "message_templates",
+             "bed_types",
+             "property_types",
+             "countries",
+             "account_tax_settings",
+             "user_groups",
+             "guest_payment_charges",
+             "coupons",
+             "webhook_reservations",
+             "tasks",
+         }
+         if table not in supported_tables:
+             raise ValueError(
+                 f"Resource '{table}' is not supported for the Hostaway source yet; if you are interested in it, please create a GitHub issue at https://github.com/bruin-data/ingestr"
+             )
+         resource_name = table
+
+         # Default to the Unix epoch when no start is given, and normalize
+         # both bounds to UTC.
+         start_date = kwargs.get("interval_start")
+         if start_date:
+             start_date = ensure_pendulum_datetime(start_date).in_timezone("UTC")
+         else:
+             start_date = pendulum.datetime(1970, 1, 1).in_timezone("UTC")
+
+         end_date = kwargs.get("interval_end")
+         if end_date:
+             end_date = ensure_pendulum_datetime(end_date).in_timezone("UTC")
+
+         from ingestr.src.hostaway import hostaway_source
+
+         return hostaway_source(
+             api_key=api_key[0],
+             start_date=start_date,
+             end_date=end_date,
+         ).with_resources(resource_name)
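For reference, the date-window logic above behaves like this minimal sketch. It uses pendulum.instance as a stand-in for dlt's ensure_pendulum_datetime helper, and the interval values are hypothetical:

```python
from datetime import datetime

import pendulum

# No start provided on the CLI -> fall back to the Unix epoch in UTC.
interval_start = None
start_date = (
    pendulum.instance(interval_start).in_timezone("UTC")
    if interval_start
    else pendulum.datetime(1970, 1, 1).in_timezone("UTC")
)
print(start_date)  # 1970-01-01T00:00:00+00:00

# A provided bound is normalized to UTC.
print(pendulum.instance(datetime(2024, 6, 1, 9, 0)).in_timezone("UTC"))
# 2024-06-01T09:00:00+00:00
```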