ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin_max/__init__.py +6 -4
  10. ingestr/src/appsflyer/__init__.py +325 -0
  11. ingestr/src/appsflyer/client.py +49 -45
  12. ingestr/src/appstore/__init__.py +1 -0
  13. ingestr/src/arrow/__init__.py +9 -1
  14. ingestr/src/asana_source/__init__.py +1 -1
  15. ingestr/src/attio/__init__.py +102 -0
  16. ingestr/src/attio/helpers.py +65 -0
  17. ingestr/src/blob.py +37 -10
  18. ingestr/src/buildinfo.py +1 -1
  19. ingestr/src/chess/__init__.py +1 -1
  20. ingestr/src/clickup/__init__.py +85 -0
  21. ingestr/src/clickup/helpers.py +47 -0
  22. ingestr/src/collector/spinner.py +43 -0
  23. ingestr/src/couchbase_source/__init__.py +118 -0
  24. ingestr/src/couchbase_source/helpers.py +135 -0
  25. ingestr/src/cursor/__init__.py +83 -0
  26. ingestr/src/cursor/helpers.py +188 -0
  27. ingestr/src/destinations.py +508 -27
  28. ingestr/src/docebo/__init__.py +589 -0
  29. ingestr/src/docebo/client.py +435 -0
  30. ingestr/src/docebo/helpers.py +97 -0
  31. ingestr/src/elasticsearch/__init__.py +80 -0
  32. ingestr/src/elasticsearch/helpers.py +138 -0
  33. ingestr/src/errors.py +8 -0
  34. ingestr/src/facebook_ads/__init__.py +47 -28
  35. ingestr/src/facebook_ads/helpers.py +59 -37
  36. ingestr/src/facebook_ads/settings.py +2 -0
  37. ingestr/src/facebook_ads/utils.py +39 -0
  38. ingestr/src/factory.py +107 -2
  39. ingestr/src/filesystem/__init__.py +8 -3
  40. ingestr/src/filters.py +46 -3
  41. ingestr/src/fluxx/__init__.py +9906 -0
  42. ingestr/src/fluxx/helpers.py +209 -0
  43. ingestr/src/frankfurter/__init__.py +157 -0
  44. ingestr/src/frankfurter/helpers.py +48 -0
  45. ingestr/src/freshdesk/__init__.py +89 -0
  46. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  47. ingestr/src/freshdesk/settings.py +9 -0
  48. ingestr/src/fundraiseup/__init__.py +95 -0
  49. ingestr/src/fundraiseup/client.py +81 -0
  50. ingestr/src/github/__init__.py +41 -6
  51. ingestr/src/github/helpers.py +5 -5
  52. ingestr/src/google_analytics/__init__.py +22 -4
  53. ingestr/src/google_analytics/helpers.py +124 -6
  54. ingestr/src/google_sheets/__init__.py +4 -4
  55. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  56. ingestr/src/hostaway/__init__.py +302 -0
  57. ingestr/src/hostaway/client.py +288 -0
  58. ingestr/src/http/__init__.py +35 -0
  59. ingestr/src/http/readers.py +114 -0
  60. ingestr/src/http_client.py +24 -0
  61. ingestr/src/hubspot/__init__.py +66 -23
  62. ingestr/src/hubspot/helpers.py +52 -22
  63. ingestr/src/hubspot/settings.py +14 -7
  64. ingestr/src/influxdb/__init__.py +46 -0
  65. ingestr/src/influxdb/client.py +34 -0
  66. ingestr/src/intercom/__init__.py +142 -0
  67. ingestr/src/intercom/helpers.py +674 -0
  68. ingestr/src/intercom/settings.py +279 -0
  69. ingestr/src/isoc_pulse/__init__.py +159 -0
  70. ingestr/src/jira_source/__init__.py +340 -0
  71. ingestr/src/jira_source/helpers.py +439 -0
  72. ingestr/src/jira_source/settings.py +170 -0
  73. ingestr/src/kafka/__init__.py +4 -1
  74. ingestr/src/kinesis/__init__.py +139 -0
  75. ingestr/src/kinesis/helpers.py +82 -0
  76. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  77. ingestr/src/linear/__init__.py +634 -0
  78. ingestr/src/linear/helpers.py +111 -0
  79. ingestr/src/linkedin_ads/helpers.py +0 -1
  80. ingestr/src/mailchimp/__init__.py +126 -0
  81. ingestr/src/mailchimp/helpers.py +226 -0
  82. ingestr/src/mailchimp/settings.py +164 -0
  83. ingestr/src/masking.py +344 -0
  84. ingestr/src/mixpanel/__init__.py +62 -0
  85. ingestr/src/mixpanel/client.py +99 -0
  86. ingestr/src/monday/__init__.py +246 -0
  87. ingestr/src/monday/helpers.py +392 -0
  88. ingestr/src/monday/settings.py +328 -0
  89. ingestr/src/mongodb/__init__.py +72 -8
  90. ingestr/src/mongodb/helpers.py +915 -38
  91. ingestr/src/partition.py +32 -0
  92. ingestr/src/phantombuster/__init__.py +65 -0
  93. ingestr/src/phantombuster/client.py +87 -0
  94. ingestr/src/pinterest/__init__.py +82 -0
  95. ingestr/src/pipedrive/__init__.py +198 -0
  96. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  97. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  98. ingestr/src/pipedrive/helpers/pages.py +115 -0
  99. ingestr/src/pipedrive/settings.py +27 -0
  100. ingestr/src/pipedrive/typing.py +3 -0
  101. ingestr/src/plusvibeai/__init__.py +335 -0
  102. ingestr/src/plusvibeai/helpers.py +544 -0
  103. ingestr/src/plusvibeai/settings.py +252 -0
  104. ingestr/src/quickbooks/__init__.py +117 -0
  105. ingestr/src/resource.py +40 -0
  106. ingestr/src/revenuecat/__init__.py +83 -0
  107. ingestr/src/revenuecat/helpers.py +237 -0
  108. ingestr/src/salesforce/__init__.py +15 -8
  109. ingestr/src/shopify/__init__.py +1 -17
  110. ingestr/src/smartsheets/__init__.py +82 -0
  111. ingestr/src/snapchat_ads/__init__.py +489 -0
  112. ingestr/src/snapchat_ads/client.py +72 -0
  113. ingestr/src/snapchat_ads/helpers.py +535 -0
  114. ingestr/src/socrata_source/__init__.py +83 -0
  115. ingestr/src/socrata_source/helpers.py +85 -0
  116. ingestr/src/socrata_source/settings.py +8 -0
  117. ingestr/src/solidgate/__init__.py +219 -0
  118. ingestr/src/solidgate/helpers.py +154 -0
  119. ingestr/src/sources.py +2933 -245
  120. ingestr/src/stripe_analytics/__init__.py +49 -21
  121. ingestr/src/stripe_analytics/helpers.py +286 -1
  122. ingestr/src/stripe_analytics/settings.py +62 -10
  123. ingestr/src/telemetry/event.py +10 -9
  124. ingestr/src/tiktok_ads/__init__.py +12 -6
  125. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  126. ingestr/src/trustpilot/__init__.py +48 -0
  127. ingestr/src/trustpilot/client.py +48 -0
  128. ingestr/src/wise/__init__.py +68 -0
  129. ingestr/src/wise/client.py +63 -0
  130. ingestr/src/zoom/__init__.py +99 -0
  131. ingestr/src/zoom/helpers.py +102 -0
  132. ingestr/tests/unit/test_smartsheets.py +133 -0
  133. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
  134. ingestr-0.14.104.dist-info/RECORD +203 -0
  135. ingestr/src/appsflyer/_init_.py +0 -24
  136. ingestr-0.13.13.dist-info/RECORD +0 -115
  137. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  138. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  139. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/blob.py CHANGED
@@ -1,11 +1,15 @@
1
1
  import warnings
2
2
  from typing import Tuple, TypeAlias
3
- from urllib.parse import ParseResult
3
+ from urllib.parse import ParseResult, urlparse
4
4
 
5
5
  BucketName: TypeAlias = str
6
6
  FileGlob: TypeAlias = str
7
7
 
8
8
 
9
class UnsupportedEndpointError(Exception):
    """Raised when a blob path's file format cannot be mapped to a reader."""
11
+
12
+
9
13
  def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
10
14
  """
11
15
  parse the URI of a blob storage and
@@ -14,13 +18,16 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
14
18
  Supports the following Forms:
15
19
  - uri: "gs://"
16
20
  table: "bucket-name/file-glob"
21
+ - uri: "gs://uri-bucket-name" (uri-bucket-name is preferred)
22
+ table: "gs://table-bucket-name/file-glob"
23
+ - uri: "gs://"
24
+ table: "gs://bucket-name/file-glob"
17
25
  - uri: gs://bucket-name/file-glob
18
26
  table: None
19
27
  - uri: "gs://bucket-name"
20
28
  table: "file-glob"
21
29
 
22
- The first form is the prefered method. Other forms are supported
23
- for backward compatibility, but discouraged.
30
+ The first form is the prefered method. Other forms are supported but discouraged.
24
31
  """
25
32
 
26
33
  table = table.strip()
@@ -34,16 +41,36 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
34
41
  )
35
42
  return host, uri.path.lstrip("/")
36
43
 
44
+ table_uri = urlparse(table)
45
+
37
46
  if host != "":
38
- warnings.warn(
39
- f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
40
- DeprecationWarning,
41
- stacklevel=2,
42
- )
43
- return host, table.lstrip("/")
47
+ return host, table_uri.path.lstrip("/")
48
+
49
+ if table_uri.hostname:
50
+ return table_uri.hostname, table_uri.path.lstrip("/")
44
51
 
45
- parts = table.lstrip("/").split("/", maxsplit=1)
52
+ parts = table_uri.path.lstrip("/").split("/", maxsplit=1)
46
53
  if len(parts) != 2:
47
54
  return "", parts[0]
48
55
 
49
56
  return parts[0], parts[1]
57
+
58
+
59
def parse_endpoint(path: str) -> str:
    """
    Map a blob path's file extension to a filesystem reader endpoint.

    kind is a file format. one of [csv, jsonl, parquet]. A trailing ``.gz``
    suffix is skipped so compressed files resolve to their inner format.

    Raises:
        UnsupportedEndpointError: if the extension is not one of the above.
    """
    segments = path.split(".")
    extension = segments[-1]
    # For gzip-compressed files the real format is the second-to-last suffix.
    if extension == "gz":
        extension = segments[-2]
    readers = {
        "csv": "read_csv",
        "jsonl": "read_jsonl",
        "parquet": "read_parquet",
    }
    if extension not in readers:
        raise UnsupportedEndpointError(f"Unsupported file format: {extension}")
    return readers[extension]
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.13"
1
+ version = "v0.14.104"
@@ -75,7 +75,7 @@ def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
75
75
 
76
76
 
77
77
  @dlt.resource(
78
- write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
78
+ write_disposition="replace", columns={"end_time": {"data_type": "timestamp"}}
79
79
  )
80
80
  def players_games(
81
81
  players: List[str], start_month: str = None, end_month: str = None
@@ -0,0 +1,85 @@
1
+ """Simple ClickUp source."""
2
+
3
+ from datetime import datetime
4
+ from typing import Iterable
5
+
6
+ import dlt
7
+ import pendulum
8
+ from dlt.common.time import ensure_pendulum_datetime
9
+ from dlt.sources import DltResource
10
+
11
+ from .helpers import ClickupClient
12
+
13
+
14
@dlt.source(max_table_nesting=0)
def clickup_source(
    api_token: str = dlt.secrets.value,
    start_date: datetime = None,
    end_date: datetime = None,
) -> Iterable[DltResource]:
    """ClickUp source exposing user, teams, spaces, lists and tasks resources.

    Args:
        api_token: ClickUp personal API token (sent as the Authorization header).
        start_date: Lower bound for the incremental ``tasks`` resource.
        end_date: Optional upper bound for the incremental ``tasks`` resource.

    Returns:
        The five resources; all use merge disposition keyed on ``id``.
    """
    client = ClickupClient(api_token)

    @dlt.resource(
        name="user",
        primary_key="id",
        write_disposition="merge",
    )
    def user() -> Iterable[dict]:
        # /user returns a single object wrapped in {"user": ...}.
        data = client.get("/user")
        yield data["user"]

    @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
    def teams() -> Iterable[dict]:
        for team in client.get_teams():
            yield team

    @dlt.resource(name="spaces", primary_key="id", write_disposition="merge")
    def spaces() -> Iterable[dict]:
        for space in client.get_spaces():
            yield space

    @dlt.resource(name="lists", write_disposition="merge", primary_key="id")
    def lists() -> Iterable[dict]:
        # Renamed the loop variable so the `list` builtin is not shadowed.
        for clickup_list in client.get_lists():
            yield clickup_list

    @dlt.resource(
        name="tasks",
        write_disposition="merge",
        primary_key="id",
        columns={"date_updated": {"data_type": "timestamp"}},
    )
    def tasks(
        date_updated: dlt.sources.incremental[str] = dlt.sources.incremental(
            "date_updated",
            initial_value=ensure_pendulum_datetime(start_date).in_timezone("UTC"),
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[dict]:
        # Resume from the incremental cursor when present, else from start_date.
        if date_updated.last_value:
            start = ensure_pendulum_datetime(date_updated.last_value).in_timezone("UTC")
        else:
            start = ensure_pendulum_datetime(start_date).in_timezone("UTC")

        if date_updated.end_value is None:
            end = pendulum.now("UTC")
        else:
            end = date_updated.end_value.in_timezone("UTC")

        for list_obj in client.get_lists():
            for task in client.paginated(
                f"/list/{list_obj['id']}/task", "tasks", {"page_size": 100}
            ):
                # ClickUp returns epoch milliseconds; the endpoint does not
                # filter by date, so the window is applied client-side.
                task_dt = ensure_pendulum_datetime(int(task["date_updated"]) / 1000)
                if task_dt >= start and task_dt <= end:
                    task["date_updated"] = task_dt
                    yield task

    return (
        user,
        teams,
        spaces,
        lists,
        tasks,
    )
@@ -0,0 +1,47 @@
1
+ from typing import Iterable, Optional
2
+
3
+ from ..http_client import create_client
4
+
5
+
6
class ClickupClient:
    """Minimal ClickUp REST API v2 client with page-number based iteration."""

    def __init__(self, api_token: str):
        self.session = create_client()
        self.base_url = "https://api.clickup.com/api/v2"
        self.headers = {"Authorization": api_token}

    def get(self, endpoint: str, params: Optional[dict] = None) -> dict:
        """Issue a GET request against the API and return the decoded JSON."""
        response = self.session.get(
            f"{self.base_url}{endpoint}", headers=self.headers, params=params or {}
        )
        response.raise_for_status()
        return response.json()

    def paginated(
        self, endpoint: str, key: str, params: Optional[dict] = None
    ) -> Iterable[dict]:
        """Yield items under ``key`` across numbered pages until exhausted."""
        params = params or {}
        page = 0
        while True:
            params["page"] = page
            payload = self.get(endpoint, params)
            # Fall back to the whole payload when the key is absent.
            batch = payload.get(key, payload)
            if not batch:
                break
            yield from batch
            # Stop on the API's last-page flag or a short (final) page.
            if payload.get("last_page") or len(batch) < params.get("page_size", 100):
                break
            page += 1

    def get_teams(self):
        """Return all teams visible to the token."""
        return self.get("/team").get("teams", [])

    def get_spaces(self):
        """Yield every space across all teams."""
        for team in self.get_teams():
            yield from self.paginated(f"/team/{team['id']}/space", "spaces")

    def get_lists(self):
        """Yield every list across all spaces."""
        for space in self.get_spaces():
            yield from self.paginated(f"/space/{space['id']}/list", "lists")
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+ from dlt.common.runtime.collector import Collector
4
+ from rich.status import Status
5
+
6
+
7
class SpinnerCollector(Collector):
    """dlt progress collector that renders a rich spinner with a step label."""

    status: Status
    current_step: str
    started: bool

    def __init__(self) -> None:
        self.status = Status("Ingesting data...", spinner="dots")
        self.started = False
        # Default label so update() is safe even if it fires before _start();
        # previously current_step was only assigned in _start(), so an early
        # update() raised AttributeError.
        self.current_step = "Ingesting data..."

    def update(
        self,
        name: str,
        inc: int = 1,
        total: Optional[int] = None,
        message: Optional[str] = None,  # type: ignore
        label: str = "",
        **kwargs,
    ) -> None:
        # Progress details are ignored; only the current step label is shown.
        self.status.update(self.current_step)

    def _start(self, step: str) -> None:
        self.current_step = self.__step_to_label(step)
        self.status.start()

    def __step_to_label(self, step: str) -> str:
        # Map dlt's step names ("extract", "normalize", "load ...") to
        # human-friendly spinner text.
        verb = step.split(" ")[0].lower()
        if verb.startswith("normalize"):
            return "Normalizing the data"
        elif verb.startswith("load"):
            return "Loading the data to the destination"
        elif verb.startswith("extract"):
            return "Extracting the data from the source"

        return f"{verb.capitalize()} the data"

    def _stop(self) -> None:
        self.status.stop()
@@ -0,0 +1,118 @@
1
+ """Source that loads data from Couchbase buckets, supports incremental loads."""
2
+
3
+ from typing import Optional
4
+
5
+ import dlt
6
+ from dlt.sources import DltResource
7
+
8
+ from .helpers import (
9
+ CouchbaseConfiguration,
10
+ client_from_credentials,
11
+ fetch_documents,
12
+ )
13
+
14
+
15
@dlt.source(max_table_nesting=0)
def couchbase_source(
    connection_string: str = dlt.secrets.value,
    username: str = dlt.secrets.value,
    password: str = dlt.secrets.value,
    bucket: str = dlt.config.value,
    scope: Optional[str] = dlt.config.value,
    collection: Optional[str] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    limit: Optional[int] = None,
) -> DltResource:
    """
    A DLT source which loads data from a Couchbase bucket using Couchbase Python SDK.

    Args:
        connection_string (str): Couchbase connection string (e.g., 'couchbase://localhost')
        username (str): Couchbase username
        password (str): Couchbase password
        bucket (str): Bucket name to load data from
        scope (Optional[str]): Scope name (defaults to '_default')
        collection (Optional[str]): Collection name (defaults to '_default')
        incremental (Optional[dlt.sources.incremental]): Option to enable incremental loading.
        write_disposition (str): Write disposition of the resource.
        limit (Optional[int]): The maximum number of documents to load.

    Returns:
        DltResource: A DLT resource for the Couchbase collection.
    """
    # Connect once up front; fetch_documents receives the live cluster handle.
    cluster = client_from_credentials(connection_string, username, password)

    # Resource name encodes the full collection path for uniqueness.
    document_resource = dlt.resource(  # type: ignore[call-overload, arg-type]
        fetch_documents,
        name=f"{bucket}_{scope}_{collection}",
        primary_key="id",
        write_disposition=write_disposition or "replace",
        spec=CouchbaseConfiguration,
        max_table_nesting=0,
    )

    return document_resource(
        cluster=cluster,
        bucket_name=bucket,
        scope_name=scope,
        collection_name=collection,
        incremental=incremental,
        limit=limit,
    )
+
66
+
67
@dlt.resource(
    name=lambda args: f"{args['bucket']}_{args['scope']}_{args['collection']}",
    standalone=True,
    spec=CouchbaseConfiguration,  # type: ignore[arg-type]
)
def couchbase_collection(
    connection_string: str = dlt.secrets.value,
    username: str = dlt.secrets.value,
    password: str = dlt.secrets.value,
    bucket: str = dlt.config.value,
    scope: Optional[str] = dlt.config.value,
    collection: Optional[str] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
) -> DltResource:
    """
    A DLT resource which loads a single collection from Couchbase.

    Args:
        connection_string (str): Couchbase connection string (e.g., 'couchbase://localhost')
        username (str): Couchbase username
        password (str): Couchbase password
        bucket (str): Bucket name to load data from
        scope (Optional[str]): Scope name (defaults to '_default')
        collection (Optional[str]): Collection name (defaults to '_default')
        incremental (Optional[dlt.sources.incremental]): Option to enable incremental loading.
        write_disposition (str): Write disposition of the resource.
        limit (Optional[int]): The maximum number of documents to load.
        chunk_size (Optional[int]): The number of documents to load in each batch.

    Returns:
        DltResource: A DLT resource for the Couchbase collection.
    """
    # Connect once up front and hand the cluster to the fetch generator.
    cluster = client_from_credentials(connection_string, username, password)

    collection_resource = dlt.resource(  # type: ignore[call-overload]
        fetch_documents,
        name=f"{bucket}_{scope}_{collection}",
        primary_key="id",
        write_disposition=write_disposition or "replace",
    )

    return collection_resource(
        cluster=cluster,
        bucket_name=bucket,
        scope_name=scope,
        collection_name=collection,
        incremental=incremental,
        limit=limit,
        chunk_size=chunk_size,
    )
@@ -0,0 +1,135 @@
1
+ """Helper functions for Couchbase source."""
2
+
3
+ from datetime import datetime, timedelta
4
+ from typing import Any, Dict, Iterator, Optional
5
+
6
+ import dlt
7
+ from couchbase.auth import PasswordAuthenticator # type: ignore[import-untyped]
8
+ from couchbase.cluster import Cluster # type: ignore[import-untyped]
9
+ from couchbase.options import ( # type: ignore[import-untyped]
10
+ ClusterOptions,
11
+ QueryOptions,
12
+ )
13
+ from dlt.common.configuration import configspec
14
+ from dlt.common.time import ensure_pendulum_datetime
15
+
16
+
17
@configspec
class CouchbaseConfiguration:
    """dlt configuration spec for the Couchbase source.

    Credentials come from dlt secrets; the bucket/scope/collection location
    comes from dlt config.
    """

    # Connection credentials (secrets).
    connection_string: str = dlt.secrets.value
    username: str = dlt.secrets.value
    password: str = dlt.secrets.value
    # Collection location (config).
    bucket: str = dlt.config.value
    scope: Optional[str] = dlt.config.value
    collection: Optional[str] = dlt.config.value
27
+
28
+
29
def client_from_credentials(
    connection_string: str, username: str, password: str
) -> Cluster:
    """
    Create a Couchbase cluster client from credentials.

    Args:
        connection_string: Couchbase connection string
            - Local/self-hosted: 'couchbase://localhost'
            - Capella (cloud): 'couchbases://your-instance.cloud.couchbase.com'
        username: Couchbase username
        password: Couchbase password

    Returns:
        Cluster: Connected Couchbase cluster instance
    """
    options = ClusterOptions(PasswordAuthenticator(username, password))

    # Capella (couchbases://) connections get the wan_development profile,
    # which relaxes timeouts to avoid latency issues across networks.
    if connection_string.startswith("couchbases://"):
        options.apply_profile("wan_development")

    cluster = Cluster(connection_string, options)
    # Block until the cluster is reachable (or fail within 30s).
    cluster.wait_until_ready(timedelta(seconds=30))
    return cluster
57
+
58
+
59
def fetch_documents(
    cluster: Cluster,
    bucket_name: str,
    scope_name: str,
    collection_name: str,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
) -> Iterator[Dict[str, Any]]:
    """
    Fetch documents from a Couchbase collection using N1QL queries.

    Args:
        cluster: Couchbase cluster instance
        bucket_name: Name of the bucket
        scope_name: Name of the scope
        collection_name: Name of the collection
        incremental: Incremental loading configuration
        limit: Maximum number of documents to fetch
        chunk_size: Accepted for API compatibility; the N1QL result is
            streamed row by row, so it is not used here.

    Yields:
        Dict[str, Any]: Document data
    """
    # Build N1QL query with the fully qualified collection path.
    full_collection_path = f"`{bucket_name}`.`{scope_name}`.`{collection_name}`"
    n1ql_query = f"SELECT META().id as id, c.* FROM {full_collection_path} c"

    # Incremental bounds are bound as named query parameters; only the cursor
    # path itself is interpolated into the statement. NOTE(review): cursor_path
    # comes from configuration, not end users, but it is still raw string
    # interpolation into N1QL — keep it trusted.
    if incremental and incremental.cursor_path:
        where_clause = f" WHERE {incremental.cursor_path} >= $start_value"
        if incremental.end_value is not None:
            where_clause += f" AND {incremental.cursor_path} < $end_value"
        n1ql_query += where_clause

    if limit:
        n1ql_query += f" LIMIT {limit}"

    try:
        query_options = QueryOptions()

        # Bind the incremental window values as named parameters.
        if incremental and incremental.cursor_path:
            named_parameters = {"start_value": incremental.last_value}
            if incremental.end_value is not None:
                named_parameters["end_value"] = incremental.end_value
            query_options = QueryOptions(named_parameters=named_parameters)

        result = cluster.query(n1ql_query, query_options)

        count = 0
        for row in result:
            doc = dict(row)

            # Normalize the cursor column to a pendulum datetime so dlt
            # compares cursor values consistently across batches.
            if (
                incremental
                and incremental.cursor_path
                and incremental.cursor_path in doc
            ):
                cursor_value = doc[incremental.cursor_path]
                if isinstance(cursor_value, (str, datetime)):
                    doc[incremental.cursor_path] = ensure_pendulum_datetime(
                        cursor_value
                    )

            yield doc

            count += 1
            if limit and count >= limit:
                break

    except Exception as e:
        # Chain the original exception so the SDK traceback is preserved.
        raise Exception(f"Error executing Couchbase N1QL query: {str(e)}") from e
@@ -0,0 +1,83 @@
1
+ """
2
+ This source provides data extraction from Cursor via the REST API.
3
+
4
+ It fetches team member information from the Cursor API.
5
+ """
6
+
7
+ from typing import Any, Iterable, Optional
8
+
9
+ import dlt
10
+ from dlt.common.typing import TDataItem
11
+
12
+ from .helpers import CursorClient
13
+
14
+
15
@dlt.source
def cursor_source() -> Any:
    """
    Bundle all Cursor Admin API resources into a single dlt source.

    Returns:
        Sequence[DltResource]: team members, daily usage, spend and
        filtered usage event resources.
    """
    resources = [
        team_members,
        daily_usage_data,
        team_spend,
        filtered_usage_events,
    ]
    return resources
29
+
30
+
31
@dlt.resource(
    write_disposition="replace",
    max_table_nesting=0,
)
def team_members(
    api_key: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """Yield every member of the Cursor team (full refresh each run)."""
    yield from CursorClient(api_key=api_key).get_team_members()
+
43
+
44
@dlt.resource(
    write_disposition="replace",
    max_table_nesting=0,
)
def daily_usage_data(
    api_key: str = dlt.secrets.value,
    start_date: Optional[int] = dlt.config.value,
    end_date: Optional[int] = dlt.config.value,
) -> Iterable[TDataItem]:
    """Yield per-day usage metrics, optionally bounded by start/end dates."""
    cursor = CursorClient(api_key=api_key)
    yield from cursor.get_daily_usage_data(start_date=start_date, end_date=end_date)
+
57
+
58
@dlt.resource(
    write_disposition="replace",
    max_table_nesting=0,
)
def team_spend(
    api_key: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """Yield the team's spend records (full refresh each run)."""
    yield from CursorClient(api_key=api_key).get_team_spend()
+
69
+
70
@dlt.resource(
    write_disposition="replace",
    max_table_nesting=0,
)
def filtered_usage_events(
    api_key: str = dlt.secrets.value,
    start_date: Optional[int] = dlt.config.value,
    end_date: Optional[int] = dlt.config.value,
) -> Iterable[TDataItem]:
    """Yield usage events, optionally bounded by start/end dates."""
    cursor = CursorClient(api_key=api_key)
    yield from cursor.get_filtered_usage_events(
        start_date=start_date, end_date=end_date
    )