ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/appsflyer/client.py CHANGED
@@ -4,19 +4,6 @@ import requests
 from dlt.sources.helpers.requests import Client
 from requests.exceptions import HTTPError
 
-DEFAULT_GROUPING = ["c", "geo", "app_id", "install_time"]
-DEFAULT_KPIS = [
-    "impressions",
-    "clicks",
-    "installs",
-    "cost",
-    "revenue",
-    "average_ecpi",
-    "loyal_users",
-    "uninstalls",
-    "roi",
-]
-
 
 class AppsflyerClient:
     def __init__(self, api_key: str):
@@ -33,15 +20,20 @@ class AppsflyerClient:
         self,
         from_date: str,
         to_date: str,
+        dimensions: list[str],
+        metrics: list[str],
         maximum_rows=1000000,
-        dimensions=DEFAULT_GROUPING,
-        metrics=DEFAULT_KPIS,
     ):
+        excluded_metrics = exclude_metrics_for_date_range(metrics, from_date, to_date)
+        included_metrics = [
+            metric for metric in metrics if metric not in excluded_metrics
+        ]
+
        params = {
            "from": from_date,
            "to": to_date,
            "groupings": ",".join(dimensions),
-           "kpis": ",".join(metrics),
+           "kpis": ",".join(included_metrics),
            "format": "json",
            "maximum_rows": maximum_rows,
        }
@@ -54,7 +46,6 @@ class AppsflyerClient:
        )
 
        request_client = Client(
-           request_timeout=10.0,
            raise_for_status=False,
            retry_condition=retry_on_limit,
            request_max_attempts=12,
@@ -68,39 +59,52 @@ class AppsflyerClient:
 
            if response.status_code == 200:
                result = response.json()
-               yield result
+               yield standardize_keys(result, excluded_metrics)
            else:
                raise HTTPError(
-                   f"Request failed with status code: {response.status_code}"
+                   f"Request failed with status code: {response.status_code}: {response.text}"
                )
 
        except requests.RequestException as e:
            raise HTTPError(f"Request failed: {e}")
 
-    def fetch_campaigns(
-        self,
-        start_date: str,
-        end_date: str,
-    ):
-        metrics = DEFAULT_KPIS + [
-            "cohort_day_1_revenue_per_user",
-            "cohort_day_1_total_revenue_per_user",
-            "cohort_day_3_revenue_per_user",
-            "cohort_day_3_total_revenue_per_user",
-            "cohort_day_7_total_revenue_per_user",
-            "cohort_day_7_revenue_per_user",
-            "cohort_day_14_total_revenue_per_user",
-            "cohort_day_14_revenue_per_user",
-            "cohort_day_21_total_revenue_per_user",
-            "cohort_day_21_revenue_per_user",
-            "retention_day_7",
-        ]
-        return self._fetch_data(start_date, end_date, metrics=metrics)
 
-    def fetch_creatives(
-        self,
-        start_date: str,
-        end_date: str,
-    ):
-        dimensions = DEFAULT_GROUPING + ["af_adset_id", "af_adset", "af_ad_id"]
-        return self._fetch_data(start_date, end_date, dimensions=dimensions)
+def standardize_keys(data: list[dict], excluded_metrics: list[str]) -> list[dict]:
+    def fix_key(key: str) -> str:
+        return key.lower().replace("-", "").replace(" ", "_").replace(" ", "_")
+
+    standardized = []
+    for item in data:
+        standardized_item = {}
+        for key, value in item.items():
+            standardized_item[fix_key(key)] = value
+
+        for metric in excluded_metrics:
+            if metric not in standardized_item:
+                standardized_item[fix_key(metric)] = None
+
+        standardized.append(standardized_item)
+
+    return standardized
+
+
+def exclude_metrics_for_date_range(
+    metrics: list[str], from_date: str, to_date: str
+) -> list[str]:
+    """
+    Some of the cohort metrics are not available if there hasn't been enough time to have data for that cohort.
+    This means if you request data for yesterday with cohort day 7 metrics, you will get an error because 7 days hasn't passed yet.
+    One would expect the API to handle this gracefully, but it doesn't.
+
+    This function will exclude the metrics that are not available for the given date range.
+    """
+    import pendulum
+
+    excluded_metrics = []
+    days_between_today_and_end = (pendulum.now() - pendulum.parse(to_date)).days  # type: ignore
+    for metric in metrics:
+        if "cohort_day_" in metric:
+            day_count = int(metric.split("_")[2])
+            if days_between_today_and_end <= day_count:
+                excluded_metrics.append(metric)
+    return excluded_metrics
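The net effect: callers must now pass dimensions and metrics explicitly, and cohort metrics that cannot exist yet for the requested window are dropped from the request, then backfilled as None columns by standardize_keys. A minimal standalone sketch of the exclusion rule (restated here for illustration; the metric names and dates are made-up inputs, not values from the diff):

    import pendulum

    def exclude_metrics_for_date_range(metrics, from_date, to_date):
        # A cohort_day_N metric needs at least N days between to_date and today.
        excluded = []
        days_between_today_and_end = (pendulum.now() - pendulum.parse(to_date)).days
        for metric in metrics:
            if "cohort_day_" in metric:
                day_count = int(metric.split("_")[2])
                if days_between_today_and_end <= day_count:
                    excluded.append(metric)
        return excluded

    yesterday = pendulum.now().subtract(days=1).to_date_string()
    print(exclude_metrics_for_date_range(
        ["installs", "cohort_day_7_revenue_per_user"], "2024-01-01", yesterday
    ))
    # ['cohort_day_7_revenue_per_user'] -- only one day has passed, so the
    # 7-day cohort metric is excluded from the request.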
ingestr/src/appstore/__init__.py CHANGED
@@ -38,6 +38,7 @@ def app_store(
         name=resource.name,
         primary_key=resource.primary_key,
         columns=resource.columns,
+        write_disposition="merge",
     )(client, app_ids, resource.report_name, start_date, end_date)
 
 
ingestr/src/arrow/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from typing import Any, Optional
 
 import dlt
+import pyarrow as pa  # type: ignore
 from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
 from dlt.extract.items import TTableHintTemplate
 
@@ -21,7 +22,6 @@ def memory_mapped_arrow(
     def arrow_mmap(
         incremental: Optional[dlt.sources.incremental[Any]] = incremental,
     ):
-        import pyarrow as pa  # type: ignore
         import pyarrow.ipc as ipc  # type: ignore
 
         with pa.memory_map(path, "rb") as mmap:
@@ -71,3 +71,11 @@ def memory_mapped_arrow(
             yield table
 
     return arrow_mmap
+
+
+BATCH_SIZE = 1000
+
+
+def as_list(table: pa.Table):
+    for batch in table.to_batches(BATCH_SIZE):
+        yield from batch.to_pylist()
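The new module-level as_list helper converts an Arrow table into Python dicts in fixed-size batches instead of materializing the whole table at once. A quick sketch of what it yields, assuming only that pyarrow is installed:

    import pyarrow as pa

    BATCH_SIZE = 1000

    def as_list(table: pa.Table):
        # Stream rows as dicts, at most BATCH_SIZE rows per record batch.
        for batch in table.to_batches(BATCH_SIZE):
            yield from batch.to_pylist()

    table = pa.table({"id": [1, 2], "name": ["a", "b"]})
    print(list(as_list(table)))
    # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]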
ingestr/src/asana_source/__init__.py CHANGED
@@ -182,7 +182,7 @@ def tasks(
 
 @dlt.transformer(
     data_from=tasks,
-    write_disposition="append",
+    write_disposition="replace",
 )
 @dlt.defer
 def stories(
ingestr/src/attio/__init__.py ADDED
@@ -0,0 +1,102 @@
+from typing import Iterable, Iterator
+
+import dlt
+from dlt.sources import DltResource
+
+from .helpers import AttioClient
+
+
+@dlt.source(max_table_nesting=0)
+def attio_source(
+    api_key: str,
+    params: list[str],
+) -> Iterable[DltResource]:
+    attio_client = AttioClient(api_key)
+
+    @dlt.resource(
+        name="objects",
+        write_disposition="replace",
+        columns={
+            "created_at": {"data_type": "timestamp", "partition": True},
+        },
+    )
+    # https://docs.attio.com/rest-api/endpoint-reference/objects/list-objects - does not support pagination
+    def fetch_objects() -> Iterator[dict]:
+        if len(params) != 0:
+            raise ValueError("Objects table must be in the format `objects`")
+
+        path = "objects"
+        yield attio_client.fetch_all(path, "get")
+
+    # https://docs.attio.com/rest-api/endpoint-reference/records/list-records
+    @dlt.resource(
+        name="records",
+        write_disposition="replace",
+        columns={
+            "created_at": {"data_type": "timestamp", "partition": True},
+        },
+    )
+    def fetch_records() -> Iterator[dict]:
+        if len(params) != 1:
+            raise ValueError(
+                "Records table must be in the format `records:{object_api_slug}`"
+            )
+        object_id = params[0]
+        path = f"objects/{object_id}/records/query"
+
+        yield attio_client.fetch_paginated(path, "post")
+
+    # https://docs.attio.com/rest-api/endpoint-reference/lists/list-all-lists -- does not support pagination
+    @dlt.resource(
+        name="lists",
+        write_disposition="replace",
+        columns={
+            "created_at": {"data_type": "timestamp", "partition": True},
+        },
+    )
+    def fetch_lists() -> Iterator[dict]:
+        path = "lists"
+        yield attio_client.fetch_all(path, "get")
+
+    # https://docs.attio.com/rest-api/endpoint-reference/entries/list-entries
+    @dlt.resource(
+        name="list_entries",
+        write_disposition="replace",
+        columns={
+            "created_at": {"data_type": "timestamp", "partition": True},
+        },
+    )
+    def fetch_list_entries() -> Iterator[dict]:
+        if len(params) != 1:
+            raise ValueError(
+                "List entries table must be in the format `list_entries:{list_id}`"
+            )
+        path = f"lists/{params[0]}/entries/query"
+
+        yield attio_client.fetch_paginated(path, "post")
+
+    @dlt.resource(
+        name="all_list_entries",
+        write_disposition="replace",
+        columns={
+            "created_at": {"data_type": "timestamp", "partition": True},
+        },
+    )
+    def fetch_all_list_entries() -> Iterator[dict]:
+        if len(params) != 1:
+            raise ValueError(
+                "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
+            )
+        path = "lists"
+        for lst in attio_client.fetch_all(path, "get"):
+            if params[0] in lst["parent_object"]:
+                path = f"lists/{lst['id']['list_id']}/entries/query"
+                yield from attio_client.fetch_paginated(path, "post")
+
+    return (
+        fetch_objects,
+        fetch_records,
+        fetch_lists,
+        fetch_list_entries,
+        fetch_all_list_entries,
+    )
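Each resource validates the params list derived from the requested table name, so the table string doubles as the routing mechanism (the ValueError messages spell out the expected `resource:argument` formats). A hedged sketch of instantiation; `companies` is a placeholder object slug and the key is fake:

    from ingestr.src.attio import attio_source

    # a table like `records:companies` would arrive as resource "records"
    # with params ["companies"], per the format checks above
    src = attio_source(api_key="sk-demo", params=["companies"])
    records = src.with_resources("records")  # queries objects/companies/records/query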
ingestr/src/attio/helpers.py ADDED
@@ -0,0 +1,65 @@
+from ingestr.src.http_client import create_client
+
+
+class AttioClient:
+    def __init__(self, api_key: str):
+        self.base_url = "https://api.attio.com/v2"
+        self.headers = {
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        }
+        self.client = create_client()
+
+    def fetch_paginated(self, path: str, method: str, limit: int = 1000, params=None):
+        url = f"{self.base_url}/{path}"
+        if params is None:
+            params = {}
+        offset = 0
+        while True:
+            query_params = {"limit": limit, "offset": offset, **params}
+            if method == "get":
+                response = self.client.get(
+                    url, headers=self.headers, params=query_params
+                )
+            else:
+                json_body = {**params, "limit": limit, "offset": offset}
+                response = self.client.post(url, headers=self.headers, json=json_body)
+
+            if response.status_code != 200:
+                raise Exception(f"HTTP {response.status_code} error: {response.text}")
+
+            response_data = response.json()
+            if "data" not in response_data:
+                raise Exception(
+                    "Attio API returned a response without the expected data"
+                )
+
+            data = response_data["data"]
+            for item in data:
+                flat_item = flatten_item(item)
+                yield flat_item
+            if len(data) < limit:
+                break
+
+            offset += limit
+
+    def fetch_all(self, path: str, method: str = "get", params=None):
+        url = f"{self.base_url}/{path}"
+        params = params or {}
+
+        if method == "get":
+            response = self.client.get(url, headers=self.headers, params=params)
+        else:
+            response = self.client.post(url, headers=self.headers, json=params)
+
+        response.raise_for_status()
+        data = response.json().get("data", [])
+        for item in data:
+            yield flatten_item(item)
+
+
+def flatten_item(item: dict) -> dict:
+    if "id" in item:
+        for key, value in item["id"].items():
+            item[key] = value
+    return item
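fetch_paginated walks the limit/offset window until a page comes back shorter than the limit, flattening the nested "id" object into top-level keys as it goes. A hypothetical driver, assuming a valid API key; the path and the flattened key name are illustrative, not taken from the diff:

    from ingestr.src.attio.helpers import AttioClient

    client = AttioClient(api_key="sk-demo")  # placeholder key
    for record in client.fetch_paginated("objects/companies/records/query", "post", limit=500):
        # flatten_item copies item["id"]'s members up to the top level
        print(record.get("record_id"))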
ingestr/src/blob.py CHANGED
@@ -1,11 +1,15 @@
 import warnings
 from typing import Tuple, TypeAlias
-from urllib.parse import ParseResult
+from urllib.parse import ParseResult, urlparse
 
 BucketName: TypeAlias = str
 FileGlob: TypeAlias = str
 
 
+class UnsupportedEndpointError(Exception):
+    pass
+
+
 def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
     """
     parse the URI of a blob storage and
@@ -14,19 +18,22 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
     Supports the following Forms:
     - uri: "gs://"
       table: "bucket-name/file-glob"
+    - uri: "gs://uri-bucket-name" (uri-bucket-name is preferred)
+      table: "gs://table-bucket-name/file-glob"
+    - uri: "gs://"
+      table: "gs://bucket-name/file-glob"
     - uri: gs://bucket-name/file-glob
       table: None
     - uri: "gs://bucket-name"
       table: "file-glob"
 
-    The first form is the prefered method. Other forms are supported
-    for backward compatibility, but discouraged.
+    The first form is the prefered method. Other forms are supported but discouraged.
     """
 
     table = table.strip()
     host = uri.netloc.strip()
 
-    if table == "":
+    if table == "" or uri.path.strip() != "":
         warnings.warn(
             f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
             DeprecationWarning,
@@ -34,16 +41,36 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
         )
         return host, uri.path.lstrip("/")
 
+    table_uri = urlparse(table)
+
     if host != "":
-        warnings.warn(
-            f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        return host, table.lstrip("/")
+        return host, table_uri.path.lstrip("/")
+
+    if table_uri.hostname:
+        return table_uri.hostname, table_uri.path.lstrip("/")
 
-    parts = table.lstrip("/").split("/", maxsplit=1)
+    parts = table_uri.path.lstrip("/").split("/", maxsplit=1)
     if len(parts) != 2:
         return "", parts[0]
 
     return parts[0], parts[1]
+
+
+def parse_endpoint(path: str) -> str:
+    """
+    Parse the endpoint kind from the URI.
+
+    kind is a file format. one of [csv, jsonl, parquet]
+    """
+    file_extension = path.split(".")[-1]
+    if file_extension == "gz":
+        file_extension = path.split(".")[-2]
+    if file_extension == "csv":
+        endpoint = "read_csv"
+    elif file_extension == "jsonl":
+        endpoint = "read_jsonl"
+    elif file_extension == "parquet":
+        endpoint = "read_parquet"
+    else:
+        raise UnsupportedEndpointError(f"Unsupported file format: {file_extension}")
+    return endpoint
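parse_uri now prefers the bucket from the source URI and falls back to one embedded in the table, and parse_endpoint maps file extensions (unwrapping .gz first) to reader endpoints. A quick worked check of both, assuming only the import path shown:

    from urllib.parse import urlparse

    from ingestr.src.blob import parse_endpoint, parse_uri

    # Bucket named in both places: the URI bucket wins, the table supplies the glob.
    print(parse_uri(urlparse("gs://uri-bucket"), "gs://table-bucket/data/*.csv"))
    # ('uri-bucket', 'data/*.csv')

    print(parse_endpoint("events.jsonl.gz"))  # 'read_jsonl' -- .gz is unwrapped first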
ingestr/src/buildinfo.py ADDED
@@ -0,0 +1 @@
+version = "v0.14.104"
ingestr/src/chess/__init__.py CHANGED
@@ -75,7 +75,7 @@ def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
 
 
 @dlt.resource(
-    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
+    write_disposition="replace", columns={"end_time": {"data_type": "timestamp"}}
 )
 def players_games(
     players: List[str], start_month: str = None, end_month: str = None
ingestr/src/clickup/__init__.py ADDED
@@ -0,0 +1,85 @@
+"""Simple ClickUp source."""
+
+from datetime import datetime
+from typing import Iterable
+
+import dlt
+import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources import DltResource
+
+from .helpers import ClickupClient
+
+
+@dlt.source(max_table_nesting=0)
+def clickup_source(
+    api_token: str = dlt.secrets.value,
+    start_date: datetime = None,
+    end_date: datetime = None,
+) -> Iterable[DltResource]:
+    client = ClickupClient(api_token)
+
+    @dlt.resource(
+        name="user",
+        primary_key="id",
+        write_disposition="merge",
+    )
+    def user() -> Iterable[dict]:
+        data = client.get("/user")
+        yield data["user"]
+
+    @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
+    def teams() -> Iterable[dict]:
+        for team in client.get_teams():
+            yield team
+
+    @dlt.resource(name="spaces", primary_key="id", write_disposition="merge")
+    def spaces() -> Iterable[dict]:
+        for space in client.get_spaces():
+            yield space
+
+    @dlt.resource(name="lists", write_disposition="merge", primary_key="id")
+    def lists() -> Iterable[dict]:
+        for list in client.get_lists():
+            yield list
+
+    @dlt.resource(
+        name="tasks",
+        write_disposition="merge",
+        primary_key="id",
+        columns={"date_updated": {"data_type": "timestamp"}},
+    )
+    def tasks(
+        date_updated: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "date_updated",
+            initial_value=ensure_pendulum_datetime(start_date).in_timezone("UTC"),
+            range_end="closed",
+            range_start="closed",
+        ),
+    ) -> Iterable[dict]:
+        if date_updated.last_value:
+            start = ensure_pendulum_datetime(date_updated.last_value).in_timezone("UTC")
+        else:
+            start = ensure_pendulum_datetime(start_date).in_timezone("UTC")
+
+        if date_updated.end_value is None:
+            end = pendulum.now("UTC")
+        else:
+            end = date_updated.end_value.in_timezone("UTC")
+
+        for list_obj in client.get_lists():
+            for task in client.paginated(
+                f"/list/{list_obj['id']}/task", "tasks", {"page_size": 100}
+            ):
+                task_dt = ensure_pendulum_datetime(int(task["date_updated"]) / 1000)
+                if task_dt >= start and task_dt <= end:
+                    task["date_updated"] = task_dt
+                    yield task
+
+    return (
+        user,
+        teams,
+        spaces,
+        lists,
+        tasks,
+    )
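The tasks resource keeps a closed incremental window on date_updated (ClickUp returns epoch milliseconds, hence the division by 1000 before comparison). A usage sketch with placeholder token and destination, not values from the diff:

    from datetime import datetime

    import dlt

    from ingestr.src.clickup import clickup_source

    pipeline = dlt.pipeline(destination="duckdb", dataset_name="clickup")
    source = clickup_source(api_token="pk_demo", start_date=datetime(2024, 1, 1))
    pipeline.run(source.with_resources("tasks"))  # only the incremental resource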
ingestr/src/clickup/helpers.py ADDED
@@ -0,0 +1,47 @@
+from typing import Iterable, Optional
+
+from ..http_client import create_client
+
+
+class ClickupClient:
+    def __init__(self, api_token: str):
+        self.session = create_client()
+        self.base_url = "https://api.clickup.com/api/v2"
+        self.headers = {"Authorization": api_token}
+
+    def get(self, endpoint: str, params: Optional[dict] = None) -> dict:
+        url = f"{self.base_url}{endpoint}"
+        resp = self.session.get(url, headers=self.headers, params=params or {})
+        resp.raise_for_status()
+        return resp.json()
+
+    def paginated(
+        self, endpoint: str, key: str, params: Optional[dict] = None
+    ) -> Iterable[dict]:
+        page = 0
+        params = params or {}
+        while True:
+            params["page"] = page
+            data = self.get(endpoint, params)
+            items = data.get(key, data)
+            if not items:
+                break
+            for item in items:
+                yield item
+            if data.get("last_page") or len(items) < params.get("page_size", 100):
+                break
+            page += 1
+
+    def get_teams(self):
+        data = self.get("/team")
+        return data.get("teams", [])
+
+    def get_spaces(self):
+        for team in self.get_teams():
+            for space in self.paginated(f"/team/{team['id']}/space", "spaces"):
+                yield space
+
+    def get_lists(self):
+        for space in self.get_spaces():
+            for lst in self.paginated(f"/space/{space['id']}/list", "lists"):
+                yield lst
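paginated stops either on an explicit last_page flag or when a page comes back shorter than page_size, so it handles endpoints using either convention. An illustrative loop, assuming a valid token; the list id is hypothetical:

    from ingestr.src.clickup.helpers import ClickupClient

    client = ClickupClient(api_token="pk_demo")  # placeholder token
    for task in client.paginated("/list/123/task", "tasks", {"page_size": 100}):
        print(task["id"])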
ingestr/src/collector/spinner.py ADDED
@@ -0,0 +1,43 @@
+from typing import Optional
+
+from dlt.common.runtime.collector import Collector
+from rich.status import Status
+
+
+class SpinnerCollector(Collector):
+    status: Status
+    current_step: str
+    started: bool
+
+    def __init__(self) -> None:
+        self.status = Status("Ingesting data...", spinner="dots")
+        self.started = False
+
+    def update(
+        self,
+        name: str,
+        inc: int = 1,
+        total: Optional[int] = None,
+        message: Optional[str] = None,  # type: ignore
+        label: str = "",
+        **kwargs,
+    ) -> None:
+        self.status.update(self.current_step)
+
+    def _start(self, step: str) -> None:
+        self.current_step = self.__step_to_label(step)
+        self.status.start()
+
+    def __step_to_label(self, step: str) -> str:
+        verb = step.split(" ")[0].lower()
+        if verb.startswith("normalize"):
+            return "Normalizing the data"
+        elif verb.startswith("load"):
+            return "Loading the data to the destination"
+        elif verb.startswith("extract"):
+            return "Extracting the data from the source"
+
+        return f"{verb.capitalize()} the data"
+
+    def _stop(self) -> None:
+        self.status.stop()
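SpinnerCollector adapts dlt's Collector interface to a rich spinner, relabeling each pipeline step (extract, normalize, load) as it starts. A sketch of how it would be plugged in, assuming the import path below; the pipeline name and destination are placeholders:

    import dlt

    from ingestr.src.collector.spinner import SpinnerCollector

    pipeline = dlt.pipeline(
        pipeline_name="demo",
        destination="duckdb",
        progress=SpinnerCollector(),  # dlt accepts any Collector as `progress`
    )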