ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/fundraiseup/__init__.py (new file)
@@ -0,0 +1,95 @@
+ """Fundraiseup source for ingesting donations, events, fundraisers, recurring plans, and supporters."""
+
+ from typing import Any, Dict, Generator, Iterable, TypedDict
+
+ import dlt
+ import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.sources import DltResource
+
+ from .client import FundraiseupClient
+
+
+ class DonationCursor(TypedDict):
+     id: str
+     created_at: pendulum.DateTime
+
+
+ def order_by_created(record) -> DonationCursor:
+     last_value = None
+     if len(record) == 1:
+         (record,) = record
+     else:
+         record, last_value = record
+
+     cursor: DonationCursor = {
+         "id": record["id"],
+         "created_at": ensure_pendulum_datetime(record["created_at"]),
+     }
+
+     if last_value is None:
+         return cursor
+
+     return max(cursor, last_value, key=lambda v: v["created_at"])
+
+
+ @dlt.source(name="fundraiseup", max_table_nesting=0)
+ def fundraiseup_source(api_key: str) -> Iterable[DltResource]:
+     """
+     Return resources for Fundraiseup API.
+
+     Args:
+         api_key: API key for authentication
+
+     Returns:
+         Iterable of DLT resources
+     """
+     client = FundraiseupClient(api_key=api_key)
+
+     # Define available resources and their configurations
+     resources = {
+         "donations": {"write_disposition": "replace", "primary_key": "id"},
+         "events": {"write_disposition": "replace", "primary_key": "id"},
+         "fundraisers": {"write_disposition": "replace", "primary_key": "id"},
+         "recurring_plans": {"write_disposition": "replace", "primary_key": "id"},
+         "supporters": {"write_disposition": "replace", "primary_key": "id"},
+     }
+
+     def create_resource(resource_name: str, config: Dict[str, Any]) -> DltResource:
+         """Create a DLT resource dynamically."""
+
+         @dlt.resource(
+             name=resource_name,
+             write_disposition=config["write_disposition"],
+             primary_key=config["primary_key"],
+         )
+         def generic_resource() -> Generator[Dict[str, Any], None, None]:
+             """Generic resource that yields batches directly."""
+             for batch in client.get_paginated_data(resource_name):
+                 yield batch  # type: ignore[misc]
+
+         return generic_resource()
+
+     @dlt.resource(
+         name="donations:incremental",
+         write_disposition="merge",
+         primary_key="id",
+     )
+     def donations_incremental(
+         last_record: dlt.sources.incremental[DonationCursor] = dlt.sources.incremental(
+             "$",
+             range_start="closed",
+             range_end="closed",
+             last_value_func=order_by_created,
+         ),
+     ):
+         params = {}
+         if last_record.last_value is not None:
+             params["starting_after"] = last_record.last_value["id"]
+         for batch in client.get_paginated_data("donations", params=params):
+             yield batch  # type: ignore[misc]
+
+     # Return all resources
+     return [donations_incremental] + [
+         create_resource(name, config) for name, config in resources.items()
+     ]
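
For orientation, here is a minimal, hypothetical sketch of consuming this new source through the standard dlt pipeline API; the pipeline name, destination, and API key are illustrative, not taken from the package:

```python
# Hypothetical usage of the new fundraiseup source (names below are placeholders).
import dlt

from ingestr.src.fundraiseup import fundraiseup_source

pipeline = dlt.pipeline(
    pipeline_name="fundraiseup_demo",  # illustrative
    destination="duckdb",              # illustrative
    dataset_name="fundraiseup_raw",
)

# Select only the incremental donations stream; on later runs dlt resumes from
# the stored DonationCursor, so only newer donations are fetched and merged on "id".
source = fundraiseup_source(api_key="FR_API_KEY").with_resources("donations:incremental")
print(pipeline.run(source))
```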
ingestr/src/fundraiseup/client.py (new file)
@@ -0,0 +1,81 @@
+ """Fundraiseup API Client for handling authentication and paginated requests."""
+
+ from typing import Any, Dict, Iterator, Optional
+
+ from ingestr.src.http_client import create_client
+
+
+ class FundraiseupClient:
+     """Client for interacting with Fundraiseup API v1."""
+
+     def __init__(self, api_key: str):
+         """
+         Initialize Fundraiseup API client.
+
+         Args:
+             api_key: API key for authentication
+         """
+         self.api_key = api_key
+         self.base_url = "https://api.fundraiseup.com/v1"
+         # Use shared HTTP client with retry logic for rate limiting
+         self.client = create_client(retry_status_codes=[429, 500, 502, 503, 504])
+
+     def get_paginated_data(
+         self,
+         endpoint: str,
+         params: Optional[Dict[str, Any]] = None,
+         page_size: int = 100,
+     ) -> Iterator[list[Dict[str, Any]]]:
+         """
+         Fetch paginated data from a Fundraiseup API endpoint using cursor-based pagination.
+
+         Args:
+             endpoint: API endpoint path (e.g., "donations")
+             params: Additional query parameters
+             page_size: Number of items per page (default 100)
+
+         Yields:
+             Batches of items from the API
+         """
+         url = f"{self.base_url}/{endpoint}"
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json",
+         }
+
+         if params is None:
+             params = {}
+
+         params["limit"] = page_size
+         starting_after = None
+
+         while True:
+             # Add cursor for pagination if not first page
+             if starting_after:
+                 params["starting_after"] = starting_after
+
+             response = self.client.get(url=url, headers=headers, params=params)
+             response.raise_for_status()
+
+             data = response.json()
+
+             # Handle both list response and object with data array
+             if isinstance(data, list):
+                 items = data
+                 has_more = len(items) == page_size
+             else:
+                 items = data.get("data", [])
+                 has_more = data.get("has_more", False)
+
+             if not items:
+                 break
+
+             yield items
+
+             # Set cursor for next page
+             if has_more and items:
+                 starting_after = items[-1].get("id")
+                 if not starting_after:
+                     break
+             else:
+                 break
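
The client implements the familiar cursor-pagination handshake: request `limit` items, and while `has_more` holds, pass the last item's `id` back as `starting_after`. A self-contained sketch of that loop with a stubbed fetch (all data fabricated, no HTTP involved):

```python
# Stubbed illustration of the starting_after/has_more loop in get_paginated_data.
from typing import Any, Dict, Iterator, List, Optional

ROWS = [{"id": f"d_{i}"} for i in range(5)]  # fabricated records


def fetch_page(limit: int, starting_after: Optional[str]) -> Dict[str, Any]:
    """Stand-in for the real HTTP GET; slices the fabricated rows."""
    start = 0
    if starting_after is not None:
        start = next(i for i, r in enumerate(ROWS) if r["id"] == starting_after) + 1
    page = ROWS[start : start + limit]
    return {"data": page, "has_more": start + limit < len(ROWS)}


def paginate(limit: int = 2) -> Iterator[List[Dict[str, Any]]]:
    cursor = None
    while True:
        body = fetch_page(limit, cursor)
        items = body["data"]
        if not items:
            break
        yield items
        if not body["has_more"]:
            break
        cursor = items[-1]["id"]  # the last id becomes the next starting_after


for batch in paginate():
    print([r["id"] for r in batch])  # ['d_0', 'd_1'] / ['d_2', 'd_3'] / ['d_4']
```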
ingestr/src/github/__init__.py
@@ -4,13 +4,14 @@ import urllib.parse
  from typing import Iterator, Optional, Sequence
  
  import dlt
+ import pendulum
  from dlt.common.typing import TDataItems
  from dlt.sources import DltResource
  
  from .helpers import get_reactions_data, get_rest_pages, get_stargazers
  
  
- @dlt.source
+ @dlt.source(max_table_nesting=0)
  def github_reactions(
      owner: str,
      name: str,
@@ -67,7 +68,11 @@ def github_reactions(
  
  @dlt.source(max_table_nesting=0)
  def github_repo_events(
-     owner: str, name: str, access_token: Optional[str] = None
+     owner: str,
+     name: str,
+     access_token: str,
+     start_date: pendulum.DateTime,
+     end_date: Optional[pendulum.DateTime] = None,
  ) -> DltResource:
      """Gets events for repository `name` with owner `owner` incrementally.
  
@@ -86,11 +91,14 @@ def github_repo_events(
      """
  
      # use naming function in table name to generate separate tables for each event
-     @dlt.resource(primary_key="id", table_name=lambda i: i["type"])
+     @dlt.resource(
+         primary_key="id", table_name=lambda i: i["type"], write_disposition="merge"
+     )
      def repo_events(
          last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
              "created_at",
-             initial_value="1970-01-01T00:00:00Z",
+             initial_value=start_date.isoformat(),
+             end_value=end_date.isoformat() if end_date else None,
              last_value_func=max,
              range_end="closed",
              range_start="closed",
@@ -100,8 +108,35 @@ def github_repo_events(
              f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
          )
  
+         # Get the date range from the incremental state
+         start_filter = pendulum.parse(
+             last_created_at.last_value or last_created_at.initial_value
+         )
+         end_filter = (
+             pendulum.parse(last_created_at.end_value)
+             if last_created_at.end_value
+             else pendulum.now()
+         )
+
          for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
-             yield page
+             # Filter events by date range
+             filtered_events = []
+             for event in page:
+                 event_date = pendulum.parse(event["created_at"])
+
+                 # Check if event is within the date range
+                 if event_date >= start_filter:
+                     if end_filter is None or event_date <= end_filter:
+                         filtered_events.append(event)
+                     elif event_date > end_filter:
+                         # Skip events that are newer than our end date
+                         continue
+                 else:
+                     # Events are ordered by date desc, so if we hit an older event, we can stop
+                     break
+
+             if filtered_events:
+                 yield filtered_events
  
      # stop requesting pages if the last element was already older than initial value
      # note: incremental will skip those items anyway, we just do not want to use the api limits
@@ -114,7 +149,7 @@ def github_repo_events(
      return repo_events
  
  
- @dlt.source
+ @dlt.source(max_table_nesting=0)
  def github_stargazers(
      owner: str,
      name: str,
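
Note the breaking change to `github_repo_events`: `access_token` is now required, `start_date` must be supplied (replacing the hard-coded 1970 initial value), and events outside `[start_date, end_date]` are filtered client-side before dlt's incremental deduplication. A hypothetical call under the new signature; the token and dates are placeholders:

```python
# Hypothetical invocation of the updated github_repo_events signature.
import dlt
import pendulum

from ingestr.src.github import github_repo_events

events = github_repo_events(
    owner="bruin-data",
    name="ingestr",
    access_token="ghp_placeholder_token",      # now required, no longer Optional
    start_date=pendulum.datetime(2024, 1, 1),  # replaces initial_value="1970-01-01T00:00:00Z"
    end_date=None,                             # open-ended: filtered up to pendulum.now()
)

pipeline = dlt.pipeline(pipeline_name="gh_events_demo", destination="duckdb")
pipeline.run(events)  # one table per event type, now merged on "id"
```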
ingestr/src/github/helpers.py
@@ -103,9 +103,9 @@ def get_reactions_data(
  
  
  def _extract_top_connection(data: StrAny, node_type: str) -> StrAny:
-     assert (
-         isinstance(data, dict) and len(data) == 1
-     ), f"The data with list of {node_type} must be a dictionary and contain only one element"
+     assert isinstance(data, dict) and len(data) == 1, (
+         f"The data with list of {node_type} must be a dictionary and contain only one element"
+     )
      data = next(iter(data.values()))
      return data[node_type]  # type: ignore
  
@@ -158,7 +158,7 @@ def _get_graphql_pages(
          )
          items_count += len(data_items)
          print(
-             f'Got {len(data_items)}/{items_count} {node_type}s, query cost {rate_limit["cost"]}, remaining credits: {rate_limit["remaining"]}'
+             f"Got {len(data_items)}/{items_count} {node_type}s, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
          )
          if data_items:
              yield data_items
@@ -187,7 +187,7 @@ def _get_comment_reaction(comment_ids: List[str], access_token: str) -> StrAny:
      # print(query)
      page, rate_limit = _run_graphql_query(access_token, query, {})
      print(
-         f'Got {len(page)} comments, query cost {rate_limit["cost"]}, remaining credits: {rate_limit["remaining"]}'
+         f"Got {len(page)} comments, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
      )
      data.update(page)
      return data
ingestr/src/google_analytics/__init__.py
@@ -7,15 +7,16 @@ from typing import Iterator, List, Optional, Union
  import dlt
  from dlt.common import pendulum
  from dlt.common.typing import DictStrAny, TDataItem
- from dlt.extract import DltResource
+ from dlt.sources import DltResource
  from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
  from google.analytics.data_v1beta import BetaAnalyticsDataClient
  from google.analytics.data_v1beta.types import (
      Dimension,
      Metric,
+     MinuteRange,
  )
  
- from .helpers import get_report
+ from .helpers import get_realtime_report, get_report
  
  
  @dlt.source(max_table_nesting=0)
@@ -29,6 +30,7 @@ def google_analytics(
      start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
      end_date: Optional[pendulum.DateTime] = None,
      rows_per_page: int = 10000,
+     minute_range_objects: List[MinuteRange] | None = None,
  ) -> List[DltResource]:
      try:
          property_id = int(property_id)
@@ -58,7 +60,7 @@
      dimensions = query["dimensions"]
  
      @dlt.resource(
-         name="basic_report",
+         name="custom",
          merge_key=datetime_dimension,
          write_disposition="merge",
      )
@@ -87,6 +89,22 @@
              end_date=end_date,
          )
  
+     # real time report
+     @dlt.resource(
+         name="realtime",
+         merge_key="ingested_at",
+         write_disposition="merge",
+     )
+     def real_time_report() -> Iterator[TDataItem]:
+         yield from get_realtime_report(
+             client=client,
+             property_id=property_id,
+             dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+             metric_list=[Metric(name=metric) for metric in query["metrics"]],
+             per_page=rows_per_page,
+             minute_range_objects=minute_range_objects,
+         )
+
      # res = dlt.resource(
      #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
      # )(
@@ -103,4 +121,4 @@
      #     ),
      # )
  
-     return [basic_report]
+     return [basic_report, real_time_report]
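
The source now returns two resources: the renamed `custom` report (formerly `basic_report`) and the new `realtime` resource backed by the Realtime Reporting API. A hedged sketch of selecting the realtime resource — the `queries` argument shape is assumed from the `query["dimensions"]`/`query["metrics"]` lookups visible above, credentials are omitted, and the property ID is a placeholder:

```python
# Hypothetical selection of the new "realtime" resource (argument shapes assumed).
import dlt
from google.analytics.data_v1beta.types import MinuteRange

from ingestr.src.google_analytics import google_analytics

source = google_analytics(
    property_id="123456789",  # placeholder
    queries=[{"dimensions": ["minutesAgo"], "metrics": ["activeUsers"]}],  # assumed shape
    minute_range_objects=[
        MinuteRange(name="0-4 minutes ago", start_minutes_ago=4, end_minutes_ago=0)
    ],
).with_resources("realtime")

dlt.pipeline(pipeline_name="ga_rt_demo", destination="duckdb").run(source)
```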
ingestr/src/google_analytics/helpers.py
@@ -2,8 +2,10 @@
  This module contains helpers that process data and make it ready for loading into the database
  """
  
+ import base64
  import json
  from typing import Any, Iterator, List, Union
+ from urllib.parse import parse_qs, urlparse
  
  import proto
  from dlt.common.exceptions import MissingDependencyException
@@ -22,6 +24,8 @@ try:
          Metric,
          MetricMetadata,  # noqa: F401
          MetricType,
+         MinuteRange,
+         RunRealtimeReportRequest,
          RunReportRequest,
          RunReportResponse,
      )
@@ -52,6 +56,53 @@ def to_dict(item: Any) -> Iterator[TDataItem]:
          yield item
  
  
+ def get_realtime_report(
+     client: Resource,
+     property_id: int,
+     dimension_list: List[Dimension],
+     metric_list: List[Metric],
+     per_page: int,
+     minute_range_objects: List[MinuteRange] | None = None,
+ ) -> Iterator[TDataItem]:
+     """
+     Gets all the possible pages of reports with the given query parameters.
+     Processes every page and yields a dictionary for every row of the report.
+
+     Args:
+         client: The Google Analytics client used to make requests.
+         property_id: A reference to the Google Analytics project.
+             More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
+         dimension_list: A list of all the dimensions requested in the query.
+         metric_list: A list of all the metrics requested in the query.
+         limit: Describes how many rows there should be per page.
+
+     Yields:
+         Generator of all rows of data in the report.
+     """
+     offset = 0
+     ingest_at = pendulum.now().to_date_string()
+
+     while True:
+         request = RunRealtimeReportRequest(
+             property=f"properties/{property_id}",
+             dimensions=dimension_list,
+             metrics=metric_list,
+             limit=per_page,
+             minute_ranges=minute_range_objects if minute_range_objects else None,
+         )
+         response = client.run_realtime_report(request)
+
+         # process request
+         processed_response_generator = process_report(
+             response=response, ingest_at=ingest_at
+         )
+         # import pdb; pdb.set_trace()
+         yield from processed_response_generator
+         offset += per_page
+         if len(response.rows) < per_page or offset > 1000000:
+             break
+
+
  def get_report(
      client: Resource,
      property_id: int,
@@ -79,10 +130,6 @@ def get_report(
          Generator of all rows of data in the report.
      """
  
-     print(
-         "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
-     )
-
      offset = 0
      while True:
          request = RunReportRequest(
@@ -98,9 +145,11 @@
                  )
              ],
          )
-         # process request
          response = client.run_report(request)
+
+         # process request
          processed_response_generator = process_report(response=response)
+
          # import pdb; pdb.set_trace()
          yield from processed_response_generator
          offset += per_page
@@ -108,7 +157,9 @@
              break
  
  
- def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
+ def process_report(
+     response: RunReportResponse, ingest_at: str | None = None
+ ) -> Iterator[TDataItems]:
      metrics_headers = [header.name for header in response.metric_headers]
      dimensions_headers = [header.name for header in response.dimension_headers]
  
@@ -131,6 +182,8 @@ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
                  metric_type=metric_type, value=row.metric_values[i].value
              )
              response_dict[metrics_headers[i]] = metric_value
+         if ingest_at is not None:
+             response_dict["ingested_at"] = ingest_at
  
          unique_key = "-".join(list(response_dict.keys()))
          if unique_key not in distinct_key_combinations:
@@ -170,3 +223,68 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
          return pendulum.from_format(dimension_value, "YYYYMMDDHHmm", tz="UTC")
      else:
          return dimension_value
+
+
+ def convert_minutes_ranges_to_minute_range_objects(
+     minutes_ranges: str,
+ ) -> List[MinuteRange]:
+     minutes_ranges = minutes_ranges.strip()
+     minutes = minutes_ranges.replace(" ", "").split(",")
+     if minutes == "":
+         raise ValueError(
+             "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+         )
+
+     minute_range_objects = []
+     for min_range in minutes:
+         if "-" not in min_range:
+             raise ValueError(
+                 "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+             )
+         parts = min_range.split("-")
+
+         if not parts[0].isdigit() or not parts[1].isdigit():
+             raise ValueError(
+                 f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
+             )
+
+         end_minutes_ago = int(parts[0])
+         start_minutes_ago = int(parts[1])
+         minute_range_objects.append(
+             MinuteRange(
+                 name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
+                 start_minutes_ago=start_minutes_ago,
+                 end_minutes_ago=end_minutes_ago,
+             )
+         )
+
+     return minute_range_objects
+
+
+ def parse_google_analytics_uri(uri: str):
+     parse_uri = urlparse(uri)
+     source_fields = parse_qs(parse_uri.query)
+     cred_path = source_fields.get("credentials_path")
+     cred_base64 = source_fields.get("credentials_base64")
+
+     if not cred_path and not cred_base64:
+         raise ValueError(
+             "credentials_path or credentials_base64 is required to connect Google Analytics"
+         )
+     credentials = {}
+     if cred_path:
+         with open(cred_path[0], "r") as f:
+             credentials = json.load(f)
+     elif cred_base64:
+         credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
+
+     property_id = source_fields.get("property_id")
+     if not property_id:
+         raise ValueError("property_id is required to connect to Google Analytics")
+
+     if (not cred_path and not cred_base64) or (not property_id):
+         raise ValueError(
+             "credentials_path or credentials_base64 and property_id are required to connect Google Analytics"
+         )
+
+     return {"credentials": credentials, "property_id": property_id[0]}
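
To make the two new helpers concrete: `convert_minutes_ranges_to_minute_range_objects` parses a string such as "1-2,5-6" into `MinuteRange` objects (each pair is read as end-start minutes ago), and `parse_google_analytics_uri` extracts credentials plus the property ID from a connection URI. An illustrative session — the URI scheme, file path, and property ID are fabricated:

```python
# Illustrative calls to the two new helpers; URI and property id are made up.
from ingestr.src.google_analytics.helpers import (
    convert_minutes_ranges_to_minute_range_objects,
    parse_google_analytics_uri,
)

ranges = convert_minutes_ranges_to_minute_range_objects("1-2, 5-6")
# ranges[0]: MinuteRange(name="1-2 minutes ago", start_minutes_ago=2, end_minutes_ago=1)

config = parse_google_analytics_uri(
    "googleanalytics://?credentials_path=/tmp/service_account.json&property_id=123456789"
)
# config == {"credentials": <parsed JSON dict>, "property_id": "123456789"}
```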
ingestr/src/google_sheets/__init__.py
@@ -70,9 +70,9 @@ def google_spreadsheet(
          spreadsheet_id=spreadsheet_id,
          range_names=list(all_range_names),
      )
-     assert len(all_range_names) == len(
-         all_range_data
-     ), "Google Sheets API must return values for all requested ranges"
+     assert len(all_range_names) == len(all_range_data), (
+         "Google Sheets API must return values for all requested ranges"
+     )
  
      # get metadata for two first rows of each range
      # first should contain headers
@@ -126,7 +126,7 @@
          headers = get_range_headers(headers_metadata, name)
          if headers is None:
              # generate automatic headers and treat the first row as data
-             headers = [f"col_{idx+1}" for idx in range(len(headers_metadata))]
+             headers = [f"col_{idx + 1}" for idx in range(len(headers_metadata))]
              data_row_metadata = headers_metadata
              rows_data = values[0:]
              logger.warning(

ingestr/src/google_sheets/helpers/data_processing.py
@@ -149,12 +149,12 @@ def get_range_headers(headers_metadata: List[DictStrAny], range_name: str) -> Li
              header_val = str(f"col_{idx + 1}")
          else:
              logger.warning(
-                 f"In range {range_name}, header value: {header_val} at position {idx+1} is not a string!"
+                 f"In range {range_name}, header value: {header_val} at position {idx + 1} is not a string!"
              )
              return None
      else:
          logger.warning(
-             f"In range {range_name}, header at position {idx+1} is not missing!"
+             f"In range {range_name}, header at position {idx + 1} is not missing!"
          )
          return None
      headers.append(header_val)