ingestr 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ingestr might be problematic.

ingestr/main.py CHANGED
@@ -57,8 +57,9 @@ class SpinnerCollector(Collector):
  name: str,
  inc: int = 1,
  total: Optional[int] = None,
- message: Optional[str] = None,
+ message: Optional[str] = None, # type: ignore
  label: str = "",
+ **kwargs,
  ) -> None:
  self.status.update(self.current_step)

@@ -443,7 +444,7 @@ def ingest(

  progressInstance: Collector = SpinnerCollector()
  if progress == Progress.log:
- progressInstance = LogCollector(dump_system_stats=False)
+ progressInstance = LogCollector()

  is_pipelines_dir_temp = False
  if pipelines_dir is None:
@@ -1,7 +1,3 @@
- """Source that loads tables form Airtable.
- Supports whitelisting of tables or loading of all tables from a specified base.
- """
-
  from typing import Any, Optional

  import dlt
@@ -150,7 +150,10 @@ def tasks(
  project_array: t.List[TDataItem],
  access_token: str = dlt.secrets.value,
  modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
- "modified_at", initial_value=DEFAULT_START_DATE
+ "modified_at",
+ initial_value=DEFAULT_START_DATE,
+ range_end="closed",
+ range_start="closed",
  ),
  fields: Iterable[str] = TASK_FIELDS,
  ) -> Iterable[TDataItem]:
@@ -159,7 +159,10 @@ def facebook_insights_source(
  )
  def facebook_insights(
  date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
- "date_start", initial_value=initial_load_start_date_str
+ "date_start",
+ initial_value=initial_load_start_date_str,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterator[TDataItems]:
  start_date = get_start_date(date_start, attribution_window_days_lag)
@@ -38,7 +38,9 @@ def readers(
  """
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
  filesystem_resource.apply_hints(
- incremental=dlt.sources.incremental("modification_date")
+ incremental=dlt.sources.incremental("modification_date"),
+ range_end="closed",
+ range_start="closed",
  )
  return (
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
@@ -14,7 +14,7 @@ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
  def github_reactions(
  owner: str,
  name: str,
- access_token: str = dlt.secrets.value,
+ access_token: str,
  items_per_page: int = 100,
  max_items: Optional[int] = None,
  ) -> Sequence[DltResource]:
@@ -89,7 +89,11 @@ def github_repo_events(
  @dlt.resource(primary_key="id", table_name=lambda i: i["type"])
  def repo_events(
  last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
- "created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max
+ "created_at",
+ initial_value="1970-01-01T00:00:00Z",
+ last_value_func=max,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterator[TDataItems]:
  repos_path = (
@@ -114,7 +118,7 @@ def github_repo_events(
  def github_stargazers(
  owner: str,
  name: str,
- access_token: str = dlt.secrets.value,
+ access_token: str,
  items_per_page: int = 100,
  max_items: Optional[int] = None,
  ) -> Sequence[DltResource]:
@@ -2,26 +2,32 @@
  Defines all the sources and resources needed for Google Analytics V4
  """

- from typing import List, Optional, Union
+ from typing import Iterator, List, Optional, Union

  import dlt
- from dlt.common.typing import DictStrAny
- from dlt.sources import DltResource
+ from dlt.common import pendulum
+ from dlt.common.typing import DictStrAny, TDataItem
+ from dlt.extract import DltResource
  from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
  from google.analytics.data_v1beta import BetaAnalyticsDataClient
+ from google.analytics.data_v1beta.types import (
+ Dimension,
+ Metric,
+ )

- from .helpers import basic_report
+ from .helpers import get_report


  @dlt.source(max_table_nesting=0)
  def google_analytics(
- datetime: str,
+ datetime_dimension: str,
  credentials: Union[
  GcpOAuthCredentials, GcpServiceAccountCredentials
  ] = dlt.secrets.value,
  property_id: int = dlt.config.value,
  queries: List[DictStrAny] = dlt.config.value,
- start_date: Optional[str] = "2015-08-14",
+ start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
+ end_date: Optional[pendulum.DateTime] = None,
  rows_per_page: int = 10000,
  ) -> List[DltResource]:
  try:
@@ -50,21 +56,51 @@ def google_analytics(

  # always add "date" to dimensions so we are able to track the last day of a report
  dimensions = query["dimensions"]
- resource_name = query["resource_name"]

- res = dlt.resource(
- basic_report, name="basic_report", merge_key=datetime, write_disposition="merge"
- )(
- client=client,
- rows_per_page=rows_per_page,
- property_id=property_id,
- dimensions=dimensions,
- metrics=query["metrics"],
- resource_name=resource_name,
- start_date=start_date,
- last_date=dlt.sources.incremental(
- datetime
- ), # pass empty primary key to avoid unique checks, a primary key defined by the resource will be used
+ @dlt.resource(
+ name="basic_report",
+ merge_key=datetime_dimension,
+ write_disposition="merge",
  )
+ def basic_report(
+ incremental=dlt.sources.incremental(
+ datetime_dimension,
+ initial_value=start_date,
+ end_value=end_date,
+ range_end="closed",
+ range_start="closed",
+ ),
+ ) -> Iterator[TDataItem]:
+ start_date = incremental.last_value
+ end_date = incremental.end_value
+ if start_date is None:
+ start_date = pendulum.datetime(2024, 1, 1)
+ if end_date is None:
+ end_date = pendulum.yesterday()
+ yield from get_report(
+ client=client,
+ property_id=property_id,
+ dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+ metric_list=[Metric(name=metric) for metric in query["metrics"]],
+ per_page=rows_per_page,
+ start_date=start_date,
+ end_date=end_date,
+ )
+
+ # res = dlt.resource(
+ # basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
+ # )(
+ # client=client,
+ # rows_per_page=rows_per_page,
+ # property_id=property_id,
+ # dimensions=dimensions,
+ # metrics=query["metrics"],
+ # resource_name=resource_name,
+ # last_date=dlt.sources.incremental(
+ # datetime_dimension,
+ # initial_value=start_date,
+ # end_value=end_date,
+ # ),
+ # )

- return [res]
+ return [basic_report]
@@ -57,9 +57,9 @@ def get_report(
  property_id: int,
  dimension_list: List[Dimension],
  metric_list: List[Metric],
- limit: int,
- start_date: str,
- end_date: str,
+ per_page: int,
+ start_date: pendulum.DateTime,
+ end_date: pendulum.DateTime,
  ) -> Iterator[TDataItem]:
  """
  Gets all the possible pages of reports with the given query parameters.
@@ -79,30 +79,36 @@ def get_report(
  Generator of all rows of data in the report.
  """

- request = RunReportRequest(
- property=f"properties/{property_id}",
- dimensions=dimension_list,
- metrics=metric_list,
- limit=limit,
- date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
+ print(
+ "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
  )
- # process request
- response = client.run_report(request)
- processed_response_generator = process_report(response=response)
- yield from processed_response_generator

+ offset = 0
+ while True:
+ request = RunReportRequest(
+ property=f"properties/{property_id}",
+ dimensions=dimension_list,
+ metrics=metric_list,
+ limit=per_page,
+ offset=offset,
+ date_ranges=[
+ DateRange(
+ start_date=start_date.to_date_string(),
+ end_date=end_date.to_date_string(),
+ )
+ ],
+ )
+ # process request
+ response = client.run_report(request)
+ processed_response_generator = process_report(response=response)
+ # import pdb; pdb.set_trace()
+ yield from processed_response_generator
+ offset += per_page
+ if len(response.rows) < per_page or offset > 1000000:
+ break

- def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
- """
- Receives a single page for a report response, processes it, and returns a generator for every row of data in the report page.
-
- Args:
- response: The API response for a single page of the report.
-
- Yields:
- Generator of dictionaries for every row of the report page.
- """

+ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
  metrics_headers = [header.name for header in response.metric_headers]
  dimensions_headers = [header.name for header in response.dimension_headers]

@@ -156,16 +162,6 @@ def process_metric_value(metric_type: MetricType, value: str) -> Union[str, int,


  def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
- """
- Helper function that receives a dimension's name and value and converts it to a datetime object if needed.
-
- Args:
- dimension_name: Name of the dimension.
- dimension_value: Value of the dimension.
-
- Returns:
- The value of the dimension with the correct data type.
- """
  if dimension_name == "date":
  return pendulum.from_format(dimension_value, "YYYYMMDD", tz="UTC")
  elif dimension_name == "dateHour":
@@ -116,7 +116,9 @@ def gorgias_source(
  },
  )
  def customers(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
  ) -> Iterable[TDataItem]:
  """
  The resource for customers on your Gorgias domain, supports incremental loading and pagination.
@@ -290,7 +292,9 @@ def gorgias_source(
  },
  )
  def tickets(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
  ) -> Iterable[TDataItem]:
  """
  The resource for tickets on your Gorgias domain, supports incremental loading and pagination.
@@ -481,7 +485,9 @@ def gorgias_source(
  },
  )
  def ticket_messages(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
  ) -> Iterable[TDataItem]:
  """
  The resource for ticket messages on your Gorgias domain, supports incremental loading and pagination.
@@ -566,7 +572,9 @@ def gorgias_source(
  },
  )
  def satisfaction_surveys(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
  ) -> Iterable[TDataItem]:
  """
  The resource for satisfaction surveys on your Gorgias domain, supports incremental loading and pagination.
@@ -278,4 +278,11 @@ def hubspot_events_for_objects(
  write_disposition="append",
  selected=True,
  table_name=lambda e: name + "_" + str(e["eventType"]),
- )(dlt.sources.incremental("occurredAt", initial_value=start_date.isoformat()))
+ )(
+ dlt.sources.incremental(
+ "occurredAt",
+ initial_value=start_date.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ )
+ )
@@ -33,7 +33,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
  def events(
- datetime=dlt.sources.incremental("datetime", start_date_obj.isoformat()),
+ datetime=dlt.sources.incremental(
+ "datetime",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(datetime.start_value), pendulum.now()
@@ -44,7 +49,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
  def profiles(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated.start_value), pendulum.now()
@@ -55,7 +65,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
  def campaigns(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated_at.start_value), pendulum.now()
@@ -69,7 +84,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id")
  def metrics(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_metrics(create_client(), updated.start_value)

@@ -83,7 +103,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
  def catalog_variants(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_catalog_variant(create_client(), updated.start_value)

@@ -91,19 +116,34 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
  write_disposition="merge", primary_key="id", name="catalog-categories"
  )
  def catalog_categories(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_catalog_categories(create_client(), updated.start_value)

  @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
  def catalog_items(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_catalog_item(create_client(), updated.start_value)

  @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
  def forms(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated_at.start_value), pendulum.now()
@@ -114,13 +154,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id")
  def lists(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_lists(create_client(), updated.start_value)

  @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
  def images(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated_at.start_value), pendulum.now()
@@ -130,13 +180,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="merge", primary_key="id")
  def segments(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  yield from client.fetch_segments(create_client(), updated.start_value)

  @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
  def flows(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated.start_value), pendulum.now()
@@ -146,7 +206,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour

  @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
  def templates(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
  ) -> Iterable[TDataItem]:
  intervals = split_date_range(
  pendulum.parse(updated.start_value), pendulum.now()
@@ -158,6 +158,8 @@ def shopify_source(
  initial_value=start_date_obj,
  end_value=end_date_obj,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  created_at_min: pendulum.DateTime = created_at_min_obj,
  items_per_page: int = items_per_page,
@@ -606,6 +608,8 @@ def shopify_source(
  initial_value=start_date_obj,
  end_value=end_date_obj,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  created_at_min: pendulum.DateTime = created_at_min_obj,
  items_per_page: int = items_per_page,
@@ -640,6 +644,8 @@ def shopify_source(
  initial_value=start_date_obj,
  end_value=end_date_obj,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  created_at_min: pendulum.DateTime = created_at_min_obj,
  items_per_page: int = items_per_page,
@@ -671,6 +677,8 @@ def shopify_source(
  "created_at",
  initial_value=start_date_obj,
  end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
  ),
  items_per_page: int = items_per_page,
  ) -> Iterable[TDataItem]:
@@ -689,6 +697,8 @@ def shopify_source(
  "updated_at",
  initial_value=start_date_obj,
  end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
  ),
  items_per_page: int = items_per_page,
  ) -> Iterable[TDataItem]:
@@ -730,6 +740,8 @@ def shopify_source(
  initial_value=start_date_obj,
  end_value=end_date_obj,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  items_per_page: int = items_per_page,
  ) -> Iterable[TDataItem]:
@@ -1807,6 +1819,8 @@ query discountNodes($after: String, $query: String, $first: Int) {
  "updatedAt",
  initial_value=start_date_obj,
  end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
  ),
  items_per_page: int = items_per_page,
  ) -> Iterable[TDataItem]:
@@ -175,6 +175,8 @@ def slack_source(
  initial_value=start_dt,
  end_value=end_dt,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterable[TDataItem]:
  """
@@ -198,6 +200,8 @@ def slack_source(
  initial_value=start_dt,
  end_value=end_dt,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterable[TDataItem]:
  """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.
ingestr/src/sources.py CHANGED
@@ -18,7 +18,6 @@ from urllib.parse import ParseResult, parse_qs, quote, urlparse

  import dlt
  import pendulum
- import sqlalchemy
  from dlt.common.configuration.specs import (
  AwsCredentials,
  )
@@ -42,7 +41,6 @@ from dlt.sources.sql_database.schema_types import (
  )
  from sqlalchemy import Column
  from sqlalchemy import types as sa
- from sqlalchemy.dialects import mysql

  from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
  from ingestr.src.adjust.adjust_helpers import parse_filters
@@ -67,6 +65,12 @@ from ingestr.src.mongodb import mongodb_collection
  from ingestr.src.notion import notion_databases
  from ingestr.src.shopify import shopify_source
  from ingestr.src.slack import slack_source
+ from ingestr.src.sql_database.callbacks import (
+ chained_query_adapter_callback,
+ custom_query_variable_subsitution,
+ limit_callback,
+ type_adapter_callback,
+ )
  from ingestr.src.stripe_analytics import stripe_source
  from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
  from ingestr.src.tiktok_ads import tiktok_source
@@ -103,21 +107,18 @@ class SqlSource:
  kwargs.get("incremental_key", ""),
  initial_value=start_value,
  end_value=end_value,
+ range_end="closed",
+ range_start="closed",
  )

  if uri.startswith("mysql://"):
  uri = uri.replace("mysql://", "mysql+pymysql://")

- reflection_level = kwargs.get("sql_reflection_level")
-
- query_adapter_callback = None
+ query_adapters = []
  if kwargs.get("sql_limit"):
-
- def query_adapter_callback(query, table):
- query = query.limit(kwargs.get("sql_limit"))
- if kwargs.get("incremental_key"):
- query = query.order_by(kwargs.get("incremental_key"))
- return query
+ query_adapters.append(
+ limit_callback(kwargs.get("sql_limit"), kwargs.get("incremental_key"))
+ )

  defer_table_reflect = False
  sql_backend = kwargs.get("sql_backend", "sqlalchemy")
@@ -196,38 +197,10 @@ class SqlSource:
  if getattr(engine, "may_dispose_after_use", False):
  engine.dispose()

- dlt.sources.sql_database.table_rows = table_rows
-
- def query_adapter_callback(query, table, incremental=None, engine=None):
- params = {}
- if incremental:
- params["interval_start"] = (
- incremental.last_value
- if incremental.last_value is not None
- else datetime(year=1, month=1, day=1)
- )
- if incremental.end_value is not None:
- params["interval_end"] = incremental.end_value
- else:
- if ":interval_start" in query_value:
- params["interval_start"] = (
- datetime.min
- if kwargs.get("interval_start") is None
- else kwargs.get("interval_start")
- )
- if ":interval_end" in query_value:
- params["interval_end"] = (
- datetime.max
- if kwargs.get("interval_end") is None
- else kwargs.get("interval_end")
- )
+ dlt.sources.sql_database.table_rows = table_rows # type: ignore

- return sqlalchemy.text(query_value).bindparams(**params)
-
- def type_adapter_callback(sql_type):
- if isinstance(sql_type, mysql.SET):
- return sa.JSON
- return sql_type
+ # override the query adapters, the only one we want is the one here in the case of custom queries
+ query_adapters = [custom_query_variable_subsitution(query_value, kwargs)]

  builder_res = self.table_builder(
  credentials=ConnectionStringCredentials(uri),
@@ -236,8 +209,8 @@ class SqlSource:
  incremental=incremental,
  backend=sql_backend,
  chunk_size=kwargs.get("page_size", None),
- reflection_level=reflection_level,
- query_adapter_callback=query_adapter_callback,
+ reflection_level=kwargs.get("sql_reflection_level", None),
+ query_adapter_callback=chained_query_adapter_callback(query_adapters),
  type_adapter_callback=type_adapter_callback,
  table_adapter_callback=table_adapter_exclude_columns(
  kwargs.get("sql_exclude_columns", [])
@@ -267,6 +240,8 @@ class ArrowMemoryMappedSource:
  kwargs.get("incremental_key", ""),
  initial_value=start_value,
  end_value=end_value,
+ range_end="closed",
+ range_start="closed",
  )

  file_path = uri.split("://")[1]
@@ -312,6 +287,8 @@ class MongoDbSource:
  kwargs.get("incremental_key", ""),
  initial_value=start_value,
  end_value=end_value,
+ range_end="closed",
+ range_start="closed",
  )

  table_instance = self.table_builder(
@@ -380,6 +357,8 @@ class LocalCsvSource:
  kwargs.get("incremental_key", ""),
  initial_value=kwargs.get("interval_start"),
  end_value=kwargs.get("interval_end"),
+ range_end="closed",
+ range_start="closed",
  )
  )

@@ -1338,6 +1317,8 @@ class DynamoDBSource:
  incremental_key.strip(),
  initial_value=isotime(kwargs.get("interval_start")),
  end_value=isotime(kwargs.get("interval_end")),
+ range_end="closed",
+ range_start="closed",
  )

  return dynamodb(table, creds, incremental)
@@ -1363,11 +1344,6 @@ class GoogleAnalyticsSource:
  if not property_id:
  raise ValueError("property_id is required to connect to Google Analytics")

- interval_start = kwargs.get("interval_start")
- start_date = (
- interval_start.strftime("%Y-%m-%d") if interval_start else "2015-08-14"
- )
-
  fields = table.split(":")
  if len(fields) != 3:
  raise ValueError(
@@ -1391,10 +1367,19 @@ class GoogleAnalyticsSource:
  {"resource_name": "custom", "dimensions": dimensions, "metrics": metrics}
  ]

+ start_date = pendulum.now().subtract(days=30).start_of("day")
+ if kwargs.get("interval_start") is not None:
+ start_date = pendulum.instance(kwargs.get("interval_start")) # type: ignore
+
+ end_date = pendulum.now()
+ if kwargs.get("interval_end") is not None:
+ end_date = pendulum.instance(kwargs.get("interval_end")) # type: ignore
+
  return google_analytics(
  property_id=property_id[0],
  start_date=start_date,
- datetime=datetime,
+ end_date=end_date,
+ datetime_dimension=datetime,
  queries=queries,
  credentials=credentials,
  ).with_resources("basic_report")
@@ -1425,9 +1410,7 @@ class GitHubSource:
  "repo variable is required to retrieve data for a specific repository from GitHub."
  )

- access_token = source_fields.get("access_token", [None])[0]
- if not access_token and table not in ["repo_events"]:
- raise ValueError("access_token is required to connect with GitHub")
+ access_token = source_fields.get("access_token", [""])[0]

  if table in ["issues", "pull_requests"]:
  return github_reactions(
File without changes
@@ -0,0 +1,66 @@
+ from datetime import datetime
+
+ from sqlalchemy import text
+ from sqlalchemy import types as sa
+ from sqlalchemy.dialects import mysql
+
+
+ def type_adapter_callback(sql_type):
+ if isinstance(sql_type, mysql.SET):
+ return sa.JSON
+ return sql_type
+
+
+ def chained_query_adapter_callback(query_adapters):
+ """
+ This function is used to chain multiple query adapters together,.
+ This gives us the flexibility to introduce various adapters based on the given command parameters.
+ """
+
+ def callback(query, table):
+ for adapter in query_adapters:
+ query = adapter(query, table)
+
+ return query
+
+ return callback
+
+
+ def limit_callback(sql_limit: int, incremental_key: str):
+ def callback(query, table):
+ query = query.limit(sql_limit)
+ if incremental_key:
+ query = query.order_by(incremental_key)
+ return query
+
+ return callback
+
+
+ def custom_query_variable_subsitution(query_value: str, kwargs: dict):
+ def callback(query, table, incremental=None, engine=None):
+ params = {}
+ if incremental:
+ params["interval_start"] = (
+ incremental.last_value
+ if incremental.last_value is not None
+ else datetime(year=1, month=1, day=1)
+ )
+ if incremental.end_value is not None:
+ params["interval_end"] = incremental.end_value
+ else:
+ if ":interval_start" in query_value:
+ params["interval_start"] = (
+ datetime.min
+ if kwargs.get("interval_start") is None
+ else kwargs.get("interval_start")
+ )
+ if ":interval_end" in query_value:
+ params["interval_end"] = (
+ datetime.max
+ if kwargs.get("interval_end") is None
+ else kwargs.get("interval_end")
+ )
+
+ return text(query_value).bindparams(**params)
+
+ return callback
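As a usage note, a small sketch (assuming ingestr 0.12.6 is installed; not part of the package itself) of how these adapters compose, mirroring the SqlSource wiring in the sources.py hunks above. The limit value and the "updated_at" column are illustrative:

```python
# Build the adapter list the same way SqlSource does when a sql_limit is supplied,
# then fold it into a single callback for dlt's sql_database source.
from ingestr.src.sql_database.callbacks import (
    chained_query_adapter_callback,
    limit_callback,
)

adapters = [limit_callback(sql_limit=100, incremental_key="updated_at")]
query_adapter = chained_query_adapter_callback(adapters)

# dlt's sql_table/sql_database sources would receive this as
# query_adapter_callback=query_adapter and invoke it as query_adapter(query, table),
# where query is the SQLAlchemy Select built for the reflected table.
```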
@@ -84,7 +84,10 @@ def incremental_stripe_source(
  def incremental_resource(
  endpoint: str,
  created: Optional[Any] = dlt.sources.incremental(
- "created", initial_value=start_date_unix
+ "created",
+ initial_value=start_date_unix,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Generator[Dict[Any, Any], Any, None]:
  start_value = created.last_value
@@ -110,7 +110,12 @@ def tiktok_source(
  )
  def custom_reports(
  datetime=(
- dlt.sources.incremental(incremental_loading_param, start_date)
+ dlt.sources.incremental(
+ incremental_loading_param,
+ start_date,
+ range_end="closed",
+ range_start="closed",
+ )
  if is_incremental
  else None
  ),
ingestr/src/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.12.4"
+ __version__ = "0.12.6"
@@ -260,6 +260,8 @@ def zendesk_support(
  initial_value=start_date_ts,
  end_value=end_date_ts,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterator[TDataItem]:
  # URL For ticket events
@@ -294,6 +296,8 @@ def zendesk_support(
  initial_value=start_date_obj,
  end_value=end_date_obj,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterator[TDataItem]:
  """
@@ -340,6 +344,8 @@ def zendesk_support(
  initial_value=start_date_iso_str,
  end_value=end_date_iso_str,
  allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
  ),
  ) -> Iterator[TDataItem]:
  """
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.12.4
+ Version: 0.12.6
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -17,15 +17,16 @@ Requires-Python: >=3.9
  Requires-Dist: asana==3.2.3
  Requires-Dist: confluent-kafka>=2.6.1
  Requires-Dist: databricks-sql-connector==2.9.3
- Requires-Dist: dlt==1.4.0
+ Requires-Dist: dlt==1.5.0
  Requires-Dist: duckdb-engine==0.13.5
  Requires-Dist: duckdb==1.1.3
  Requires-Dist: facebook-business==20.0.0
- Requires-Dist: google-analytics-data==0.18.15
+ Requires-Dist: google-analytics-data==0.18.16
  Requires-Dist: google-api-python-client==2.130.0
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
  Requires-Dist: mysql-connector-python==9.1.0
  Requires-Dist: pendulum==3.0.0
+ Requires-Dist: psutil==6.1.1
  Requires-Dist: psycopg2-binary==2.9.10
  Requires-Dist: py-machineid==0.6.0
  Requires-Dist: pyairtable==2.3.3
@@ -34,7 +35,7 @@ Requires-Dist: pyathena==3.9.0
  Requires-Dist: pymongo==4.10.1
  Requires-Dist: pymysql==1.1.1
  Requires-Dist: pyrate-limiter==3.7.0
- Requires-Dist: redshift-connector==2.1.3
+ Requires-Dist: redshift-connector==2.1.5
  Requires-Dist: rich==13.9.4
  Requires-Dist: rudder-sdk-python==2.1.4
  Requires-Dist: s3fs==2024.10.0
@@ -1,52 +1,51 @@
- ingestr/main.py,sha256=AG6ycOEpCyBN1qEOzW3j8sKK8KX0mrBAL-A25MdRldY,24712
+ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
  ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
- ingestr/src/sources.py,sha256=zkK24y3jyucbrW2MU3i0Rx1SImZWatM9_A_8Wa7ExCM,51887
+ ingestr/src/sources.py,sha256=GIskUoVL82x_mLerU9cgdixBNNhzBnDN-_MDraqK7hY,51166
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
- ingestr/src/version.py,sha256=DoMS9KOhsApLyuLYhLEsd5nmoLFQ_IvVkEs_jKRzFk8,23
+ ingestr/src/version.py,sha256=vb8hPdq1CrFlRl6aBYGOWE4MPv-N84JJm1f3KFvG8o4,23
  ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
  ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
- ingestr/src/arrow/__init__.py,sha256=AgU7S9Ra3ZeeG00Mf32zxO5sgMFfRnTdOSirUJ1Pu10,2976
- ingestr/src/asana_source/__init__.py,sha256=Y4Ti_876Yong420fQ2o4A97TdgrZNlZVxlTMLyXdSjA,8116
+ ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
+ ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
  ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
  ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
  ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
  ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
- ingestr/src/facebook_ads/__init__.py,sha256=ZZyogV48gmhDcC3CYQEsC4qT3Q6JI9IOnMff2NS1M-A,9207
+ ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
  ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
  ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
  ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
- ingestr/src/filesystem/__init__.py,sha256=wHHaKFuAjsR_ZRjl6g_Flf6FhVs9qhwREthTr03_7cc,4162
+ ingestr/src/filesystem/__init__.py,sha256=hcN_sO356ChTPyg72AufrikdkFBBIScTdxtGfDm-W0E,4221
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
- ingestr/src/github/__init__.py,sha256=csA2VcjOxXrVrvp7zY-JodO9Lpy98bJ4AqRdHCLTcGM,5838
+ ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
  ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
  ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
  ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
- ingestr/src/google_analytics/__init__.py,sha256=HjA13wfJm2MGfy3h_DiM5ekkNqM2dgwYCKJ3pprnDtI,2482
- ingestr/src/google_analytics/helpers/__init__.py,sha256=y_q7dinlEwNBEpq6kCzjTa8lAhe2bb23bDPP0fcy7fY,2744
- ingestr/src/google_analytics/helpers/data_processing.py,sha256=fIdEKr9CmZN_s1T2i9BL8IYTPPqNoK6Vaquq2y8StfE,6072
+ ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
+ ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
  ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9kowZvQUCUxz0G6SlI,5404
  ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
- ingestr/src/gorgias/__init__.py,sha256=LZ3m6aGuhLVI3eNjvQE0rT4o_wbSPkY_SDKsM-g0V5U,21176
+ ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
  ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
- ingestr/src/hubspot/__init__.py,sha256=LshHlFzzs8trAOxSg7C9F7zIBakqsg8XfyNBouip09w,9761
+ ingestr/src/hubspot/__init__.py,sha256=DXvn1yGToFUKk-1mMqqoN0OCLNpD16-2mPyEmkhyoVY,9876
  ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
  ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
  ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
- ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
+ ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
  ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
@@ -56,21 +55,23 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
  ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
- ingestr/src/shopify/__init__.py,sha256=Hhv84zRfVsqAGP7pz-PmeopeX9CGu7TXSm3PSXHEwIA,62689
+ ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
  ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
  ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
- ingestr/src/slack/__init__.py,sha256=UF-ficQ6K32u1EHytW3P35suACo9wuc6nMrAPViyZL8,9981
+ ingestr/src/slack/__init__.py,sha256=pyDukxcilqTAe_bBzfWJ8Vxi83S-XEdEFBH2pEgILrM,10113
  ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
  ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
- ingestr/src/stripe_analytics/__init__.py,sha256=VEXH4to2vNojN4rk3qsypR7urtTzaxSBB3IBiD5tuoE,4514
+ ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
+ ingestr/src/stripe_analytics/__init__.py,sha256=0HCL0qsrh_si1RR3a4k9XS94VWQ4v9aG7CqXF-V-57M,4593
  ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
  ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
- ingestr/src/tiktok_ads/__init__.py,sha256=U4ZHPUW0c4LpKx4hjT2Lz5hgWFgwQSbAAkkYIrxYHZo,4469
+ ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
  ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=cfdPflCeR_mCk5fxq0v4d7pzlvZDiAoz8bWQJYqKALM,3935
- ingestr/src/zendesk/__init__.py,sha256=C7HkN195DGdOHId2_Sa_kAlcBrUmnVYZUa_tPkiyf1Q,17564
+ ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
  ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
  ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
  ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
@@ -84,8 +85,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
- ingestr-0.12.4.dist-info/METADATA,sha256=VN9cqnH_rmALlSxePi6XOxOxndDGLYWTW0K6eafYVDw,7956
- ingestr-0.12.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.12.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.12.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.12.4.dist-info/RECORD,,
+ ingestr-0.12.6.dist-info/METADATA,sha256=y-o_BL8nj7pVQU3sSaz9UJ9XsNVUi8Rjf5G0vNGi6io,7985
+ ingestr-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.12.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.12.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.12.6.dist-info/RECORD,,
@@ -1,70 +0,0 @@
- """Google analytics source helpers"""
-
- from typing import Iterator, List
-
- import dlt
- from apiclient.discovery import Resource # type: ignore
- from dlt.common import logger, pendulum
- from dlt.common.typing import TDataItem
- from google.analytics.data_v1beta.types import (
- Dimension,
- Metric,
- )
- from pendulum.datetime import DateTime
-
- from .data_processing import get_report
-
-
- def basic_report(
- client: Resource,
- rows_per_page: int,
- dimensions: List[str],
- metrics: List[str],
- property_id: int,
- resource_name: str,
- start_date: str,
- last_date: dlt.sources.incremental[DateTime],
- ) -> Iterator[TDataItem]:
- """
- Retrieves the data for a report given dimensions, metrics, and filters required for the report.
-
- Args:
- client: The Google Analytics client used to make requests.
- dimensions: Dimensions for the report. See metadata for the full list of dimensions.
- metrics: Metrics for the report. See metadata for the full list of metrics.
- property_id: A reference to the Google Analytics project.
- More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
- rows_per_page: Controls how many rows are retrieved per page in the reports.
- Default is 10000, maximum possible is 100000.
- resource_name: The resource name used to save incremental into dlt state.
- start_date: Incremental load start_date.
- Default is taken from dlt state if it exists.
- last_date: Incremental load end date.
- Default is taken from dlt state if it exists.
-
- Returns:
- Generator of all rows of data in the report.
- """
-
- # grab the start time from last dlt load if not filled, if that is also empty then use the first day of the millennium as the start time instead
- if last_date.last_value:
- if start_date != "2015-08-14":
- logger.warning(
- f"Using the starting date: {last_date.last_value} for incremental report: {resource_name} and ignoring start date passed as argument {start_date}"
- )
- start_date = last_date.last_value.to_date_string()
- else:
- start_date = start_date or "2015-08-14"
-
- processed_response = get_report(
- client=client,
- property_id=property_id,
- # fill dimensions and metrics with the proper api client objects
- dimension_list=[Dimension(name=dimension) for dimension in dimensions],
- metric_list=[Metric(name=metric) for metric in metrics],
- limit=rows_per_page,
- start_date=start_date,
- # configure end_date to yesterday as a date string
- end_date=pendulum.now().to_date_string(),
- )
- yield from processed_response