ingestr 0.12.5__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ingestr has been flagged as possibly problematic by the registry diff service.

ingestr/main.py CHANGED
@@ -444,7 +444,7 @@ def ingest(
 
 progressInstance: Collector = SpinnerCollector()
 if progress == Progress.log:
- progressInstance = LogCollector(dump_system_stats=False)
+ progressInstance = LogCollector()
 
 is_pipelines_dir_temp = False
 if pipelines_dir is None:
@@ -150,7 +150,10 @@ def tasks(
 project_array: t.List[TDataItem],
 access_token: str = dlt.secrets.value,
 modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
- "modified_at", initial_value=DEFAULT_START_DATE
+ "modified_at",
+ initial_value=DEFAULT_START_DATE,
+ range_end="closed",
+ range_start="closed",
 ),
 fields: Iterable[str] = TASK_FIELDS,
 ) -> Iterable[TDataItem]:
@@ -159,7 +159,10 @@ def facebook_insights_source(
 )
 def facebook_insights(
 date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
- "date_start", initial_value=initial_load_start_date_str
+ "date_start",
+ initial_value=initial_load_start_date_str,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterator[TDataItems]:
 start_date = get_start_date(date_start, attribution_window_days_lag)
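
The recurring change in these hunks, and in most of the source modules below, is the same: each dlt.sources.incremental cursor gains range_start="closed" and range_end="closed", so rows whose cursor value falls exactly on a window boundary are loaded on both ends instead of being filtered out. A minimal sketch of the pattern, assuming a dlt release that supports these arguments (the released code depends on them); the resource, column, and sample values here are placeholders, not taken from ingestr:

    import dlt

    @dlt.resource(write_disposition="merge", primary_key="id")
    def example_rows(
        # closed on both ends: records whose "updated_at" equals the window
        # start or end value are included rather than skipped
        updated_at=dlt.sources.incremental(
            "updated_at",
            initial_value="2024-01-01T00:00:00+00:00",
            range_start="closed",
            range_end="closed",
        ),
    ):
        # stand-in for a real API call; dlt applies the cursor filter to these rows
        yield [
            {"id": 1, "updated_at": "2024-01-01T00:00:00+00:00"},
            {"id": 2, "updated_at": "2024-02-01T00:00:00+00:00"},
        ]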
@@ -38,7 +38,9 @@ def readers(
 """
 filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
 filesystem_resource.apply_hints(
- incremental=dlt.sources.incremental("modification_date")
+ incremental=dlt.sources.incremental("modification_date"),
+ range_end="closed",
+ range_start="closed",
 )
 return (
 filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
@@ -14,7 +14,7 @@ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
 def github_reactions(
 owner: str,
 name: str,
- access_token: str = dlt.secrets.value,
+ access_token: str,
 items_per_page: int = 100,
 max_items: Optional[int] = None,
 ) -> Sequence[DltResource]:
@@ -89,7 +89,11 @@ def github_repo_events(
 @dlt.resource(primary_key="id", table_name=lambda i: i["type"])
 def repo_events(
 last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
- "created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max
+ "created_at",
+ initial_value="1970-01-01T00:00:00Z",
+ last_value_func=max,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterator[TDataItems]:
 repos_path = (
@@ -114,7 +118,7 @@ def github_repo_events(
 def github_stargazers(
 owner: str,
 name: str,
- access_token: str = dlt.secrets.value,
+ access_token: str,
 items_per_page: int = 100,
 max_items: Optional[int] = None,
 ) -> Sequence[DltResource]:
@@ -2,26 +2,32 @@
 Defines all the sources and resources needed for Google Analytics V4
 """
 
- from typing import List, Optional, Union
+ from typing import Iterator, List, Optional, Union
 
 import dlt
- from dlt.common.typing import DictStrAny
- from dlt.sources import DltResource
+ from dlt.common import pendulum
+ from dlt.common.typing import DictStrAny, TDataItem
+ from dlt.extract import DltResource
 from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
 from google.analytics.data_v1beta import BetaAnalyticsDataClient
+ from google.analytics.data_v1beta.types import (
+ Dimension,
+ Metric,
+ )
 
- from .helpers import basic_report
+ from .helpers import get_report
 
 
 @dlt.source(max_table_nesting=0)
 def google_analytics(
- datetime: str,
+ datetime_dimension: str,
 credentials: Union[
 GcpOAuthCredentials, GcpServiceAccountCredentials
 ] = dlt.secrets.value,
 property_id: int = dlt.config.value,
 queries: List[DictStrAny] = dlt.config.value,
- start_date: Optional[str] = "2015-08-14",
+ start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
+ end_date: Optional[pendulum.DateTime] = None,
 rows_per_page: int = 10000,
 ) -> List[DltResource]:
 try:
@@ -50,21 +56,51 @@ def google_analytics(
 
 # always add "date" to dimensions so we are able to track the last day of a report
 dimensions = query["dimensions"]
- resource_name = query["resource_name"]
 
- res = dlt.resource(
- basic_report, name="basic_report", merge_key=datetime, write_disposition="merge"
- )(
- client=client,
- rows_per_page=rows_per_page,
- property_id=property_id,
- dimensions=dimensions,
- metrics=query["metrics"],
- resource_name=resource_name,
- start_date=start_date,
- last_date=dlt.sources.incremental(
- datetime
- ), # pass empty primary key to avoid unique checks, a primary key defined by the resource will be used
+ @dlt.resource(
+ name="basic_report",
+ merge_key=datetime_dimension,
+ write_disposition="merge",
 )
+ def basic_report(
+ incremental=dlt.sources.incremental(
+ datetime_dimension,
+ initial_value=start_date,
+ end_value=end_date,
+ range_end="closed",
+ range_start="closed",
+ ),
+ ) -> Iterator[TDataItem]:
+ start_date = incremental.last_value
+ end_date = incremental.end_value
+ if start_date is None:
+ start_date = pendulum.datetime(2024, 1, 1)
+ if end_date is None:
+ end_date = pendulum.yesterday()
+ yield from get_report(
+ client=client,
+ property_id=property_id,
+ dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+ metric_list=[Metric(name=metric) for metric in query["metrics"]],
+ per_page=rows_per_page,
+ start_date=start_date,
+ end_date=end_date,
+ )
+
+ # res = dlt.resource(
+ # basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
+ # )(
+ # client=client,
+ # rows_per_page=rows_per_page,
+ # property_id=property_id,
+ # dimensions=dimensions,
+ # metrics=query["metrics"],
+ # resource_name=resource_name,
+ # last_date=dlt.sources.incremental(
+ # datetime_dimension,
+ # initial_value=start_date,
+ # end_value=end_date,
+ # ),
+ # )
 
- return [res]
+ return [basic_report]
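
For orientation, the reworked source is consumed elsewhere in this release through .with_resources("basic_report") (see the GoogleAnalyticsSource hunk in ingestr/src/sources.py further down). A rough usage sketch under assumptions: GCP credentials come from dlt secrets (credentials defaults to dlt.secrets.value), duckdb stands in as a destination, and the property ID and query values are placeholders:

    import dlt
    from dlt.common import pendulum

    from ingestr.src.google_analytics import google_analytics

    # select only the basic_report resource, mirroring how sources.py wires it up
    source = google_analytics(
        datetime_dimension="date",
        property_id=123456,  # placeholder GA4 property ID
        queries=[
            {
                "resource_name": "custom",
                "dimensions": ["date", "country"],
                "metrics": ["totalUsers"],
            }
        ],
        start_date=pendulum.datetime(2024, 1, 1),
        end_date=pendulum.now(),
    ).with_resources("basic_report")

    pipeline = dlt.pipeline(pipeline_name="ga4_demo", destination="duckdb", dataset_name="ga4")
    pipeline.run(source)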
@@ -57,9 +57,9 @@ def get_report(
 property_id: int,
 dimension_list: List[Dimension],
 metric_list: List[Metric],
- limit: int,
- start_date: str,
- end_date: str,
+ per_page: int,
+ start_date: pendulum.DateTime,
+ end_date: pendulum.DateTime,
 ) -> Iterator[TDataItem]:
 """
 Gets all the possible pages of reports with the given query parameters.
@@ -79,30 +79,36 @@ def get_report(
 Generator of all rows of data in the report.
 """
 
- request = RunReportRequest(
- property=f"properties/{property_id}",
- dimensions=dimension_list,
- metrics=metric_list,
- limit=limit,
- date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
+ print(
+ "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
 )
- # process request
- response = client.run_report(request)
- processed_response_generator = process_report(response=response)
- yield from processed_response_generator
 
+ offset = 0
+ while True:
+ request = RunReportRequest(
+ property=f"properties/{property_id}",
+ dimensions=dimension_list,
+ metrics=metric_list,
+ limit=per_page,
+ offset=offset,
+ date_ranges=[
+ DateRange(
+ start_date=start_date.to_date_string(),
+ end_date=end_date.to_date_string(),
+ )
+ ],
+ )
+ # process request
+ response = client.run_report(request)
+ processed_response_generator = process_report(response=response)
+ # import pdb; pdb.set_trace()
+ yield from processed_response_generator
+ offset += per_page
+ if len(response.rows) < per_page or offset > 1000000:
+ break
 
- def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
- """
- Receives a single page for a report response, processes it, and returns a generator for every row of data in the report page.
-
- Args:
- response: The API response for a single page of the report.
-
- Yields:
- Generator of dictionaries for every row of the report page.
- """
 
+ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
 metrics_headers = [header.name for header in response.metric_headers]
 dimensions_headers = [header.name for header in response.dimension_headers]
 
@@ -156,16 +162,6 @@ def process_metric_value(metric_type: MetricType, value: str) -> Union[str, int,
 
 
 def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
- """
- Helper function that receives a dimension's name and value and converts it to a datetime object if needed.
-
- Args:
- dimension_name: Name of the dimension.
- dimension_value: Value of the dimension.
-
- Returns:
- The value of the dimension with the correct data type.
- """
 if dimension_name == "date":
 return pendulum.from_format(dimension_value, "YYYYMMDD", tz="UTC")
 elif dimension_name == "dateHour":
@@ -116,7 +116,9 @@ def gorgias_source(
 },
 )
 def customers(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
 ) -> Iterable[TDataItem]:
 """
 The resource for customers on your Gorgias domain, supports incremental loading and pagination.
@@ -290,7 +292,9 @@ def gorgias_source(
 },
 )
 def tickets(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
 ) -> Iterable[TDataItem]:
 """
 The resource for tickets on your Gorgias domain, supports incremental loading and pagination.
@@ -481,7 +485,9 @@ def gorgias_source(
 },
 )
 def ticket_messages(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
 ) -> Iterable[TDataItem]:
 """
 The resource for ticket messages on your Gorgias domain, supports incremental loading and pagination.
@@ -566,7 +572,9 @@ def gorgias_source(
 },
 )
 def satisfaction_surveys(
- updated_datetime=dlt.sources.incremental("updated_datetime", start_date_obj),
+ updated_datetime=dlt.sources.incremental(
+ "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+ ),
 ) -> Iterable[TDataItem]:
 """
 The resource for satisfaction surveys on your Gorgias domain, supports incremental loading and pagination.
@@ -278,4 +278,11 @@ def hubspot_events_for_objects(
 write_disposition="append",
 selected=True,
 table_name=lambda e: name + "_" + str(e["eventType"]),
- )(dlt.sources.incremental("occurredAt", initial_value=start_date.isoformat()))
+ )(
+ dlt.sources.incremental(
+ "occurredAt",
+ initial_value=start_date.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ )
+ )
@@ -33,7 +33,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
 def events(
- datetime=dlt.sources.incremental("datetime", start_date_obj.isoformat()),
+ datetime=dlt.sources.incremental(
+ "datetime",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(datetime.start_value), pendulum.now()
@@ -44,7 +49,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
 def profiles(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated.start_value), pendulum.now()
@@ -55,7 +65,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
 def campaigns(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated_at.start_value), pendulum.now()
@@ -69,7 +84,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id")
 def metrics(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_metrics(create_client(), updated.start_value)
 
@@ -83,7 +103,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
 def catalog_variants(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_catalog_variant(create_client(), updated.start_value)
 
@@ -91,19 +116,34 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 write_disposition="merge", primary_key="id", name="catalog-categories"
 )
 def catalog_categories(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_catalog_categories(create_client(), updated.start_value)
 
 @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
 def catalog_items(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_catalog_item(create_client(), updated.start_value)
 
 @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
 def forms(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated_at.start_value), pendulum.now()
@@ -114,13 +154,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id")
 def lists(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_lists(create_client(), updated.start_value)
 
 @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
 def images(
- updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+ updated_at=dlt.sources.incremental(
+ "updated_at",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated_at.start_value), pendulum.now()
@@ -130,13 +180,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="merge", primary_key="id")
 def segments(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 yield from client.fetch_segments(create_client(), updated.start_value)
 
 @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
 def flows(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated.start_value), pendulum.now()
@@ -146,7 +206,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
 @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
 def templates(
- updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+ updated=dlt.sources.incremental(
+ "updated",
+ start_date_obj.isoformat(),
+ range_end="closed",
+ range_start="closed",
+ ),
 ) -> Iterable[TDataItem]:
 intervals = split_date_range(
 pendulum.parse(updated.start_value), pendulum.now()
@@ -158,6 +158,8 @@ def shopify_source(
 initial_value=start_date_obj,
 end_value=end_date_obj,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 created_at_min: pendulum.DateTime = created_at_min_obj,
 items_per_page: int = items_per_page,
@@ -606,6 +608,8 @@ def shopify_source(
 initial_value=start_date_obj,
 end_value=end_date_obj,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 created_at_min: pendulum.DateTime = created_at_min_obj,
 items_per_page: int = items_per_page,
@@ -640,6 +644,8 @@ def shopify_source(
 initial_value=start_date_obj,
 end_value=end_date_obj,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 created_at_min: pendulum.DateTime = created_at_min_obj,
 items_per_page: int = items_per_page,
@@ -671,6 +677,8 @@ def shopify_source(
 "created_at",
 initial_value=start_date_obj,
 end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
 ),
 items_per_page: int = items_per_page,
 ) -> Iterable[TDataItem]:
@@ -689,6 +697,8 @@ def shopify_source(
 "updated_at",
 initial_value=start_date_obj,
 end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
 ),
 items_per_page: int = items_per_page,
 ) -> Iterable[TDataItem]:
@@ -730,6 +740,8 @@ def shopify_source(
 initial_value=start_date_obj,
 end_value=end_date_obj,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 items_per_page: int = items_per_page,
 ) -> Iterable[TDataItem]:
@@ -1807,6 +1819,8 @@ query discountNodes($after: String, $query: String, $first: Int) {
 "updatedAt",
 initial_value=start_date_obj,
 end_value=end_date_obj,
+ range_end="closed",
+ range_start="closed",
 ),
 items_per_page: int = items_per_page,
 ) -> Iterable[TDataItem]:
@@ -175,6 +175,8 @@ def slack_source(
 initial_value=start_dt,
 end_value=end_dt,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterable[TDataItem]:
 """
@@ -198,6 +200,8 @@ def slack_source(
 initial_value=start_dt,
 end_value=end_dt,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterable[TDataItem]:
 """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.
ingestr/src/sources.py CHANGED
@@ -240,6 +240,8 @@ class ArrowMemoryMappedSource:
 kwargs.get("incremental_key", ""),
 initial_value=start_value,
 end_value=end_value,
+ range_end="closed",
+ range_start="closed",
 )
 
 file_path = uri.split("://")[1]
@@ -285,6 +287,8 @@ class MongoDbSource:
 kwargs.get("incremental_key", ""),
 initial_value=start_value,
 end_value=end_value,
+ range_end="closed",
+ range_start="closed",
 )
 
 table_instance = self.table_builder(
@@ -353,6 +357,8 @@ class LocalCsvSource:
 kwargs.get("incremental_key", ""),
 initial_value=kwargs.get("interval_start"),
 end_value=kwargs.get("interval_end"),
+ range_end="closed",
+ range_start="closed",
 )
 )
 
@@ -1311,6 +1317,8 @@ class DynamoDBSource:
 incremental_key.strip(),
 initial_value=isotime(kwargs.get("interval_start")),
 end_value=isotime(kwargs.get("interval_end")),
+ range_end="closed",
+ range_start="closed",
 )
 
 return dynamodb(table, creds, incremental)
@@ -1336,11 +1344,6 @@ class GoogleAnalyticsSource:
 if not property_id:
 raise ValueError("property_id is required to connect to Google Analytics")
 
- interval_start = kwargs.get("interval_start")
- start_date = (
- interval_start.strftime("%Y-%m-%d") if interval_start else "2015-08-14"
- )
-
 fields = table.split(":")
 if len(fields) != 3:
 raise ValueError(
@@ -1364,10 +1367,19 @@ class GoogleAnalyticsSource:
 {"resource_name": "custom", "dimensions": dimensions, "metrics": metrics}
 ]
 
+ start_date = pendulum.now().subtract(days=30).start_of("day")
+ if kwargs.get("interval_start") is not None:
+ start_date = pendulum.instance(kwargs.get("interval_start")) # type: ignore
+
+ end_date = pendulum.now()
+ if kwargs.get("interval_end") is not None:
+ end_date = pendulum.instance(kwargs.get("interval_end")) # type: ignore
+
 return google_analytics(
 property_id=property_id[0],
 start_date=start_date,
- datetime=datetime,
+ end_date=end_date,
+ datetime_dimension=datetime,
 queries=queries,
 credentials=credentials,
 ).with_resources("basic_report")
@@ -1398,9 +1410,7 @@ class GitHubSource:
 "repo variable is required to retrieve data for a specific repository from GitHub."
 )
 
- access_token = source_fields.get("access_token", [None])[0]
- if not access_token and table not in ["repo_events"]:
- raise ValueError("access_token is required to connect with GitHub")
+ access_token = source_fields.get("access_token", [""])[0]
 
 if table in ["issues", "pull_requests"]:
 return github_reactions(
@@ -84,7 +84,10 @@ def incremental_stripe_source(
 def incremental_resource(
 endpoint: str,
 created: Optional[Any] = dlt.sources.incremental(
- "created", initial_value=start_date_unix
+ "created",
+ initial_value=start_date_unix,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Generator[Dict[Any, Any], Any, None]:
 start_value = created.last_value
@@ -110,7 +110,12 @@ def tiktok_source(
 )
 def custom_reports(
 datetime=(
- dlt.sources.incremental(incremental_loading_param, start_date)
+ dlt.sources.incremental(
+ incremental_loading_param,
+ start_date,
+ range_end="closed",
+ range_start="closed",
+ )
 if is_incremental
 else None
 ),
ingestr/src/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.12.5"
+ __version__ = "0.12.6"
@@ -260,6 +260,8 @@ def zendesk_support(
 initial_value=start_date_ts,
 end_value=end_date_ts,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterator[TDataItem]:
 # URL For ticket events
@@ -294,6 +296,8 @@ def zendesk_support(
 initial_value=start_date_obj,
 end_value=end_date_obj,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterator[TDataItem]:
 """
@@ -340,6 +344,8 @@ def zendesk_support(
 initial_value=start_date_iso_str,
 end_value=end_date_iso_str,
 allow_external_schedulers=True,
+ range_end="closed",
+ range_start="closed",
 ),
 ) -> Iterator[TDataItem]:
 """
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
- Version: 0.12.5
+ Version: 0.12.6
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -26,6 +26,7 @@ Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0
 Requires-Dist: mysql-connector-python==9.1.0
 Requires-Dist: pendulum==3.0.0
+ Requires-Dist: psutil==6.1.1
 Requires-Dist: psycopg2-binary==2.9.10
 Requires-Dist: py-machineid==0.6.0
 Requires-Dist: pyairtable==2.3.3
@@ -1,52 +1,51 @@
- ingestr/main.py,sha256=xLQiPHoD7dNvrHfNTwD379wHg6xZGmLxzPzQLq2E1RA,24746
+ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
 ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
- ingestr/src/sources.py,sha256=FXUTmII3DiEANZN37P9-dTFFRzpv0PL8bfaQvr0un8w,50761
+ ingestr/src/sources.py,sha256=GIskUoVL82x_mLerU9cgdixBNNhzBnDN-_MDraqK7hY,51166
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
- ingestr/src/version.py,sha256=QFQfu3CUVe9Ncr8kv3aaBY3oWrZmv8xboen_Uwy7eXU,23
+ ingestr/src/version.py,sha256=vb8hPdq1CrFlRl6aBYGOWE4MPv-N84JJm1f3KFvG8o4,23
 ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
 ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
 ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
- ingestr/src/asana_source/__init__.py,sha256=Y4Ti_876Yong420fQ2o4A97TdgrZNlZVxlTMLyXdSjA,8116
+ ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
 ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
 ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
 ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
 ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
- ingestr/src/facebook_ads/__init__.py,sha256=ZZyogV48gmhDcC3CYQEsC4qT3Q6JI9IOnMff2NS1M-A,9207
+ ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
 ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
 ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
- ingestr/src/filesystem/__init__.py,sha256=wHHaKFuAjsR_ZRjl6g_Flf6FhVs9qhwREthTr03_7cc,4162
+ ingestr/src/filesystem/__init__.py,sha256=hcN_sO356ChTPyg72AufrikdkFBBIScTdxtGfDm-W0E,4221
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
- ingestr/src/github/__init__.py,sha256=csA2VcjOxXrVrvp7zY-JodO9Lpy98bJ4AqRdHCLTcGM,5838
+ ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
 ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
 ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
 ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
- ingestr/src/google_analytics/__init__.py,sha256=HjA13wfJm2MGfy3h_DiM5ekkNqM2dgwYCKJ3pprnDtI,2482
- ingestr/src/google_analytics/helpers/__init__.py,sha256=y_q7dinlEwNBEpq6kCzjTa8lAhe2bb23bDPP0fcy7fY,2744
- ingestr/src/google_analytics/helpers/data_processing.py,sha256=fIdEKr9CmZN_s1T2i9BL8IYTPPqNoK6Vaquq2y8StfE,6072
+ ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
+ ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
 ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9kowZvQUCUxz0G6SlI,5404
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
- ingestr/src/gorgias/__init__.py,sha256=LZ3m6aGuhLVI3eNjvQE0rT4o_wbSPkY_SDKsM-g0V5U,21176
+ ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
- ingestr/src/hubspot/__init__.py,sha256=LshHlFzzs8trAOxSg7C9F7zIBakqsg8XfyNBouip09w,9761
+ ingestr/src/hubspot/__init__.py,sha256=DXvn1yGToFUKk-1mMqqoN0OCLNpD16-2mPyEmkhyoVY,9876
 ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
 ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
- ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
+ ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
@@ -56,23 +55,23 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
- ingestr/src/shopify/__init__.py,sha256=Hhv84zRfVsqAGP7pz-PmeopeX9CGu7TXSm3PSXHEwIA,62689
+ ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
 ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
 ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
 ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
- ingestr/src/slack/__init__.py,sha256=UF-ficQ6K32u1EHytW3P35suACo9wuc6nMrAPViyZL8,9981
+ ingestr/src/slack/__init__.py,sha256=pyDukxcilqTAe_bBzfWJ8Vxi83S-XEdEFBH2pEgILrM,10113
 ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
 ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
- ingestr/src/stripe_analytics/__init__.py,sha256=VEXH4to2vNojN4rk3qsypR7urtTzaxSBB3IBiD5tuoE,4514
+ ingestr/src/stripe_analytics/__init__.py,sha256=0HCL0qsrh_si1RR3a4k9XS94VWQ4v9aG7CqXF-V-57M,4593
 ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
 ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
- ingestr/src/tiktok_ads/__init__.py,sha256=U4ZHPUW0c4LpKx4hjT2Lz5hgWFgwQSbAAkkYIrxYHZo,4469
+ ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
 ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=cfdPflCeR_mCk5fxq0v4d7pzlvZDiAoz8bWQJYqKALM,3935
- ingestr/src/zendesk/__init__.py,sha256=C7HkN195DGdOHId2_Sa_kAlcBrUmnVYZUa_tPkiyf1Q,17564
+ ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
 ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
@@ -86,8 +85,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
- ingestr-0.12.5.dist-info/METADATA,sha256=QhFy0K3FUgK2VGdShWUOeTj_HbHElVPD64bAf2k-4G0,7956
- ingestr-0.12.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.12.5.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.12.5.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.12.5.dist-info/RECORD,,
+ ingestr-0.12.6.dist-info/METADATA,sha256=y-o_BL8nj7pVQU3sSaz9UJ9XsNVUi8Rjf5G0vNGi6io,7985
+ ingestr-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.12.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.12.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.12.6.dist-info/RECORD,,
@@ -1,70 +0,0 @@
- """Google analytics source helpers"""
-
- from typing import Iterator, List
-
- import dlt
- from apiclient.discovery import Resource # type: ignore
- from dlt.common import logger, pendulum
- from dlt.common.typing import TDataItem
- from google.analytics.data_v1beta.types import (
- Dimension,
- Metric,
- )
- from pendulum.datetime import DateTime
-
- from .data_processing import get_report
-
-
- def basic_report(
- client: Resource,
- rows_per_page: int,
- dimensions: List[str],
- metrics: List[str],
- property_id: int,
- resource_name: str,
- start_date: str,
- last_date: dlt.sources.incremental[DateTime],
- ) -> Iterator[TDataItem]:
- """
- Retrieves the data for a report given dimensions, metrics, and filters required for the report.
-
- Args:
- client: The Google Analytics client used to make requests.
- dimensions: Dimensions for the report. See metadata for the full list of dimensions.
- metrics: Metrics for the report. See metadata for the full list of metrics.
- property_id: A reference to the Google Analytics project.
- More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
- rows_per_page: Controls how many rows are retrieved per page in the reports.
- Default is 10000, maximum possible is 100000.
- resource_name: The resource name used to save incremental into dlt state.
- start_date: Incremental load start_date.
- Default is taken from dlt state if it exists.
- last_date: Incremental load end date.
- Default is taken from dlt state if it exists.
-
- Returns:
- Generator of all rows of data in the report.
- """
-
- # grab the start time from last dlt load if not filled, if that is also empty then use the first day of the millennium as the start time instead
- if last_date.last_value:
- if start_date != "2015-08-14":
- logger.warning(
- f"Using the starting date: {last_date.last_value} for incremental report: {resource_name} and ignoring start date passed as argument {start_date}"
- )
- start_date = last_date.last_value.to_date_string()
- else:
- start_date = start_date or "2015-08-14"
-
- processed_response = get_report(
- client=client,
- property_id=property_id,
- # fill dimensions and metrics with the proper api client objects
- dimension_list=[Dimension(name=dimension) for dimension in dimensions],
- metric_list=[Metric(name=metric) for metric in metrics],
- limit=rows_per_page,
- start_date=start_date,
- # configure end_date to yesterday as a date string
- end_date=pendulum.now().to_date_string(),
- )
- yield from processed_response