ingestr 0.12.5__py3-none-any.whl → 0.12.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic.
- ingestr/main.py +1 -1
- ingestr/src/appstore/__init__.py +137 -0
- ingestr/src/appstore/client.py +126 -0
- ingestr/src/appstore/errors.py +15 -0
- ingestr/src/appstore/models.py +117 -0
- ingestr/src/appstore/resources.py +179 -0
- ingestr/src/asana_source/__init__.py +4 -1
- ingestr/src/errors.py +10 -0
- ingestr/src/facebook_ads/__init__.py +4 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/filesystem/__init__.py +3 -1
- ingestr/src/github/__init__.py +7 -3
- ingestr/src/google_analytics/__init__.py +57 -21
- ingestr/src/google_analytics/{helpers/data_processing.py → helpers.py} +29 -33
- ingestr/src/gorgias/__init__.py +12 -4
- ingestr/src/hubspot/__init__.py +8 -1
- ingestr/src/klaviyo/_init_.py +78 -13
- ingestr/src/shopify/__init__.py +14 -0
- ingestr/src/slack/__init__.py +4 -0
- ingestr/src/sources.py +99 -10
- ingestr/src/stripe_analytics/__init__.py +4 -1
- ingestr/src/tiktok_ads/__init__.py +6 -1
- ingestr/src/version.py +1 -1
- ingestr/src/zendesk/__init__.py +6 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/METADATA +3 -1
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/RECORD +29 -24
- ingestr/src/google_analytics/helpers/__init__.py +0 -70
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/WHEEL +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/google_analytics/__init__.py
CHANGED

@@ -2,26 +2,32 @@
 Defines all the sources and resources needed for Google Analytics V4
 """
 
-from typing import List, Optional, Union
+from typing import Iterator, List, Optional, Union
 
 import dlt
-from dlt.common
-from dlt.
+from dlt.common import pendulum
+from dlt.common.typing import DictStrAny, TDataItem
+from dlt.extract import DltResource
 from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
 from google.analytics.data_v1beta import BetaAnalyticsDataClient
+from google.analytics.data_v1beta.types import (
+    Dimension,
+    Metric,
+)
 
-from .helpers import
+from .helpers import get_report
 
 
 @dlt.source(max_table_nesting=0)
 def google_analytics(
-
+    datetime_dimension: str,
    credentials: Union[
        GcpOAuthCredentials, GcpServiceAccountCredentials
    ] = dlt.secrets.value,
    property_id: int = dlt.config.value,
    queries: List[DictStrAny] = dlt.config.value,
-    start_date: Optional[
+    start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
+    end_date: Optional[pendulum.DateTime] = None,
    rows_per_page: int = 10000,
 ) -> List[DltResource]:
    try:
@@ -50,21 +56,51 @@ def google_analytics(
 
        # always add "date" to dimensions so we are able to track the last day of a report
        dimensions = query["dimensions"]
-        resource_name = query["resource_name"]
 
-
-
-
-
-            rows_per_page=rows_per_page,
-            property_id=property_id,
-            dimensions=dimensions,
-            metrics=query["metrics"],
-            resource_name=resource_name,
-            start_date=start_date,
-            last_date=dlt.sources.incremental(
-                datetime
-            ),  # pass empty primary key to avoid unique checks, a primary key defined by the resource will be used
+        @dlt.resource(
+            name="basic_report",
+            merge_key=datetime_dimension,
+            write_disposition="merge",
        )
+        def basic_report(
+            incremental=dlt.sources.incremental(
+                datetime_dimension,
+                initial_value=start_date,
+                end_value=end_date,
+                range_end="closed",
+                range_start="closed",
+            ),
+        ) -> Iterator[TDataItem]:
+            start_date = incremental.last_value
+            end_date = incremental.end_value
+            if start_date is None:
+                start_date = pendulum.datetime(2024, 1, 1)
+            if end_date is None:
+                end_date = pendulum.yesterday()
+            yield from get_report(
+                client=client,
+                property_id=property_id,
+                dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+                metric_list=[Metric(name=metric) for metric in query["metrics"]],
+                per_page=rows_per_page,
+                start_date=start_date,
+                end_date=end_date,
+            )
+
+        # res = dlt.resource(
+        #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
+        # )(
+        #     client=client,
+        #     rows_per_page=rows_per_page,
+        #     property_id=property_id,
+        #     dimensions=dimensions,
+        #     metrics=query["metrics"],
+        #     resource_name=resource_name,
+        #     last_date=dlt.sources.incremental(
+        #         datetime_dimension,
+        #         initial_value=start_date,
+        #         end_value=end_date,
+        #     ),
+        # )
 
-    return [
+    return [basic_report]
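The refactored source takes an explicit datetime_dimension plus start_date/end_date and builds a single merge-loaded basic_report resource per query. Below is a minimal sketch of how the new signature could be driven from a dlt pipeline; the property ID, query, and pipeline names are placeholders, and GCP credentials are assumed to come from dlt secrets rather than being shown here.

import dlt
import pendulum

from ingestr.src.google_analytics import google_analytics

# Placeholder property ID and query; credentials resolve from dlt secrets/config.
source = google_analytics(
    datetime_dimension="date",
    property_id=123456,
    queries=[
        {
            "resource_name": "custom",
            "dimensions": ["date", "country"],
            "metrics": ["totalUsers"],
        }
    ],
    start_date=pendulum.datetime(2024, 1, 1),
    end_date=pendulum.yesterday(),
)

pipeline = dlt.pipeline(
    pipeline_name="ga4_example",  # hypothetical names
    destination="duckdb",
    dataset_name="analytics",
)
pipeline.run(source.with_resources("basic_report"))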
ingestr/src/google_analytics/{helpers/data_processing.py → helpers.py}
RENAMED

@@ -57,9 +57,9 @@ def get_report(
    property_id: int,
    dimension_list: List[Dimension],
    metric_list: List[Metric],
-
-    start_date:
-    end_date:
+    per_page: int,
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime,
 ) -> Iterator[TDataItem]:
    """
    Gets all the possible pages of reports with the given query parameters.
@@ -79,30 +79,36 @@ def get_report(
        Generator of all rows of data in the report.
    """
 
-
-
-        dimensions=dimension_list,
-        metrics=metric_list,
-        limit=limit,
-        date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
+    print(
+        "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
    )
-    # process request
-    response = client.run_report(request)
-    processed_response_generator = process_report(response=response)
-    yield from processed_response_generator
 
+    offset = 0
+    while True:
+        request = RunReportRequest(
+            property=f"properties/{property_id}",
+            dimensions=dimension_list,
+            metrics=metric_list,
+            limit=per_page,
+            offset=offset,
+            date_ranges=[
+                DateRange(
+                    start_date=start_date.to_date_string(),
+                    end_date=end_date.to_date_string(),
+                )
+            ],
+        )
+        # process request
+        response = client.run_report(request)
+        processed_response_generator = process_report(response=response)
+        # import pdb; pdb.set_trace()
+        yield from processed_response_generator
+        offset += per_page
+        if len(response.rows) < per_page or offset > 1000000:
+            break
 
-def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
-    """
-    Receives a single page for a report response, processes it, and returns a generator for every row of data in the report page.
-
-    Args:
-        response: The API response for a single page of the report.
-
-    Yields:
-        Generator of dictionaries for every row of the report page.
-    """
 
+def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
    metrics_headers = [header.name for header in response.metric_headers]
    dimensions_headers = [header.name for header in response.dimension_headers]
 
@@ -156,16 +162,6 @@ def process_metric_value(metric_type: MetricType, value: str) -> Union[str, int,
 
 
 def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
-    """
-    Helper function that receives a dimension's name and value and converts it to a datetime object if needed.
-
-    Args:
-        dimension_name: Name of the dimension.
-        dimension_value: Value of the dimension.
-
-    Returns:
-        The value of the dimension with the correct data type.
-    """
    if dimension_name == "date":
        return pendulum.from_format(dimension_value, "YYYYMMDD", tz="UTC")
    elif dimension_name == "dateHour":
ingestr/src/gorgias/__init__.py
CHANGED
@@ -116,7 +116,9 @@ def gorgias_source(
        },
    )
    def customers(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
    ) -> Iterable[TDataItem]:
        """
        The resource for customers on your Gorgias domain, supports incremental loading and pagination.
@@ -290,7 +292,9 @@ def gorgias_source(
        },
    )
    def tickets(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
    ) -> Iterable[TDataItem]:
        """
        The resource for tickets on your Gorgias domain, supports incremental loading and pagination.
@@ -481,7 +485,9 @@ def gorgias_source(
        },
    )
    def ticket_messages(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
    ) -> Iterable[TDataItem]:
        """
        The resource for ticket messages on your Gorgias domain, supports incremental loading and pagination.
@@ -566,7 +572,9 @@ def gorgias_source(
        },
    )
    def satisfaction_surveys(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
    ) -> Iterable[TDataItem]:
        """
        The resource for satisfaction surveys on your Gorgias domain, supports incremental loading and pagination.
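Each Gorgias resource now names its cursor field explicitly and marks both ends of the extraction window as closed, so rows stamped exactly at the window boundaries are kept rather than dropped; the same range_start/range_end arguments are added to the Klaviyo, Shopify, Slack, and sources.py resources below. A minimal standalone sketch of the pattern, with an illustrative resource and cursor field that are not part of ingestr:

import dlt
from dlt.common import pendulum


@dlt.resource(write_disposition="merge", primary_key="id")
def items(
    updated_at=dlt.sources.incremental(
        "updated_at",  # cursor field read from each yielded row
        initial_value=pendulum.datetime(2024, 1, 1, tz="UTC"),
        range_start="closed",  # keep rows equal to the window start
        range_end="closed",    # keep rows equal to the window end
    ),
):
    # stand-in for an API call that would filter on updated_at.last_value
    yield {"id": 1, "updated_at": pendulum.datetime(2024, 1, 2, tz="UTC")}


print(list(items()))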
ingestr/src/hubspot/__init__.py
CHANGED
@@ -278,4 +278,11 @@ def hubspot_events_for_objects(
        write_disposition="append",
        selected=True,
        table_name=lambda e: name + "_" + str(e["eventType"]),
-    )(
+    )(
+        dlt.sources.incremental(
+            "occurredAt",
+            initial_value=start_date.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        )
+    )
ingestr/src/klaviyo/_init_.py
CHANGED
@@ -33,7 +33,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
    def events(
-        datetime=dlt.sources.incremental(
+        datetime=dlt.sources.incremental(
+            "datetime",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(datetime.start_value), pendulum.now()
@@ -44,7 +49,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
    def profiles(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated.start_value), pendulum.now()
@@ -55,7 +65,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
    def campaigns(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated_at.start_value), pendulum.now()
@@ -69,7 +84,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id")
    def metrics(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_metrics(create_client(), updated.start_value)
 
@@ -83,7 +103,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
    def catalog_variants(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_catalog_variant(create_client(), updated.start_value)
 
@@ -91,19 +116,34 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
        write_disposition="merge", primary_key="id", name="catalog-categories"
    )
    def catalog_categories(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_catalog_categories(create_client(), updated.start_value)
 
    @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
    def catalog_items(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_catalog_item(create_client(), updated.start_value)
 
    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
    def forms(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated_at.start_value), pendulum.now()
@@ -114,13 +154,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id")
    def lists(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_lists(create_client(), updated.start_value)
 
    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
    def images(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated_at.start_value), pendulum.now()
@@ -130,13 +180,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="merge", primary_key="id")
    def segments(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        yield from client.fetch_segments(create_client(), updated.start_value)
 
    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
    def flows(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated.start_value), pendulum.now()
@@ -146,7 +206,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
    def templates(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
    ) -> Iterable[TDataItem]:
        intervals = split_date_range(
            pendulum.parse(updated.start_value), pendulum.now()
ingestr/src/shopify/__init__.py
CHANGED
@@ -158,6 +158,8 @@ def shopify_source(
            initial_value=start_date_obj,
            end_value=end_date_obj,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
        created_at_min: pendulum.DateTime = created_at_min_obj,
        items_per_page: int = items_per_page,
@@ -606,6 +608,8 @@ def shopify_source(
            initial_value=start_date_obj,
            end_value=end_date_obj,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
        created_at_min: pendulum.DateTime = created_at_min_obj,
        items_per_page: int = items_per_page,
@@ -640,6 +644,8 @@ def shopify_source(
            initial_value=start_date_obj,
            end_value=end_date_obj,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
        created_at_min: pendulum.DateTime = created_at_min_obj,
        items_per_page: int = items_per_page,
@@ -671,6 +677,8 @@ def shopify_source(
            "created_at",
            initial_value=start_date_obj,
            end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
@@ -689,6 +697,8 @@ def shopify_source(
            "updated_at",
            initial_value=start_date_obj,
            end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
@@ -730,6 +740,8 @@ def shopify_source(
            initial_value=start_date_obj,
            end_value=end_date_obj,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
@@ -1807,6 +1819,8 @@ query discountNodes($after: String, $query: String, $first: Int) {
            "updatedAt",
            initial_value=start_date_obj,
            end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
ingestr/src/slack/__init__.py
CHANGED
@@ -175,6 +175,8 @@ def slack_source(
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """
@@ -198,6 +200,8 @@ def slack_source(
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.
ingestr/src/sources.py
CHANGED
@@ -3,7 +3,7 @@ import csv
 import json
 import os
 import re
-from datetime import date, datetime
+from datetime import date, datetime, timedelta
 from typing import (
    Any,
    Callable,
@@ -46,10 +46,16 @@ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
 from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
 from ingestr.src.appsflyer._init_ import appsflyer_source
+from ingestr.src.appstore import app_store
+from ingestr.src.appstore.client import AppStoreConnectClient
 from ingestr.src.arrow import memory_mapped_arrow
 from ingestr.src.asana_source import asana_source
 from ingestr.src.chess import source
 from ingestr.src.dynamodb import dynamodb
+from ingestr.src.errors import (
+    MissingValueError,
+    UnsupportedResourceError,
+)
 from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
 from ingestr.src.filesystem import readers
 from ingestr.src.filters import table_adapter_exclude_columns
@@ -240,6 +246,8 @@ class ArrowMemoryMappedSource:
            kwargs.get("incremental_key", ""),
            initial_value=start_value,
            end_value=end_value,
+            range_end="closed",
+            range_start="closed",
        )
 
        file_path = uri.split("://")[1]
@@ -285,6 +293,8 @@ class MongoDbSource:
            kwargs.get("incremental_key", ""),
            initial_value=start_value,
            end_value=end_value,
+            range_end="closed",
+            range_start="closed",
        )
 
        table_instance = self.table_builder(
@@ -353,6 +363,8 @@ class LocalCsvSource:
                kwargs.get("incremental_key", ""),
                initial_value=kwargs.get("interval_start"),
                end_value=kwargs.get("interval_end"),
+                range_end="closed",
+                range_start="closed",
            )
        )
 
@@ -1311,6 +1323,8 @@ class DynamoDBSource:
            incremental_key.strip(),
            initial_value=isotime(kwargs.get("interval_start")),
            end_value=isotime(kwargs.get("interval_end")),
+            range_end="closed",
+            range_start="closed",
        )
 
        return dynamodb(table, creds, incremental)
@@ -1336,11 +1350,6 @@ class GoogleAnalyticsSource:
        if not property_id:
            raise ValueError("property_id is required to connect to Google Analytics")
 
-        interval_start = kwargs.get("interval_start")
-        start_date = (
-            interval_start.strftime("%Y-%m-%d") if interval_start else "2015-08-14"
-        )
-
        fields = table.split(":")
        if len(fields) != 3:
            raise ValueError(
@@ -1364,10 +1373,19 @@ class GoogleAnalyticsSource:
            {"resource_name": "custom", "dimensions": dimensions, "metrics": metrics}
        ]
 
+        start_date = pendulum.now().subtract(days=30).start_of("day")
+        if kwargs.get("interval_start") is not None:
+            start_date = pendulum.instance(kwargs.get("interval_start"))  # type: ignore
+
+        end_date = pendulum.now()
+        if kwargs.get("interval_end") is not None:
+            end_date = pendulum.instance(kwargs.get("interval_end"))  # type: ignore
+
        return google_analytics(
            property_id=property_id[0],
            start_date=start_date,
-
+            end_date=end_date,
+            datetime_dimension=datetime,
            queries=queries,
            credentials=credentials,
        ).with_resources("basic_report")
@@ -1398,9 +1416,7 @@ class GitHubSource:
            "repo variable is required to retrieve data for a specific repository from GitHub."
        )
 
-        access_token = source_fields.get("access_token", [
-        if not access_token and table not in ["repo_events"]:
-            raise ValueError("access_token is required to connect with GitHub")
+        access_token = source_fields.get("access_token", [""])[0]
 
        if table in ["issues", "pull_requests"]:
            return github_reactions(
@@ -1414,3 +1430,76 @@ class GitHubSource:
        raise ValueError(
            f"Resource '{table}' is not supported for GitHub source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
        )
+
+
+class AppleAppStoreSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def init_client(
+        self,
+        key_id: str,
+        issuer_id: str,
+        key_path: Optional[List[str]],
+        key_base64: Optional[List[str]],
+    ):
+        key = None
+        if key_path is not None:
+            with open(key_path[0]) as f:
+                key = f.read()
+        else:
+            key = base64.b64decode(key_base64[0]).decode()  # type: ignore
+
+        return AppStoreConnectClient(key.encode(), key_id, issuer_id)
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "App Store takes care of incrementality on its own, you should not provide incremental_key"
+            )
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        key_id = params.get("key_id")
+        if key_id is None:
+            raise MissingValueError("key_id", "App Store")
+
+        key_path = params.get("key_path")
+        key_base64 = params.get("key_base64")
+        key_available = any(
+            map(
+                lambda x: x is not None,
+                [key_path, key_base64],
+            )
+        )
+        if key_available is False:
+            raise MissingValueError("key_path or key_base64", "App Store")
+
+        issuer_id = params.get("issuer_id")
+        if issuer_id is None:
+            raise MissingValueError("issuer_id", "App Store")
+
+        client = self.init_client(key_id[0], issuer_id[0], key_path, key_base64)
+
+        app_ids = params.get("app_id")
+        if ":" in table:
+            intended_table, app_ids_override = table.split(":", maxsplit=1)
+            app_ids = app_ids_override.split(",")
+            table = intended_table
+
+        if app_ids is None:
+            raise MissingValueError("app_id", "App Store")
+
+        src = app_store(
+            client,
+            app_ids,
+            start_date=kwargs.get(
+                "interval_start", datetime.now() - timedelta(days=30)
+            ),
+            end_date=kwargs.get("interval_end"),
+        )
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "AppStore")
+
+        return src.with_resources(table)