ingestr 0.12.5__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic.
- ingestr/main.py +1 -1
- ingestr/src/asana_source/__init__.py +4 -1
- ingestr/src/facebook_ads/__init__.py +4 -1
- ingestr/src/filesystem/__init__.py +3 -1
- ingestr/src/github/__init__.py +7 -3
- ingestr/src/google_analytics/__init__.py +57 -21
- ingestr/src/google_analytics/{helpers/data_processing.py → helpers.py} +29 -33
- ingestr/src/gorgias/__init__.py +12 -4
- ingestr/src/hubspot/__init__.py +8 -1
- ingestr/src/klaviyo/_init_.py +78 -13
- ingestr/src/shopify/__init__.py +14 -0
- ingestr/src/slack/__init__.py +4 -0
- ingestr/src/sources.py +19 -9
- ingestr/src/stripe_analytics/__init__.py +4 -1
- ingestr/src/tiktok_ads/__init__.py +6 -1
- ingestr/src/version.py +1 -1
- ingestr/src/zendesk/__init__.py +6 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/METADATA +2 -1
- {ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/RECORD +22 -23
- ingestr/src/google_analytics/helpers/__init__.py +0 -70
- {ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/WHEEL +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py CHANGED
@@ -444,7 +444,7 @@ def ingest(
 
     progressInstance: Collector = SpinnerCollector()
    if progress == Progress.log:
-        progressInstance = LogCollector(
+        progressInstance = LogCollector()
 
    is_pipelines_dir_temp = False
    if pipelines_dir is None:
ingestr/src/asana_source/__init__.py CHANGED
@@ -150,7 +150,10 @@ def tasks(
     project_array: t.List[TDataItem],
     access_token: str = dlt.secrets.value,
     modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
-        "modified_at",
+        "modified_at",
+        initial_value=DEFAULT_START_DATE,
+        range_end="closed",
+        range_start="closed",
     ),
     fields: Iterable[str] = TASK_FIELDS,
 ) -> Iterable[TDataItem]:
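Nearly every change in this release is the same edit: each dlt.sources.incremental(...) cursor now pins an explicit initial_value and sets range_start/range_end to "closed" so rows whose cursor exactly equals a window boundary are not dropped. A minimal runnable sketch of the pattern, assuming a dlt version that accepts the range arguments (dlt's documented defaults are range_start="closed", range_end="open"); the resource and rows below are illustrative, not ingestr code:

import dlt

@dlt.resource(primary_key="id", write_disposition="merge")
def tasks_sketch(
    modified_at=dlt.sources.incremental(
        "modified_at",
        initial_value="2024-01-01T00:00:00Z",  # explicit floor instead of None
        range_start="closed",  # keep rows equal to the last stored cursor
        range_end="closed",    # keep rows equal to the window's upper bound
    ),
):
    # a real source would call an API filtered by modified_at.last_value;
    # two static rows are enough to show the cursor field
    yield {"id": 1, "modified_at": "2024-06-01T00:00:00Z"}
    yield {"id": 2, "modified_at": "2024-06-02T00:00:00Z"}

if __name__ == "__main__":
    pipeline = dlt.pipeline(pipeline_name="demo", destination="duckdb")  # needs dlt[duckdb]
    print(pipeline.run(tasks_sketch()))

Because a closed boundary can re-read the edge row on the next run, the sources pair this with merge write dispositions so the primary or merge key deduplicates it.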
ingestr/src/facebook_ads/__init__.py CHANGED
@@ -159,7 +159,10 @@ def facebook_insights_source(
     )
     def facebook_insights(
         date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
-            "date_start",
+            "date_start",
+            initial_value=initial_load_start_date_str,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterator[TDataItems]:
         start_date = get_start_date(date_start, attribution_window_days_lag)
ingestr/src/filesystem/__init__.py CHANGED
@@ -38,7 +38,9 @@ def readers(
     """
     filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
     filesystem_resource.apply_hints(
-        incremental=dlt.sources.incremental("modification_date")
+        incremental=dlt.sources.incremental("modification_date"),
+        range_end="closed",
+        range_start="closed",
     )
     return (
         filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
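The filesystem source applies its cursor through apply_hints rather than a parameter default. A hedged sketch of that wiring using dlt's standard filesystem source; note that in dlt's documented API the range arguments belong inside incremental(...) itself (the bucket URL and glob below are placeholders):

import dlt
from dlt.sources.filesystem import filesystem, read_csv

# Sketch only: dlt's documented pattern keeps the range arguments inside
# incremental(); "modification_date" is the per-file cursor the source emits.
files = filesystem(bucket_url="file:///tmp/data", file_glob="*.csv")
files.apply_hints(
    incremental=dlt.sources.incremental(
        "modification_date", range_start="closed", range_end="closed"
    )
)

if __name__ == "__main__":
    pipeline = dlt.pipeline(pipeline_name="fs_demo", destination="duckdb")
    pipeline.run(files | read_csv())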
ingestr/src/github/__init__.py CHANGED
@@ -14,7 +14,7 @@ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
 def github_reactions(
     owner: str,
     name: str,
-    access_token: str
+    access_token: str,
     items_per_page: int = 100,
     max_items: Optional[int] = None,
 ) -> Sequence[DltResource]:
@@ -89,7 +89,11 @@ def github_repo_events(
     @dlt.resource(primary_key="id", table_name=lambda i: i["type"])
     def repo_events(
         last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
-            "created_at",
+            "created_at",
+            initial_value="1970-01-01T00:00:00Z",
+            last_value_func=max,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterator[TDataItems]:
         repos_path = (
@@ -114,7 +118,7 @@ def github_repo_events(
 def github_stargazers(
     owner: str,
     name: str,
-    access_token: str
+    access_token: str,
     items_per_page: int = 100,
     max_items: Optional[int] = None,
 ) -> Sequence[DltResource]:
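The repo_events fix also makes the cursor direction explicit with last_value_func=max and a Unix-epoch initial_value. A small sketch isolating those two arguments (the event row is illustrative):

import dlt

@dlt.resource(primary_key="id", table_name=lambda i: i["type"])
def repo_events_sketch(
    last_created_at=dlt.sources.incremental(
        "created_at",
        initial_value="1970-01-01T00:00:00Z",  # Unix epoch as ISO-8601 UTC
        last_value_func=max,  # keep the newest created_at seen across runs
        range_start="closed",
        range_end="closed",
    ),
):
    # ISO-8601 UTC strings sort lexicographically in time order, which is
    # why a plain-string cursor works with max()
    yield {"id": 1, "type": "PushEvent", "created_at": "2024-06-01T12:00:00Z"}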
ingestr/src/google_analytics/__init__.py CHANGED
@@ -2,26 +2,32 @@
 Defines all the sources and resources needed for Google Analytics V4
 """
 
-from typing import List, Optional, Union
+from typing import Iterator, List, Optional, Union
 
 import dlt
-from dlt.common
-from dlt.
+from dlt.common import pendulum
+from dlt.common.typing import DictStrAny, TDataItem
+from dlt.extract import DltResource
 from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
 from google.analytics.data_v1beta import BetaAnalyticsDataClient
+from google.analytics.data_v1beta.types import (
+    Dimension,
+    Metric,
+)
 
-from .helpers import
+from .helpers import get_report
 
 
 @dlt.source(max_table_nesting=0)
 def google_analytics(
-
+    datetime_dimension: str,
     credentials: Union[
         GcpOAuthCredentials, GcpServiceAccountCredentials
     ] = dlt.secrets.value,
     property_id: int = dlt.config.value,
     queries: List[DictStrAny] = dlt.config.value,
-    start_date: Optional[
+    start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
+    end_date: Optional[pendulum.DateTime] = None,
     rows_per_page: int = 10000,
 ) -> List[DltResource]:
     try:
@@ -50,21 +56,51 @@ def google_analytics(
 
         # always add "date" to dimensions so we are able to track the last day of a report
         dimensions = query["dimensions"]
-        resource_name = query["resource_name"]
 
-
-
-
-
-            rows_per_page=rows_per_page,
-            property_id=property_id,
-            dimensions=dimensions,
-            metrics=query["metrics"],
-            resource_name=resource_name,
-            start_date=start_date,
-            last_date=dlt.sources.incremental(
-                datetime
-            ),  # pass empty primary key to avoid unique checks, a primary key defined by the resource will be used
+        @dlt.resource(
+            name="basic_report",
+            merge_key=datetime_dimension,
+            write_disposition="merge",
         )
+        def basic_report(
+            incremental=dlt.sources.incremental(
+                datetime_dimension,
+                initial_value=start_date,
+                end_value=end_date,
+                range_end="closed",
+                range_start="closed",
+            ),
+        ) -> Iterator[TDataItem]:
+            start_date = incremental.last_value
+            end_date = incremental.end_value
+            if start_date is None:
+                start_date = pendulum.datetime(2024, 1, 1)
+            if end_date is None:
+                end_date = pendulum.yesterday()
+            yield from get_report(
+                client=client,
+                property_id=property_id,
+                dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+                metric_list=[Metric(name=metric) for metric in query["metrics"]],
+                per_page=rows_per_page,
+                start_date=start_date,
+                end_date=end_date,
+            )
+
+        # res = dlt.resource(
+        #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
+        # )(
+        #     client=client,
+        #     rows_per_page=rows_per_page,
+        #     property_id=property_id,
+        #     dimensions=dimensions,
+        #     metrics=query["metrics"],
+        #     resource_name=resource_name,
+        #     last_date=dlt.sources.incremental(
+        #         datetime_dimension,
+        #         initial_value=start_date,
+        #         end_value=end_date,
+        #     ),
+        # )
 
-    return [
+    return [basic_report]
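The new basic_report resource derives its reporting window from incremental state: last_value resumes from the previous run and end_value is only set when an explicit end was configured, so both get fallbacks. A standalone sketch of that window resolution, assuming pendulum 3.x (the daily zero rows are illustrative):

import dlt
from dlt.common import pendulum

@dlt.resource(name="report_window_sketch", write_disposition="merge", merge_key="date")
def report_window_sketch(
    incremental=dlt.sources.incremental(
        "date",
        initial_value=pendulum.datetime(2024, 1, 1),
        range_start="closed",
        range_end="closed",
    ),
):
    # last_value resumes from the previous run; end_value is only set when an
    # explicit end was configured, so both need fallbacks (mirrors basic_report)
    start = incremental.last_value or pendulum.datetime(2024, 1, 1)
    end = incremental.end_value or pendulum.yesterday()
    day = start
    while day <= end:
        yield {"date": day, "sessions": 0}  # a real source would fetch a report here
        day = day.add(days=1)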
ingestr/src/google_analytics/{helpers/data_processing.py → helpers.py} CHANGED
@@ -57,9 +57,9 @@ def get_report(
     property_id: int,
     dimension_list: List[Dimension],
     metric_list: List[Metric],
-
-    start_date:
-    end_date:
+    per_page: int,
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime,
 ) -> Iterator[TDataItem]:
     """
     Gets all the possible pages of reports with the given query parameters.
@@ -79,30 +79,36 @@ def get_report(
         Generator of all rows of data in the report.
     """
 
-
-
-        dimensions=dimension_list,
-        metrics=metric_list,
-        limit=limit,
-        date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
+    print(
+        "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
     )
-    # process request
-    response = client.run_report(request)
-    processed_response_generator = process_report(response=response)
-    yield from processed_response_generator
 
+    offset = 0
+    while True:
+        request = RunReportRequest(
+            property=f"properties/{property_id}",
+            dimensions=dimension_list,
+            metrics=metric_list,
+            limit=per_page,
+            offset=offset,
+            date_ranges=[
+                DateRange(
+                    start_date=start_date.to_date_string(),
+                    end_date=end_date.to_date_string(),
+                )
+            ],
+        )
+        # process request
+        response = client.run_report(request)
+        processed_response_generator = process_report(response=response)
+        # import pdb; pdb.set_trace()
+        yield from processed_response_generator
+        offset += per_page
+        if len(response.rows) < per_page or offset > 1000000:
+            break
 
-def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
-    """
-    Receives a single page for a report response, processes it, and returns a generator for every row of data in the report page.
-
-    Args:
-        response: The API response for a single page of the report.
-
-    Yields:
-        Generator of dictionaries for every row of the report page.
-    """
 
+def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
     metrics_headers = [header.name for header in response.metric_headers]
     dimensions_headers = [header.name for header in response.dimension_headers]
 
@@ -156,16 +162,6 @@ def process_metric_value(metric_type: MetricType, value: str) -> Union[str, int,
 
 
 def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
-    """
-    Helper function that receives a dimension's name and value and converts it to a datetime object if needed.
-
-    Args:
-        dimension_name: Name of the dimension.
-        dimension_value: Value of the dimension.
-
-    Returns:
-        The value of the dimension with the correct data type.
-    """
     if dimension_name == "date":
         return pendulum.from_format(dimension_value, "YYYYMMDD", tz="UTC")
     elif dimension_name == "dateHour":
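The rewritten get_report drains paginated reports with an offset/limit loop instead of a single run_report call, stopping on a short page or once the offset passes 1,000,000. The same loop shape, sketched without the GA4 client (fetch_page stands in for client.run_report):

from typing import Any, Callable, Dict, Iterator, List

def paginate(
    fetch_page: Callable[[int, int], List[Dict[str, Any]]],
    per_page: int = 10000,
    max_offset: int = 1_000_000,
) -> Iterator[Dict[str, Any]]:
    """Drain an offset/limit API: request pages until a short page comes back."""
    offset = 0
    while True:
        rows = fetch_page(offset, per_page)
        yield from rows
        offset += per_page
        # a short page means the server ran out of rows; the offset cap
        # mirrors the guard in get_report above
        if len(rows) < per_page or offset > max_offset:
            break

# usage: a fake backend of 25 rows with page size 10 yields pages of 10, 10, 5
data = [{"i": i} for i in range(25)]
assert len(list(paginate(lambda off, n: data[off:off + n], per_page=10))) == 25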
ingestr/src/gorgias/__init__.py CHANGED
@@ -116,7 +116,9 @@ def gorgias_source(
         },
     )
     def customers(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
     ) -> Iterable[TDataItem]:
         """
         The resource for customers on your Gorgias domain, supports incremental loading and pagination.
@@ -290,7 +292,9 @@ def gorgias_source(
         },
     )
     def tickets(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
     ) -> Iterable[TDataItem]:
         """
         The resource for tickets on your Gorgias domain, supports incremental loading and pagination.
@@ -481,7 +485,9 @@ def gorgias_source(
         },
     )
     def ticket_messages(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
     ) -> Iterable[TDataItem]:
         """
         The resource for ticket messages on your Gorgias domain, supports incremental loading and pagination.
@@ -566,7 +572,9 @@ def gorgias_source(
         },
     )
     def satisfaction_surveys(
-        updated_datetime=dlt.sources.incremental(
+        updated_datetime=dlt.sources.incremental(
+            "updated_datetime", start_date_obj, range_end="closed", range_start="closed"
+        ),
     ) -> Iterable[TDataItem]:
         """
         The resource for satisfaction surveys on your Gorgias domain, supports incremental loading and pagination.
ingestr/src/hubspot/__init__.py CHANGED
@@ -278,4 +278,11 @@ def hubspot_events_for_objects(
         write_disposition="append",
         selected=True,
         table_name=lambda e: name + "_" + str(e["eventType"]),
-    )(
+    )(
+        dlt.sources.incremental(
+            "occurredAt",
+            initial_value=start_date.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        )
+    )
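The HubSpot change binds the cursor at call time rather than as a parameter default: the dlt.resource(...) wrapper is invoked with an incremental instance, which replaces the function's declared default. A sketch of that spelling (the event shape is illustrative):

import dlt

def object_events(occurred_at=dlt.sources.incremental("occurredAt")):
    # a real source would query the HubSpot events API from occurred_at.last_value
    yield {"id": 1, "occurredAt": "2024-06-01T00:00:00+00:00"}

# Wrapping and then calling the resource: the incremental passed here
# overrides the parameter default declared above.
resource = dlt.resource(object_events, name="events", write_disposition="append")(
    dlt.sources.incremental(
        "occurredAt",
        initial_value="2024-01-01T00:00:00+00:00",
        range_end="closed",
        range_start="closed",
    )
)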
ingestr/src/klaviyo/_init_.py CHANGED
@@ -33,7 +33,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
     def events(
-        datetime=dlt.sources.incremental(
+        datetime=dlt.sources.incremental(
+            "datetime",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(datetime.start_value), pendulum.now()
@@ -44,7 +49,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def profiles(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated.start_value), pendulum.now()
@@ -55,7 +65,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def campaigns(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated_at.start_value), pendulum.now()
@@ -69,7 +84,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id")
     def metrics(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_metrics(create_client(), updated.start_value)
 
@@ -83,7 +103,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
     def catalog_variants(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_catalog_variant(create_client(), updated.start_value)
 
@@ -91,19 +116,34 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
         write_disposition="merge", primary_key="id", name="catalog-categories"
     )
     def catalog_categories(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_catalog_categories(create_client(), updated.start_value)
 
     @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
     def catalog_items(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_catalog_item(create_client(), updated.start_value)
 
     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
     def forms(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated_at.start_value), pendulum.now()
@@ -114,13 +154,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id")
     def lists(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_lists(create_client(), updated.start_value)
 
     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
     def images(
-        updated_at=dlt.sources.incremental(
+        updated_at=dlt.sources.incremental(
+            "updated_at",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated_at.start_value), pendulum.now()
@@ -130,13 +180,23 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="merge", primary_key="id")
     def segments(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         yield from client.fetch_segments(create_client(), updated.start_value)
 
     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
     def flows(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated.start_value), pendulum.now()
@@ -146,7 +206,12 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
 
     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
     def templates(
-        updated=dlt.sources.incremental(
+        updated=dlt.sources.incremental(
+            "updated",
+            start_date_obj.isoformat(),
+            range_end="closed",
+            range_start="closed",
+        ),
     ) -> Iterable[TDataItem]:
         intervals = split_date_range(
             pendulum.parse(updated.start_value), pendulum.now()
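Each Klaviyo resource feeds its cursor into split_date_range, chunking the span from the stored cursor up to now into fetchable windows. ingestr's real helper lives in klaviyo/helpers.py; the following is a hypothetical reimplementation to show the idea (the 30-day step is an assumption):

from typing import Iterator, Tuple
from dlt.common import pendulum

def split_date_range(
    start: pendulum.DateTime, end: pendulum.DateTime, step_days: int = 30
) -> Iterator[Tuple[pendulum.DateTime, pendulum.DateTime]]:
    """Yield consecutive (window_start, window_end) pairs covering [start, end]."""
    cursor = start
    while cursor < end:
        window_end = min(cursor.add(days=step_days), end)
        yield cursor, window_end
        cursor = window_end

for lo, hi in split_date_range(pendulum.datetime(2024, 1, 1), pendulum.datetime(2024, 3, 1)):
    print(lo.to_date_string(), "->", hi.to_date_string())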
ingestr/src/shopify/__init__.py CHANGED
@@ -158,6 +158,8 @@ def shopify_source(
             initial_value=start_date_obj,
             end_value=end_date_obj,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
         created_at_min: pendulum.DateTime = created_at_min_obj,
         items_per_page: int = items_per_page,
@@ -606,6 +608,8 @@ def shopify_source(
             initial_value=start_date_obj,
             end_value=end_date_obj,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
         created_at_min: pendulum.DateTime = created_at_min_obj,
         items_per_page: int = items_per_page,
@@ -640,6 +644,8 @@ def shopify_source(
             initial_value=start_date_obj,
             end_value=end_date_obj,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
         created_at_min: pendulum.DateTime = created_at_min_obj,
         items_per_page: int = items_per_page,
@@ -671,6 +677,8 @@ def shopify_source(
             "created_at",
             initial_value=start_date_obj,
             end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
         ),
         items_per_page: int = items_per_page,
     ) -> Iterable[TDataItem]:
@@ -689,6 +697,8 @@ def shopify_source(
             "updated_at",
             initial_value=start_date_obj,
             end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
         ),
         items_per_page: int = items_per_page,
     ) -> Iterable[TDataItem]:
@@ -730,6 +740,8 @@ def shopify_source(
             initial_value=start_date_obj,
             end_value=end_date_obj,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
         items_per_page: int = items_per_page,
     ) -> Iterable[TDataItem]:
@@ -1807,6 +1819,8 @@ query discountNodes($after: String, $query: String, $first: Int) {
             "updatedAt",
             initial_value=start_date_obj,
             end_value=end_date_obj,
+            range_end="closed",
+            range_start="closed",
         ),
         items_per_page: int = items_per_page,
     ) -> Iterable[TDataItem]:
ingestr/src/slack/__init__.py CHANGED
@@ -175,6 +175,8 @@ def slack_source(
             initial_value=start_dt,
             end_value=end_dt,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterable[TDataItem]:
         """
@@ -198,6 +200,8 @@ def slack_source(
             initial_value=start_dt,
             end_value=end_dt,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterable[TDataItem]:
         """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.
ingestr/src/sources.py CHANGED
@@ -240,6 +240,8 @@ class ArrowMemoryMappedSource:
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
+            range_end="closed",
+            range_start="closed",
         )
 
         file_path = uri.split("://")[1]
@@ -285,6 +287,8 @@ class MongoDbSource:
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
+            range_end="closed",
+            range_start="closed",
         )
 
         table_instance = self.table_builder(
@@ -353,6 +357,8 @@ class LocalCsvSource:
                 kwargs.get("incremental_key", ""),
                 initial_value=kwargs.get("interval_start"),
                 end_value=kwargs.get("interval_end"),
+                range_end="closed",
+                range_start="closed",
             )
         )
 
@@ -1311,6 +1317,8 @@ class DynamoDBSource:
             incremental_key.strip(),
             initial_value=isotime(kwargs.get("interval_start")),
             end_value=isotime(kwargs.get("interval_end")),
+            range_end="closed",
+            range_start="closed",
         )
 
         return dynamodb(table, creds, incremental)
@@ -1336,11 +1344,6 @@ class GoogleAnalyticsSource:
         if not property_id:
             raise ValueError("property_id is required to connect to Google Analytics")
 
-        interval_start = kwargs.get("interval_start")
-        start_date = (
-            interval_start.strftime("%Y-%m-%d") if interval_start else "2015-08-14"
-        )
-
         fields = table.split(":")
         if len(fields) != 3:
             raise ValueError(
@@ -1364,10 +1367,19 @@ class GoogleAnalyticsSource:
             {"resource_name": "custom", "dimensions": dimensions, "metrics": metrics}
         ]
 
+        start_date = pendulum.now().subtract(days=30).start_of("day")
+        if kwargs.get("interval_start") is not None:
+            start_date = pendulum.instance(kwargs.get("interval_start"))  # type: ignore
+
+        end_date = pendulum.now()
+        if kwargs.get("interval_end") is not None:
+            end_date = pendulum.instance(kwargs.get("interval_end"))  # type: ignore
+
         return google_analytics(
             property_id=property_id[0],
             start_date=start_date,
-
+            end_date=end_date,
+            datetime_dimension=datetime,
             queries=queries,
             credentials=credentials,
         ).with_resources("basic_report")
@@ -1398,9 +1410,7 @@ class GitHubSource:
                 "repo variable is required to retrieve data for a specific repository from GitHub."
             )
 
-        access_token = source_fields.get("access_token", [
-        if not access_token and table not in ["repo_events"]:
-            raise ValueError("access_token is required to connect with GitHub")
+        access_token = source_fields.get("access_token", [""])[0]
 
         if table in ["issues", "pull_requests"]:
             return github_reactions(
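GoogleAnalyticsSource now resolves its window with pendulum before building the source: default to the last 30 days, but prefer explicit interval_start/interval_end kwargs when given. The same resolution as a standalone helper (the function name is illustrative):

from typing import Optional, Tuple
from datetime import datetime
from dlt.common import pendulum

def resolve_window(
    interval_start: Optional[datetime], interval_end: Optional[datetime]
) -> Tuple[pendulum.DateTime, pendulum.DateTime]:
    # default window: the last 30 days, aligned to the start of the day
    start = pendulum.now().subtract(days=30).start_of("day")
    if interval_start is not None:
        # pendulum.instance upgrades a stdlib datetime to a pendulum DateTime
        start = pendulum.instance(interval_start)
    end = pendulum.now()
    if interval_end is not None:
        end = pendulum.instance(interval_end)
    return start, end

start, end = resolve_window(None, None)
print(start.to_date_string(), "->", end.to_date_string())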
ingestr/src/stripe_analytics/__init__.py CHANGED
@@ -84,7 +84,10 @@ def incremental_stripe_source(
     def incremental_resource(
         endpoint: str,
         created: Optional[Any] = dlt.sources.incremental(
-            "created",
+            "created",
+            initial_value=start_date_unix,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Generator[Dict[Any, Any], Any, None]:
         start_value = created.last_value
ingestr/src/tiktok_ads/__init__.py CHANGED
@@ -110,7 +110,12 @@ def tiktok_source(
     )
     def custom_reports(
         datetime=(
-            dlt.sources.incremental(
+            dlt.sources.incremental(
+                incremental_loading_param,
+                start_date,
+                range_end="closed",
+                range_start="closed",
+            )
             if is_incremental
             else None
         ),
ingestr/src/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.12.5"
+__version__ = "0.12.6"
ingestr/src/zendesk/__init__.py CHANGED
@@ -260,6 +260,8 @@ def zendesk_support(
             initial_value=start_date_ts,
             end_value=end_date_ts,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterator[TDataItem]:
         # URL For ticket events
@@ -294,6 +296,8 @@ def zendesk_support(
             initial_value=start_date_obj,
             end_value=end_date_obj,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterator[TDataItem]:
         """
@@ -340,6 +344,8 @@ def zendesk_support(
             initial_value=start_date_iso_str,
             end_value=end_date_iso_str,
             allow_external_schedulers=True,
+            range_end="closed",
+            range_start="closed",
         ),
     ) -> Iterator[TDataItem]:
         """
{ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.12.5
+Version: 0.12.6
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -26,6 +26,7 @@ Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0
 Requires-Dist: mysql-connector-python==9.1.0
 Requires-Dist: pendulum==3.0.0
+Requires-Dist: psutil==6.1.1
 Requires-Dist: psycopg2-binary==2.9.10
 Requires-Dist: py-machineid==0.6.0
 Requires-Dist: pyairtable==2.3.3
{ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/RECORD CHANGED
@@ -1,52 +1,51 @@
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
 ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=GIskUoVL82x_mLerU9cgdixBNNhzBnDN-_MDraqK7hY,51166
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=vb8hPdq1CrFlRl6aBYGOWE4MPv-N84JJm1f3KFvG8o4,23
 ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
 ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
 ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
-ingestr/src/asana_source/__init__.py,sha256=
+ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
 ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
 ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
 ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
 ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
-ingestr/src/facebook_ads/__init__.py,sha256=
+ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
 ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
 ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
-ingestr/src/filesystem/__init__.py,sha256=
+ingestr/src/filesystem/__init__.py,sha256=hcN_sO356ChTPyg72AufrikdkFBBIScTdxtGfDm-W0E,4221
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
-ingestr/src/github/__init__.py,sha256=
+ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
 ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
 ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
 ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
-ingestr/src/google_analytics/__init__.py,sha256=
-ingestr/src/google_analytics/helpers/__init__.py,sha256=
-ingestr/src/google_analytics/helpers/data_processing.py,sha256=fIdEKr9CmZN_s1T2i9BL8IYTPPqNoK6Vaquq2y8StfE,6072
+ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
+ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
 ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9kowZvQUCUxz0G6SlI,5404
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
-ingestr/src/gorgias/__init__.py,sha256=
+ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
-ingestr/src/hubspot/__init__.py,sha256=
+ingestr/src/hubspot/__init__.py,sha256=DXvn1yGToFUKk-1mMqqoN0OCLNpD16-2mPyEmkhyoVY,9876
 ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
 ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
-ingestr/src/klaviyo/_init_.py,sha256=
+ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
@@ -56,23 +55,23 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
-ingestr/src/shopify/__init__.py,sha256=
+ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
 ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
 ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
 ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
-ingestr/src/slack/__init__.py,sha256=
+ingestr/src/slack/__init__.py,sha256=pyDukxcilqTAe_bBzfWJ8Vxi83S-XEdEFBH2pEgILrM,10113
 ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
 ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
-ingestr/src/stripe_analytics/__init__.py,sha256=
+ingestr/src/stripe_analytics/__init__.py,sha256=0HCL0qsrh_si1RR3a4k9XS94VWQ4v9aG7CqXF-V-57M,4593
 ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
 ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
-ingestr/src/tiktok_ads/__init__.py,sha256=
+ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
 ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=cfdPflCeR_mCk5fxq0v4d7pzlvZDiAoz8bWQJYqKALM,3935
-ingestr/src/zendesk/__init__.py,sha256=
+ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
 ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
@@ -86,8 +85,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.12.5.dist-info/METADATA,sha256=
-ingestr-0.12.5.dist-info/WHEEL,sha256=
-ingestr-0.12.5.dist-info/entry_points.txt,sha256=
-ingestr-0.12.5.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.12.5.dist-info/RECORD,,
+ingestr-0.12.6.dist-info/METADATA,sha256=y-o_BL8nj7pVQU3sSaz9UJ9XsNVUi8Rjf5G0vNGi6io,7985
+ingestr-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.12.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.12.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.12.6.dist-info/RECORD,,
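The RECORD churn above is mechanical: per the wheel spec (PEP 376/427), each entry is path,sha256=<urlsafe base64 digest with padding stripped>,<size>, so every touched file gets a new hash and size. A sketch of how one entry is computed:

import base64
import hashlib
from pathlib import Path

def record_entry(path: Path) -> str:
    data = path.read_bytes()
    # wheel RECORDs use urlsafe base64 of the sha256 digest, '=' padding stripped
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path.as_posix()},sha256={digest.decode()},{len(data)}"

print(record_entry(Path(__file__)))  # usage: hash this script itself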
ingestr/src/google_analytics/helpers/__init__.py DELETED
@@ -1,70 +0,0 @@
-"""Google analytics source helpers"""
-
-from typing import Iterator, List
-
-import dlt
-from apiclient.discovery import Resource  # type: ignore
-from dlt.common import logger, pendulum
-from dlt.common.typing import TDataItem
-from google.analytics.data_v1beta.types import (
-    Dimension,
-    Metric,
-)
-from pendulum.datetime import DateTime
-
-from .data_processing import get_report
-
-
-def basic_report(
-    client: Resource,
-    rows_per_page: int,
-    dimensions: List[str],
-    metrics: List[str],
-    property_id: int,
-    resource_name: str,
-    start_date: str,
-    last_date: dlt.sources.incremental[DateTime],
-) -> Iterator[TDataItem]:
-    """
-    Retrieves the data for a report given dimensions, metrics, and filters required for the report.
-
-    Args:
-        client: The Google Analytics client used to make requests.
-        dimensions: Dimensions for the report. See metadata for the full list of dimensions.
-        metrics: Metrics for the report. See metadata for the full list of metrics.
-        property_id: A reference to the Google Analytics project.
-            More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
-        rows_per_page: Controls how many rows are retrieved per page in the reports.
-            Default is 10000, maximum possible is 100000.
-        resource_name: The resource name used to save incremental into dlt state.
-        start_date: Incremental load start_date.
-            Default is taken from dlt state if it exists.
-        last_date: Incremental load end date.
-            Default is taken from dlt state if it exists.
-
-    Returns:
-        Generator of all rows of data in the report.
-    """
-
-    # grab the start time from last dlt load if not filled, if that is also empty then use the first day of the millennium as the start time instead
-    if last_date.last_value:
-        if start_date != "2015-08-14":
-            logger.warning(
-                f"Using the starting date: {last_date.last_value} for incremental report: {resource_name} and ignoring start date passed as argument {start_date}"
-            )
-        start_date = last_date.last_value.to_date_string()
-    else:
-        start_date = start_date or "2015-08-14"
-
-    processed_response = get_report(
-        client=client,
-        property_id=property_id,
-        # fill dimensions and metrics with the proper api client objects
-        dimension_list=[Dimension(name=dimension) for dimension in dimensions],
-        metric_list=[Metric(name=metric) for metric in metrics],
-        limit=rows_per_page,
-        start_date=start_date,
-        # configure end_date to yesterday as a date string
-        end_date=pendulum.now().to_date_string(),
-    )
-    yield from processed_response
{ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/WHEEL
File without changes
{ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/entry_points.txt
File without changes
{ingestr-0.12.5.dist-info → ingestr-0.12.6.dist-info}/licenses/LICENSE.md
File without changes