ingestr 0.12.5__py3-none-any.whl → 0.12.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +1 -1
- ingestr/src/appstore/__init__.py +137 -0
- ingestr/src/appstore/client.py +126 -0
- ingestr/src/appstore/errors.py +15 -0
- ingestr/src/appstore/models.py +117 -0
- ingestr/src/appstore/resources.py +179 -0
- ingestr/src/asana_source/__init__.py +4 -1
- ingestr/src/errors.py +10 -0
- ingestr/src/facebook_ads/__init__.py +4 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/filesystem/__init__.py +3 -1
- ingestr/src/github/__init__.py +7 -3
- ingestr/src/google_analytics/__init__.py +57 -21
- ingestr/src/google_analytics/{helpers/data_processing.py → helpers.py} +29 -33
- ingestr/src/gorgias/__init__.py +12 -4
- ingestr/src/hubspot/__init__.py +8 -1
- ingestr/src/klaviyo/__init__.py +78 -13
- ingestr/src/shopify/__init__.py +14 -0
- ingestr/src/slack/__init__.py +4 -0
- ingestr/src/sources.py +99 -10
- ingestr/src/stripe_analytics/__init__.py +4 -1
- ingestr/src/tiktok_ads/__init__.py +6 -1
- ingestr/src/version.py +1 -1
- ingestr/src/zendesk/__init__.py +6 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/METADATA +3 -1
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/RECORD +29 -24
- ingestr/src/google_analytics/helpers/__init__.py +0 -70
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/WHEEL +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.5.dist-info → ingestr-0.12.7.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
|
@@ -444,7 +444,7 @@ def ingest(
|
|
|
444
444
|
|
|
445
445
|
progressInstance: Collector = SpinnerCollector()
|
|
446
446
|
if progress == Progress.log:
|
|
447
|
-
progressInstance = LogCollector(
|
|
447
|
+
progressInstance = LogCollector()
|
|
448
448
|
|
|
449
449
|
is_pipelines_dir_temp = False
|
|
450
450
|
if pipelines_dir is None:
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import gzip
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Iterable, List, Optional
|
|
8
|
+
|
|
9
|
+
import dlt
|
|
10
|
+
import requests
|
|
11
|
+
from dlt.common.typing import TDataItem
|
|
12
|
+
from dlt.sources import DltResource
|
|
13
|
+
|
|
14
|
+
from .client import AppStoreConnectClientInterface
|
|
15
|
+
from .errors import (
|
|
16
|
+
NoOngoingReportRequestsFoundError,
|
|
17
|
+
NoReportsFoundError,
|
|
18
|
+
NoSuchReportError,
|
|
19
|
+
)
|
|
20
|
+
from .models import AnalyticsReportInstancesResponse
|
|
21
|
+
from .resources import RESOURCES
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def app_store(
    client: AppStoreConnectClientInterface,
    app_ids: List[str],
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
) -> Iterable[DltResource]:
    """dlt source exposing one resource per App Store Connect analytics report.

    Args:
        client: API client used to fetch report metadata and segment data.
        app_ids: Apple app identifiers to pull reports for.
        start_date: Optional inclusive lower bound on the report processing date.
        end_date: Optional inclusive upper bound on the report processing date.

    Yields:
        One DltResource per entry in RESOURCES, each backed by
        get_analytics_reports with that resource's name/primary key/columns.
    """
    for resource in RESOURCES:
        # Bind the generic report reader to this resource's schema config,
        # then call it with the report-specific arguments.
        yield dlt.resource(
            get_analytics_reports,
            name=resource.name,
            primary_key=resource.primary_key,
            columns=resource.columns,
        )(client, app_ids, resource.report_name, start_date, end_date)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def filter_instances_by_date(
    instances: AnalyticsReportInstancesResponse,
    start_date: Optional[datetime],
    end_date: Optional[datetime],
) -> AnalyticsReportInstancesResponse:
    """Return a copy of *instances* keeping only report instances whose
    processingDate lies within [start_date, end_date].

    Either bound may be None, in which case that side is unbounded. Both
    bounds are inclusive. The input response is never mutated.
    """
    filtered = deepcopy(instances)  # work on a copy; caller's data stays intact
    if start_date is not None:
        filtered.data = [
            item
            for item in filtered.data
            if datetime.fromisoformat(item.attributes.processingDate) >= start_date
        ]
    if end_date is not None:
        filtered.data = [
            item
            for item in filtered.data
            if datetime.fromisoformat(item.attributes.processingDate) <= end_date
        ]
    return filtered
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_analytics_reports(
    client: AppStoreConnectClientInterface,
    app_ids: List[str],
    report_name: str,
    start_date: Optional[datetime],
    end_date: Optional[datetime],
    last_processing_date=dlt.sources.incremental("processing_date"),
) -> Iterable[TDataItem]:
    """Yield rows of the named analytics report for every requested app.

    Incremental loading: dlt tracks the highest "processing_date" emitted so
    far; on subsequent runs that stored value replaces start_date so only
    newer report instances are fetched.
    """
    if last_processing_date.last_value:
        # Resume from the last processed date rather than the configured start.
        start_date = datetime.fromisoformat(last_processing_date.last_value)
    for app_id in app_ids:
        yield from get_report(client, app_id, report_name, start_date, end_date)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_report(
    client: AppStoreConnectClientInterface,
    app_id: str,
    report_name: str,
    start_date: Optional[datetime],
    end_date: Optional[datetime],
) -> Iterable[TDataItem]:
    """Stream rows of one analytics report for a single app.

    Flow: find the app's ONGOING report request, locate the named report,
    then for each report instance in the date window download its gzip
    segments, parse them as CSV/TSV, and yield one dict per row with the
    instance's processingDate attached as "processing_date".

    Raises:
        NoOngoingReportRequestsFoundError: no active report request exists.
        NoSuchReportError: the named report is not available.
        NoReportsFoundError: a report has no instances in the date range.
    """
    report_requests = client.list_analytics_report_requests(app_id)
    # Only ONGOING requests keep producing new instances; ignore one-time
    # requests and those Apple stopped due to inactivity.
    ongoing_requests = list(
        filter(
            lambda x: x.attributes.accessType == "ONGOING"
            and not x.attributes.stoppedDueToInactivity,
            report_requests.data,
        )
    )

    if len(ongoing_requests) == 0:
        raise NoOngoingReportRequestsFoundError()

    # NOTE(review): only the first ONGOING request is consulted — confirm at
    # most one can exist per app.
    reports = client.list_analytics_reports(ongoing_requests[0].id, report_name)
    if len(reports.data) == 0:
        raise NoSuchReportError(report_name)

    for report in reports.data:
        instances = client.list_report_instances(report.id)

        instances = filter_instances_by_date(instances, start_date, end_date)

        if len(instances.data) == 0:
            raise NoReportsFoundError()

        for instance in instances.data:
            segments = client.list_report_segments(instance.id)
            # Segments are buffered to disk first, then parsed, so the temp
            # dir (and the downloads) are cleaned up automatically.
            with tempfile.TemporaryDirectory() as temp_dir:
                files = []
                for segment in segments.data:
                    # NOTE(review): no timeout on this download — a stalled
                    # segment URL would hang the pipeline; consider adding one.
                    payload = requests.get(segment.attributes.url, stream=True)
                    payload.raise_for_status()

                    csv_path = os.path.join(
                        temp_dir, f"{segment.attributes.checksum}.csv"
                    )
                    with open(csv_path, "wb") as f:
                        for chunk in payload.iter_content(chunk_size=8192):
                            f.write(chunk)
                    files.append(csv_path)
                for file in files:
                    # The downloaded payload is gzip-compressed text despite
                    # the ".csv" name given above.
                    with gzip.open(file, "rt") as f:
                        # TODO: infer delimiter from the file itself
                        delimiter = (
                            "," if report_name == "App Crashes Expanded" else "\t"
                        )
                        reader = csv.DictReader(f, delimiter=delimiter)
                        for row in reader:
                            yield {
                                "processing_date": instance.attributes.processingDate,
                                **row,
                            }
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import time
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import jwt
|
|
6
|
+
import requests
|
|
7
|
+
from requests.models import PreparedRequest
|
|
8
|
+
|
|
9
|
+
from .models import (
|
|
10
|
+
AnalyticsReportInstancesResponse,
|
|
11
|
+
AnalyticsReportRequestsResponse,
|
|
12
|
+
AnalyticsReportResponse,
|
|
13
|
+
AnalyticsReportSegmentsResponse,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AppStoreConnectClientInterface(abc.ABC):
    """Abstract interface for the App Store Connect analytics API.

    Exists so the report-reading code can be exercised against a fake
    client; AppStoreConnectClient is the HTTP-backed implementation.
    """

    @abc.abstractmethod
    def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
        """Return the analytics report requests registered for *app_id*."""
        pass

    @abc.abstractmethod
    def list_analytics_reports(
        self, req_id: str, report_name: str
    ) -> AnalyticsReportResponse:
        """Return reports under request *req_id* filtered by *report_name*."""
        pass

    @abc.abstractmethod
    def list_report_instances(
        self,
        report_id: str,
        granularity: str = "DAILY",
    ) -> AnalyticsReportInstancesResponse:
        """Return all instances of *report_id* at the given granularity."""
        pass

    @abc.abstractmethod
    def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
        """Return all downloadable segments of report instance *instance_id*."""
        pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AppStoreConnectClient(AppStoreConnectClientInterface):
    """HTTP client for the App Store Connect v1 API.

    Each request is signed with a short-lived ES256 JWT built from the
    private key, key id, and issuer id supplied at construction time.
    """

    def __init__(self, key: bytes, key_id: str, issuer_id: str):
        # Name-mangled attributes keep credentials off the public surface.
        self.__key = key
        self.__key_id = key_id
        self.__issuer_id = issuer_id

    def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
        """Fetch the analytics report requests for *app_id* (single page)."""
        res = requests.get(
            f"https://api.appstoreconnect.apple.com/v1/apps/{app_id}/analyticsReportRequests",
            auth=self.auth,
        )
        res.raise_for_status()

        return AnalyticsReportRequestsResponse.from_json(res.text)  # type: ignore

    def list_analytics_reports(
        self, req_id: str, report_name: str
    ) -> AnalyticsReportResponse:
        """Fetch reports under request *req_id*, filtered by exact name."""
        params = {"filter[name]": report_name}
        res = requests.get(
            f"https://api.appstoreconnect.apple.com/v1/analyticsReportRequests/{req_id}/reports",
            auth=self.auth,
            params=params,
        )
        res.raise_for_status()
        return AnalyticsReportResponse.from_json(res.text)  # type: ignore

    def list_report_instances(
        self,
        report_id: str,
        granularity: str = "DAILY",
    ) -> AnalyticsReportInstancesResponse:
        """Fetch ALL instances of *report_id*, following pagination.

        Returns a response whose ``data`` aggregates every page; ``links``
        and ``meta`` come from the last page fetched.
        """
        data = []
        url = f"https://api.appstoreconnect.apple.com/v1/analyticsReports/{report_id}/instances"
        params: Optional[dict] = {"filter[granularity]": granularity}

        while url:
            res = requests.get(url, auth=self.auth, params=params)
            res.raise_for_status()

            response_data = AnalyticsReportInstancesResponse.from_json(res.text)  # type: ignore
            data.extend(response_data.data)

            # links.next is None on the last page, ending the loop.
            url = response_data.links.next
            params = None  # Clear params for subsequent requests

        return AnalyticsReportInstancesResponse(
            data=data,
            links=response_data.links,
            meta=response_data.meta,
        )

    def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
        """Fetch ALL segments of report instance *instance_id*, following pagination."""
        segments = []
        url = f"https://api.appstoreconnect.apple.com/v1/analyticsReportInstances/{instance_id}/segments"

        while url:
            res = requests.get(url, auth=self.auth)
            res.raise_for_status()

            response_data = AnalyticsReportSegmentsResponse.from_json(res.text)  # type: ignore
            segments.extend(response_data.data)

            url = response_data.links.next

        return AnalyticsReportSegmentsResponse(
            data=segments, links=response_data.links, meta=response_data.meta
        )

    def auth(self, req: PreparedRequest) -> PreparedRequest:
        """requests auth hook: sign the outgoing request with a fresh JWT.

        A new token is minted per request with a 600-second expiry, so
        tokens never need refreshing or caching.
        """
        headers = {
            "alg": "ES256",
            "kid": self.__key_id,
        }
        payload = {
            "iss": self.__issuer_id,
            "exp": int(time.time()) + 600,
            "aud": "appstoreconnect-v1",
        }
        req.headers["Authorization"] = jwt.encode(
            payload,
            self.__key,
            algorithm="ES256",
            headers=headers,
        )
        return req
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
class NoReportsFoundError(Exception):
    """Raised when no report instances exist in the requested date range."""

    def __init__(self):
        message = "No Report instances found for the given date range"
        super().__init__(message)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NoOngoingReportRequestsFoundError(Exception):
    """Raised when an app has no active (ONGOING) analytics report requests."""

    def __init__(self):
        message = (
            "No ONGOING report requests found (or they're stopped due to inactivity)"
        )
        super().__init__(message)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NoSuchReportError(Exception):
    """Raised when the requested report name is not available for the app."""

    def __init__(self, report_name):
        message = f"No such report found: {report_name}"
        super().__init__(message)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from dataclasses_json import dataclass_json
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass_json
@dataclass
class Links:
    """Pagination links block returned by the API."""

    # NOTE: field intentionally named `self` to mirror the API's JSON key.
    self: str
    # Absent (None) on the final page — used as the pagination stop signal.
    next: Optional[str] = None


@dataclass_json
@dataclass
class ReportRequestAttributes:
    """Attributes of an analytics report request (camelCase mirrors the API)."""

    accessType: str
    stoppedDueToInactivity: bool


@dataclass_json
@dataclass
class ReportAttributes:
    """Attributes of a single analytics report."""

    name: str
    category: str


@dataclass_json
@dataclass
class ReportInstanceAttributes:
    """Attributes of one generated instance of a report."""

    granularity: str
    # ISO-format date string; parsed with datetime.fromisoformat elsewhere.
    processingDate: str


@dataclass_json
@dataclass
class ReportSegmentAttributes:
    """Attributes of a downloadable report segment."""

    checksum: str
    url: str
    sizeInBytes: int


@dataclass_json
@dataclass
class ReportRequest:
    """A report request record: type/id plus its attributes."""

    type: str
    id: str
    attributes: ReportRequestAttributes


@dataclass_json
@dataclass
class Report:
    """A report record: type/id plus its attributes."""

    type: str
    id: str
    attributes: ReportAttributes


@dataclass_json
@dataclass
class ReportInstance:
    """A report instance record: type/id plus its attributes."""

    type: str
    id: str
    attributes: ReportInstanceAttributes


@dataclass_json
@dataclass
class ReportSegment:
    """A report segment record: type/id plus its attributes."""

    type: str
    id: str
    attributes: ReportSegmentAttributes


@dataclass_json
@dataclass
class PagingMeta:
    """Paging counters inside the response's meta block."""

    total: int
    limit: int


@dataclass_json
@dataclass
class Meta:
    """Response metadata wrapper."""

    paging: PagingMeta


@dataclass_json
@dataclass
class AnalyticsReportRequestsResponse:
    """Response envelope for listing analytics report requests."""

    data: List[ReportRequest]
    meta: Meta
    links: Links


@dataclass_json
@dataclass
class AnalyticsReportResponse:
    """Response envelope for listing analytics reports."""

    data: List[Report]
    meta: Meta
    links: Links


@dataclass_json
@dataclass
class AnalyticsReportInstancesResponse:
    """Response envelope for listing report instances."""

    data: List[ReportInstance]
    meta: Meta
    links: Links


@dataclass_json
@dataclass
class AnalyticsReportSegmentsResponse:
    """Response envelope for listing report segments."""

    data: List[ReportSegment]
    meta: Meta
    links: Links
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class ResourceConfig:
    """Declarative config for one App Store analytics resource.

    name: dlt resource/table name.
    primary_key: report columns forming the row identity.
    columns: dlt column type hints (report columns default to text otherwise).
    report_name: exact report name used to filter the API listing.
    """

    name: str
    primary_key: List[str]
    columns: dict
    report_name: str


# One entry per supported App Store Connect analytics report; consumed by
# the app_store source to create a dlt resource per report.
RESOURCES: List[ResourceConfig] = [
    ResourceConfig(
        name="app-downloads-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Name",
            "App Version",
            "Campaign",
            "Date",
            "Device",
            "Download Type",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Pre-Order",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Downloads Detailed",
    ),
    ResourceConfig(
        name="app-store-discovery-and-engagement-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Name",
            "Campaign",
            "Date",
            "Device",
            "Engagement Type",
            "Event",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "Unique Counts": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Discovery and Engagement Detailed",
    ),
    ResourceConfig(
        name="app-sessions-detailed",
        primary_key=[
            "Date",
            "App Name",
            "App Apple Identifier",
            "App Version",
            "Device",
            "Platform Version",
            "Source Type",
            "Source Info",
            "Campaign",
            "Page Type",
            "Page Title",
            "App Download Date",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Sessions": {"data_type": "bigint"},
            "Total Session Duration": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Sessions Detailed",
    ),
    ResourceConfig(
        name="app-store-installation-and-deletion-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Download Date",
            "App Name",
            "App Version",
            "Campaign",
            "Counts",
            "Date",
            "Device",
            "Download Type",
            "Event",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Source Info",
            "Source Type",
            "Territory",
            "Unique Devices",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
            "App Download Date": {"data_type": "date"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Installation and Deletion Detailed",
    ),
    ResourceConfig(
        name="app-store-purchases-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Download Date",
            "App Name",
            "Campaign",
            "Content Apple Identifier",
            "Content Name",
            "Date",
            "Device",
            "Page Title",
            "Page Type",
            "Payment Method",
            "Platform Version",
            "Pre-Order",
            "Purchase Type",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "App Download Date": {"data_type": "date"},
            "Content Apple Identifier": {"data_type": "bigint"},
            "Purchases": {"data_type": "bigint"},
            "Proceeds In USD": {"data_type": "double"},
            "Sales In USD": {"data_type": "double"},
            "Paying Users": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Purchases Detailed",
    ),
    ResourceConfig(
        name="app-crashes-expanded",
        primary_key=[
            "App Name",
            "App Version",
            "Build",
            "Date",
            "Device",
            "Platform",
            "Release Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "processing_date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Count": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
        },
        report_name="App Crashes Expanded",
    ),
]
|
|
@@ -150,7 +150,10 @@ def tasks(
|
|
|
150
150
|
project_array: t.List[TDataItem],
|
|
151
151
|
access_token: str = dlt.secrets.value,
|
|
152
152
|
modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
|
|
153
|
-
"modified_at",
|
|
153
|
+
"modified_at",
|
|
154
|
+
initial_value=DEFAULT_START_DATE,
|
|
155
|
+
range_end="closed",
|
|
156
|
+
range_start="closed",
|
|
154
157
|
),
|
|
155
158
|
fields: Iterable[str] = TASK_FIELDS,
|
|
156
159
|
) -> Iterable[TDataItem]:
|
ingestr/src/errors.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class MissingValueError(Exception):
|
|
2
|
+
def __init__(self, value, source):
|
|
3
|
+
super().__init__(f"{value} is required to connect to {source}")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UnsupportedResourceError(Exception):
|
|
7
|
+
def __init__(self, resource, source):
|
|
8
|
+
super().__init__(
|
|
9
|
+
f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
10
|
+
)
|
|
@@ -159,7 +159,10 @@ def facebook_insights_source(
|
|
|
159
159
|
)
|
|
160
160
|
def facebook_insights(
|
|
161
161
|
date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
|
|
162
|
-
"date_start",
|
|
162
|
+
"date_start",
|
|
163
|
+
initial_value=initial_load_start_date_str,
|
|
164
|
+
range_end="closed",
|
|
165
|
+
range_start="closed",
|
|
163
166
|
),
|
|
164
167
|
) -> Iterator[TDataItems]:
|
|
165
168
|
start_date = get_start_date(date_start, attribution_window_days_lag)
|
ingestr/src/factory.py
CHANGED
|
@@ -18,6 +18,7 @@ from ingestr.src.destinations import (
|
|
|
18
18
|
from ingestr.src.sources import (
|
|
19
19
|
AdjustSource,
|
|
20
20
|
AirtableSource,
|
|
21
|
+
AppleAppStoreSource,
|
|
21
22
|
AppsflyerSource,
|
|
22
23
|
ArrowMemoryMappedSource,
|
|
23
24
|
AsanaSource,
|
|
@@ -122,6 +123,7 @@ class SourceDestinationFactory:
|
|
|
122
123
|
"asana": AsanaSource,
|
|
123
124
|
"tiktok": TikTokSource,
|
|
124
125
|
"googleanalytics": GoogleAnalyticsSource,
|
|
126
|
+
"appstore": AppleAppStoreSource,
|
|
125
127
|
}
|
|
126
128
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
127
129
|
"bigquery": BigQueryDestination,
|
|
@@ -38,7 +38,9 @@ def readers(
|
|
|
38
38
|
"""
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
40
|
filesystem_resource.apply_hints(
|
|
41
|
-
incremental=dlt.sources.incremental("modification_date")
|
|
41
|
+
incremental=dlt.sources.incremental("modification_date"),
|
|
42
|
+
range_end="closed",
|
|
43
|
+
range_start="closed",
|
|
42
44
|
)
|
|
43
45
|
return (
|
|
44
46
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
ingestr/src/github/__init__.py
CHANGED
|
@@ -14,7 +14,7 @@ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
|
|
|
14
14
|
def github_reactions(
|
|
15
15
|
owner: str,
|
|
16
16
|
name: str,
|
|
17
|
-
access_token: str
|
|
17
|
+
access_token: str,
|
|
18
18
|
items_per_page: int = 100,
|
|
19
19
|
max_items: Optional[int] = None,
|
|
20
20
|
) -> Sequence[DltResource]:
|
|
@@ -89,7 +89,11 @@ def github_repo_events(
|
|
|
89
89
|
@dlt.resource(primary_key="id", table_name=lambda i: i["type"])
|
|
90
90
|
def repo_events(
|
|
91
91
|
last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
|
|
92
|
-
"created_at",
|
|
92
|
+
"created_at",
|
|
93
|
+
initial_value="1970-01-01T00:00:00Z",
|
|
94
|
+
last_value_func=max,
|
|
95
|
+
range_end="closed",
|
|
96
|
+
range_start="closed",
|
|
93
97
|
),
|
|
94
98
|
) -> Iterator[TDataItems]:
|
|
95
99
|
repos_path = (
|
|
@@ -114,7 +118,7 @@ def github_repo_events(
|
|
|
114
118
|
def github_stargazers(
|
|
115
119
|
owner: str,
|
|
116
120
|
name: str,
|
|
117
|
-
access_token: str
|
|
121
|
+
access_token: str,
|
|
118
122
|
items_per_page: int = 100,
|
|
119
123
|
max_items: Optional[int] = None,
|
|
120
124
|
) -> Sequence[DltResource]:
|