ingestr 0.12.6__py3-none-any.whl → 0.12.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/adjust/__init__.py +7 -2
- ingestr/src/adjust/adjust_helpers.py +27 -15
- ingestr/src/appstore/__init__.py +137 -0
- ingestr/src/appstore/client.py +126 -0
- ingestr/src/appstore/errors.py +15 -0
- ingestr/src/appstore/models.py +117 -0
- ingestr/src/appstore/resources.py +179 -0
- ingestr/src/errors.py +10 -0
- ingestr/src/factory.py +2 -0
- ingestr/src/sources.py +80 -1
- ingestr/src/version.py +1 -1
- {ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/METADATA +2 -1
- {ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/RECORD +16 -10
- {ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/WHEEL +0 -0
- {ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/adjust/__init__.py
CHANGED
|
@@ -56,6 +56,11 @@ def adjust_source(
|
|
|
56
56
|
filters=filters,
|
|
57
57
|
)
|
|
58
58
|
|
|
59
|
+
@dlt.resource(write_disposition="replace", primary_key="id")
|
|
60
|
+
def events():
|
|
61
|
+
adjust_api = AdjustAPI(api_key=api_key)
|
|
62
|
+
yield adjust_api.fetch_events()
|
|
63
|
+
|
|
59
64
|
@dlt.resource(write_disposition="merge", merge_key="day")
|
|
60
65
|
def creatives():
|
|
61
66
|
adjust_api = AdjustAPI(api_key=api_key)
|
|
@@ -68,7 +73,7 @@ def adjust_source(
|
|
|
68
73
|
)
|
|
69
74
|
|
|
70
75
|
if not dimensions:
|
|
71
|
-
return campaigns, creatives
|
|
76
|
+
return campaigns, creatives, events
|
|
72
77
|
|
|
73
78
|
merge_key = merge_key
|
|
74
79
|
type_hints = {}
|
|
@@ -100,4 +105,4 @@ def adjust_source(
|
|
|
100
105
|
filters=filters,
|
|
101
106
|
)
|
|
102
107
|
|
|
103
|
-
return campaigns, creatives, custom
|
|
108
|
+
return campaigns, creatives, custom, events
|
|
ingestr/src/adjust/adjust_helpers.py
CHANGED
|
@@ -28,10 +28,20 @@ DEFAULT_METRICS = [
|
|
|
28
28
|
]
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
|
|
32
|
+
return response.status_code == 429
|
|
33
|
+
|
|
34
|
+
|
|
31
35
|
class AdjustAPI:
|
|
32
36
|
def __init__(self, api_key):
|
|
33
37
|
self.api_key = api_key
|
|
34
|
-
self.uri = "https://automate.adjust.com/reports-service/report"
|
|
38
|
+
self.request_client = Client(
|
|
39
|
+
request_timeout=8.0,
|
|
40
|
+
raise_for_status=False,
|
|
41
|
+
retry_condition=retry_on_limit,
|
|
42
|
+
request_max_attempts=12,
|
|
43
|
+
request_backoff_factor=2,
|
|
44
|
+
).session
|
|
35
45
|
|
|
36
46
|
def fetch_report_data(
|
|
37
47
|
self,
|
|
@@ -62,20 +72,11 @@ class AdjustAPI:
|
|
|
62
72
|
f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
|
|
63
73
|
)
|
|
64
74
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
request_client = Client(
|
|
71
|
-
request_timeout=8.0,
|
|
72
|
-
raise_for_status=False,
|
|
73
|
-
retry_condition=retry_on_limit,
|
|
74
|
-
request_max_attempts=12,
|
|
75
|
-
request_backoff_factor=2,
|
|
76
|
-
).session
|
|
77
|
-
|
|
78
|
-
response = request_client.get(self.uri, headers=headers, params=params)
|
|
75
|
+
response = self.request_client.get(
|
|
76
|
+
"https://automate.adjust.com/reports-service/report",
|
|
77
|
+
headers=headers,
|
|
78
|
+
params=params,
|
|
79
|
+
)
|
|
79
80
|
if response.status_code == 200:
|
|
80
81
|
result = response.json()
|
|
81
82
|
items = result.get("rows", [])
|
|
@@ -83,6 +84,17 @@ class AdjustAPI:
|
|
|
83
84
|
else:
|
|
84
85
|
raise HTTPError(f"Request failed with status code: {response.status_code}")
|
|
85
86
|
|
|
87
|
+
def fetch_events(self):
|
|
88
|
+
headers = {"Authorization": f"Bearer {self.api_key}"}
|
|
89
|
+
response = self.request_client.get(
|
|
90
|
+
"https://automate.adjust.com/reports-service/events", headers=headers
|
|
91
|
+
)
|
|
92
|
+
if response.status_code == 200:
|
|
93
|
+
result = response.json()
|
|
94
|
+
yield result
|
|
95
|
+
else:
|
|
96
|
+
raise HTTPError(f"Request failed with status code: {response.status_code}")
|
|
97
|
+
|
|
86
98
|
|
|
87
99
|
def parse_filters(filters_raw: str) -> dict:
|
|
88
100
|
# Parse filter string like "key1=value1,key2=value2,value3,value4"
|
|
ingestr/src/appstore/__init__.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import gzip
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Iterable, List, Optional
|
|
8
|
+
|
|
9
|
+
import dlt
|
|
10
|
+
import requests
|
|
11
|
+
from dlt.common.typing import TDataItem
|
|
12
|
+
from dlt.sources import DltResource
|
|
13
|
+
|
|
14
|
+
from .client import AppStoreConnectClientInterface
|
|
15
|
+
from .errors import (
|
|
16
|
+
NoOngoingReportRequestsFoundError,
|
|
17
|
+
NoReportsFoundError,
|
|
18
|
+
NoSuchReportError,
|
|
19
|
+
)
|
|
20
|
+
from .models import AnalyticsReportInstancesResponse
|
|
21
|
+
from .resources import RESOURCES
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
|
|
25
|
+
def app_store(
|
|
26
|
+
client: AppStoreConnectClientInterface,
|
|
27
|
+
app_ids: List[str],
|
|
28
|
+
start_date: Optional[datetime] = None,
|
|
29
|
+
end_date: Optional[datetime] = None,
|
|
30
|
+
) -> Iterable[DltResource]:
|
|
31
|
+
for resource in RESOURCES:
|
|
32
|
+
yield dlt.resource(
|
|
33
|
+
get_analytics_reports,
|
|
34
|
+
name=resource.name,
|
|
35
|
+
primary_key=resource.primary_key,
|
|
36
|
+
columns=resource.columns,
|
|
37
|
+
)(client, app_ids, resource.report_name, start_date, end_date)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def filter_instances_by_date(
|
|
41
|
+
instances: AnalyticsReportInstancesResponse,
|
|
42
|
+
start_date: Optional[datetime],
|
|
43
|
+
end_date: Optional[datetime],
|
|
44
|
+
) -> AnalyticsReportInstancesResponse:
|
|
45
|
+
instances = deepcopy(instances)
|
|
46
|
+
if start_date is not None:
|
|
47
|
+
instances.data = list(
|
|
48
|
+
filter(
|
|
49
|
+
lambda x: datetime.fromisoformat(x.attributes.processingDate)
|
|
50
|
+
>= start_date,
|
|
51
|
+
instances.data,
|
|
52
|
+
)
|
|
53
|
+
)
|
|
54
|
+
if end_date is not None:
|
|
55
|
+
instances.data = list(
|
|
56
|
+
filter(
|
|
57
|
+
lambda x: datetime.fromisoformat(x.attributes.processingDate)
|
|
58
|
+
<= end_date,
|
|
59
|
+
instances.data,
|
|
60
|
+
)
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
return instances
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_analytics_reports(
|
|
67
|
+
client: AppStoreConnectClientInterface,
|
|
68
|
+
app_ids: List[str],
|
|
69
|
+
report_name: str,
|
|
70
|
+
start_date: Optional[datetime],
|
|
71
|
+
end_date: Optional[datetime],
|
|
72
|
+
last_processing_date=dlt.sources.incremental("processing_date"),
|
|
73
|
+
) -> Iterable[TDataItem]:
|
|
74
|
+
if last_processing_date.last_value:
|
|
75
|
+
start_date = datetime.fromisoformat(last_processing_date.last_value)
|
|
76
|
+
for app_id in app_ids:
|
|
77
|
+
yield from get_report(client, app_id, report_name, start_date, end_date)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_report(
|
|
81
|
+
client: AppStoreConnectClientInterface,
|
|
82
|
+
app_id: str,
|
|
83
|
+
report_name: str,
|
|
84
|
+
start_date: Optional[datetime],
|
|
85
|
+
end_date: Optional[datetime],
|
|
86
|
+
) -> Iterable[TDataItem]:
|
|
87
|
+
report_requests = client.list_analytics_report_requests(app_id)
|
|
88
|
+
ongoing_requests = list(
|
|
89
|
+
filter(
|
|
90
|
+
lambda x: x.attributes.accessType == "ONGOING"
|
|
91
|
+
and not x.attributes.stoppedDueToInactivity,
|
|
92
|
+
report_requests.data,
|
|
93
|
+
)
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
if len(ongoing_requests) == 0:
|
|
97
|
+
raise NoOngoingReportRequestsFoundError()
|
|
98
|
+
|
|
99
|
+
reports = client.list_analytics_reports(ongoing_requests[0].id, report_name)
|
|
100
|
+
if len(reports.data) == 0:
|
|
101
|
+
raise NoSuchReportError(report_name)
|
|
102
|
+
|
|
103
|
+
for report in reports.data:
|
|
104
|
+
instances = client.list_report_instances(report.id)
|
|
105
|
+
|
|
106
|
+
instances = filter_instances_by_date(instances, start_date, end_date)
|
|
107
|
+
|
|
108
|
+
if len(instances.data) == 0:
|
|
109
|
+
raise NoReportsFoundError()
|
|
110
|
+
|
|
111
|
+
for instance in instances.data:
|
|
112
|
+
segments = client.list_report_segments(instance.id)
|
|
113
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
114
|
+
files = []
|
|
115
|
+
for segment in segments.data:
|
|
116
|
+
payload = requests.get(segment.attributes.url, stream=True)
|
|
117
|
+
payload.raise_for_status()
|
|
118
|
+
|
|
119
|
+
csv_path = os.path.join(
|
|
120
|
+
temp_dir, f"{segment.attributes.checksum}.csv"
|
|
121
|
+
)
|
|
122
|
+
with open(csv_path, "wb") as f:
|
|
123
|
+
for chunk in payload.iter_content(chunk_size=8192):
|
|
124
|
+
f.write(chunk)
|
|
125
|
+
files.append(csv_path)
|
|
126
|
+
for file in files:
|
|
127
|
+
with gzip.open(file, "rt") as f:
|
|
128
|
+
# TODO: infer delimiter from the file itself
|
|
129
|
+
delimiter = (
|
|
130
|
+
"," if report_name == "App Crashes Expanded" else "\t"
|
|
131
|
+
)
|
|
132
|
+
reader = csv.DictReader(f, delimiter=delimiter)
|
|
133
|
+
for row in reader:
|
|
134
|
+
yield {
|
|
135
|
+
"processing_date": instance.attributes.processingDate,
|
|
136
|
+
**row,
|
|
137
|
+
}
|
|
ingestr/src/appstore/client.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import time
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import jwt
|
|
6
|
+
import requests
|
|
7
|
+
from requests.models import PreparedRequest
|
|
8
|
+
|
|
9
|
+
from .models import (
|
|
10
|
+
AnalyticsReportInstancesResponse,
|
|
11
|
+
AnalyticsReportRequestsResponse,
|
|
12
|
+
AnalyticsReportResponse,
|
|
13
|
+
AnalyticsReportSegmentsResponse,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AppStoreConnectClientInterface(abc.ABC):
|
|
18
|
+
@abc.abstractmethod
|
|
19
|
+
def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
|
|
20
|
+
pass
|
|
21
|
+
|
|
22
|
+
@abc.abstractmethod
|
|
23
|
+
def list_analytics_reports(
|
|
24
|
+
self, req_id: str, report_name: str
|
|
25
|
+
) -> AnalyticsReportResponse:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
@abc.abstractmethod
|
|
29
|
+
def list_report_instances(
|
|
30
|
+
self,
|
|
31
|
+
report_id: str,
|
|
32
|
+
granularity: str = "DAILY",
|
|
33
|
+
) -> AnalyticsReportInstancesResponse:
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
@abc.abstractmethod
|
|
37
|
+
def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AppStoreConnectClient(AppStoreConnectClientInterface):
|
|
42
|
+
def __init__(self, key: bytes, key_id: str, issuer_id: str):
|
|
43
|
+
self.__key = key
|
|
44
|
+
self.__key_id = key_id
|
|
45
|
+
self.__issuer_id = issuer_id
|
|
46
|
+
|
|
47
|
+
def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
|
|
48
|
+
res = requests.get(
|
|
49
|
+
f"https://api.appstoreconnect.apple.com/v1/apps/{app_id}/analyticsReportRequests",
|
|
50
|
+
auth=self.auth,
|
|
51
|
+
)
|
|
52
|
+
res.raise_for_status()
|
|
53
|
+
|
|
54
|
+
return AnalyticsReportRequestsResponse.from_json(res.text) # type: ignore
|
|
55
|
+
|
|
56
|
+
def list_analytics_reports(
|
|
57
|
+
self, req_id: str, report_name: str
|
|
58
|
+
) -> AnalyticsReportResponse:
|
|
59
|
+
params = {"filter[name]": report_name}
|
|
60
|
+
res = requests.get(
|
|
61
|
+
f"https://api.appstoreconnect.apple.com/v1/analyticsReportRequests/{req_id}/reports",
|
|
62
|
+
auth=self.auth,
|
|
63
|
+
params=params,
|
|
64
|
+
)
|
|
65
|
+
res.raise_for_status()
|
|
66
|
+
return AnalyticsReportResponse.from_json(res.text) # type: ignore
|
|
67
|
+
|
|
68
|
+
def list_report_instances(
|
|
69
|
+
self,
|
|
70
|
+
report_id: str,
|
|
71
|
+
granularity: str = "DAILY",
|
|
72
|
+
) -> AnalyticsReportInstancesResponse:
|
|
73
|
+
data = []
|
|
74
|
+
url = f"https://api.appstoreconnect.apple.com/v1/analyticsReports/{report_id}/instances"
|
|
75
|
+
params: Optional[dict] = {"filter[granularity]": granularity}
|
|
76
|
+
|
|
77
|
+
while url:
|
|
78
|
+
res = requests.get(url, auth=self.auth, params=params)
|
|
79
|
+
res.raise_for_status()
|
|
80
|
+
|
|
81
|
+
response_data = AnalyticsReportInstancesResponse.from_json(res.text) # type: ignore
|
|
82
|
+
data.extend(response_data.data)
|
|
83
|
+
|
|
84
|
+
url = response_data.links.next
|
|
85
|
+
params = None # Clear params for subsequent requests
|
|
86
|
+
|
|
87
|
+
return AnalyticsReportInstancesResponse(
|
|
88
|
+
data=data,
|
|
89
|
+
links=response_data.links,
|
|
90
|
+
meta=response_data.meta,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
|
|
94
|
+
segments = []
|
|
95
|
+
url = f"https://api.appstoreconnect.apple.com/v1/analyticsReportInstances/{instance_id}/segments"
|
|
96
|
+
|
|
97
|
+
while url:
|
|
98
|
+
res = requests.get(url, auth=self.auth)
|
|
99
|
+
res.raise_for_status()
|
|
100
|
+
|
|
101
|
+
response_data = AnalyticsReportSegmentsResponse.from_json(res.text) # type: ignore
|
|
102
|
+
segments.extend(response_data.data)
|
|
103
|
+
|
|
104
|
+
url = response_data.links.next
|
|
105
|
+
|
|
106
|
+
return AnalyticsReportSegmentsResponse(
|
|
107
|
+
data=segments, links=response_data.links, meta=response_data.meta
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
def auth(self, req: PreparedRequest) -> PreparedRequest:
|
|
111
|
+
headers = {
|
|
112
|
+
"alg": "ES256",
|
|
113
|
+
"kid": self.__key_id,
|
|
114
|
+
}
|
|
115
|
+
payload = {
|
|
116
|
+
"iss": self.__issuer_id,
|
|
117
|
+
"exp": int(time.time()) + 600,
|
|
118
|
+
"aud": "appstoreconnect-v1",
|
|
119
|
+
}
|
|
120
|
+
req.headers["Authorization"] = jwt.encode(
|
|
121
|
+
payload,
|
|
122
|
+
self.__key,
|
|
123
|
+
algorithm="ES256",
|
|
124
|
+
headers=headers,
|
|
125
|
+
)
|
|
126
|
+
return req
|
|
ingestr/src/appstore/errors.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
class NoReportsFoundError(Exception):
|
|
2
|
+
def __init__(self):
|
|
3
|
+
super().__init__("No Report instances found for the given date range")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NoOngoingReportRequestsFoundError(Exception):
|
|
7
|
+
def __init__(self):
|
|
8
|
+
super().__init__(
|
|
9
|
+
"No ONGOING report requests found (or they're stopped due to inactivity)"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NoSuchReportError(Exception):
|
|
14
|
+
def __init__(self, report_name):
|
|
15
|
+
super().__init__(f"No such report found: {report_name}")
|
|
ingestr/src/appstore/models.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from dataclasses_json import dataclass_json
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass_json
|
|
8
|
+
@dataclass
|
|
9
|
+
class Links:
|
|
10
|
+
self: str
|
|
11
|
+
next: Optional[str] = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass_json
|
|
15
|
+
@dataclass
|
|
16
|
+
class ReportRequestAttributes:
|
|
17
|
+
accessType: str
|
|
18
|
+
stoppedDueToInactivity: bool
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass_json
|
|
22
|
+
@dataclass
|
|
23
|
+
class ReportAttributes:
|
|
24
|
+
name: str
|
|
25
|
+
category: str
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass_json
|
|
29
|
+
@dataclass
|
|
30
|
+
class ReportInstanceAttributes:
|
|
31
|
+
granularity: str
|
|
32
|
+
processingDate: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass_json
|
|
36
|
+
@dataclass
|
|
37
|
+
class ReportSegmentAttributes:
|
|
38
|
+
checksum: str
|
|
39
|
+
url: str
|
|
40
|
+
sizeInBytes: int
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass_json
|
|
44
|
+
@dataclass
|
|
45
|
+
class ReportRequest:
|
|
46
|
+
type: str
|
|
47
|
+
id: str
|
|
48
|
+
attributes: ReportRequestAttributes
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass_json
|
|
52
|
+
@dataclass
|
|
53
|
+
class Report:
|
|
54
|
+
type: str
|
|
55
|
+
id: str
|
|
56
|
+
attributes: ReportAttributes
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass_json
|
|
60
|
+
@dataclass
|
|
61
|
+
class ReportInstance:
|
|
62
|
+
type: str
|
|
63
|
+
id: str
|
|
64
|
+
attributes: ReportInstanceAttributes
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass_json
|
|
68
|
+
@dataclass
|
|
69
|
+
class ReportSegment:
|
|
70
|
+
type: str
|
|
71
|
+
id: str
|
|
72
|
+
attributes: ReportSegmentAttributes
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass_json
|
|
76
|
+
@dataclass
|
|
77
|
+
class PagingMeta:
|
|
78
|
+
total: int
|
|
79
|
+
limit: int
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass_json
|
|
83
|
+
@dataclass
|
|
84
|
+
class Meta:
|
|
85
|
+
paging: PagingMeta
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass_json
|
|
89
|
+
@dataclass
|
|
90
|
+
class AnalyticsReportRequestsResponse:
|
|
91
|
+
data: List[ReportRequest]
|
|
92
|
+
meta: Meta
|
|
93
|
+
links: Links
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass_json
|
|
97
|
+
@dataclass
|
|
98
|
+
class AnalyticsReportResponse:
|
|
99
|
+
data: List[Report]
|
|
100
|
+
meta: Meta
|
|
101
|
+
links: Links
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass_json
|
|
105
|
+
@dataclass
|
|
106
|
+
class AnalyticsReportInstancesResponse:
|
|
107
|
+
data: List[ReportInstance]
|
|
108
|
+
meta: Meta
|
|
109
|
+
links: Links
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass_json
|
|
113
|
+
@dataclass
|
|
114
|
+
class AnalyticsReportSegmentsResponse:
|
|
115
|
+
data: List[ReportSegment]
|
|
116
|
+
meta: Meta
|
|
117
|
+
links: Links
|
|
ingestr/src/appstore/resources.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
|
|
6
|
+
class ResourceConfig:
|
|
7
|
+
name: str
|
|
8
|
+
primary_key: List[str]
|
|
9
|
+
columns: dict
|
|
10
|
+
report_name: str
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
RESOURCES: List[ResourceConfig] = [
|
|
14
|
+
ResourceConfig(
|
|
15
|
+
name="app-downloads-detailed",
|
|
16
|
+
primary_key=[
|
|
17
|
+
"App Apple Identifier",
|
|
18
|
+
"App Name",
|
|
19
|
+
"App Version",
|
|
20
|
+
"Campaign",
|
|
21
|
+
"Date",
|
|
22
|
+
"Device",
|
|
23
|
+
"Download Type",
|
|
24
|
+
"Page Title",
|
|
25
|
+
"Page Type",
|
|
26
|
+
"Platform Version",
|
|
27
|
+
"Pre-Order",
|
|
28
|
+
"Source Info",
|
|
29
|
+
"Source Type",
|
|
30
|
+
"Territory",
|
|
31
|
+
],
|
|
32
|
+
columns={
|
|
33
|
+
"Date": {"data_type": "date"},
|
|
34
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
35
|
+
"Counts": {"data_type": "bigint"},
|
|
36
|
+
"processing_date": {"data_type": "date"},
|
|
37
|
+
},
|
|
38
|
+
report_name="App Downloads Detailed",
|
|
39
|
+
),
|
|
40
|
+
ResourceConfig(
|
|
41
|
+
name="app-store-discovery-and-engagement-detailed",
|
|
42
|
+
primary_key=[
|
|
43
|
+
"App Apple Identifier",
|
|
44
|
+
"App Name",
|
|
45
|
+
"Campaign",
|
|
46
|
+
"Date",
|
|
47
|
+
"Device",
|
|
48
|
+
"Engagement Type",
|
|
49
|
+
"Event",
|
|
50
|
+
"Page Title",
|
|
51
|
+
"Page Type",
|
|
52
|
+
"Platform Version",
|
|
53
|
+
"Source Info",
|
|
54
|
+
"Source Type",
|
|
55
|
+
"Territory",
|
|
56
|
+
],
|
|
57
|
+
columns={
|
|
58
|
+
"Date": {"data_type": "date"},
|
|
59
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
60
|
+
"Counts": {"data_type": "bigint"},
|
|
61
|
+
"Unique Counts": {"data_type": "bigint"},
|
|
62
|
+
"processing_date": {"data_type": "date"},
|
|
63
|
+
},
|
|
64
|
+
report_name="App Store Discovery and Engagement Detailed",
|
|
65
|
+
),
|
|
66
|
+
ResourceConfig(
|
|
67
|
+
name="app-sessions-detailed",
|
|
68
|
+
primary_key=[
|
|
69
|
+
"Date",
|
|
70
|
+
"App Name",
|
|
71
|
+
"App Apple Identifier",
|
|
72
|
+
"App Version",
|
|
73
|
+
"Device",
|
|
74
|
+
"Platform Version",
|
|
75
|
+
"Source Type",
|
|
76
|
+
"Source Info",
|
|
77
|
+
"Campaign",
|
|
78
|
+
"Page Type",
|
|
79
|
+
"Page Title",
|
|
80
|
+
"App Download Date",
|
|
81
|
+
"Territory",
|
|
82
|
+
],
|
|
83
|
+
columns={
|
|
84
|
+
"Date": {"data_type": "date"},
|
|
85
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
86
|
+
"Sessions": {"data_type": "bigint"},
|
|
87
|
+
"Total Session Duration": {"data_type": "bigint"},
|
|
88
|
+
"Unique Devices": {"data_type": "bigint"},
|
|
89
|
+
"processing_date": {"data_type": "date"},
|
|
90
|
+
},
|
|
91
|
+
report_name="App Sessions Detailed",
|
|
92
|
+
),
|
|
93
|
+
ResourceConfig(
|
|
94
|
+
name="app-store-installation-and-deletion-detailed",
|
|
95
|
+
primary_key=[
|
|
96
|
+
"App Apple Identifier",
|
|
97
|
+
"App Download Date",
|
|
98
|
+
"App Name",
|
|
99
|
+
"App Version",
|
|
100
|
+
"Campaign",
|
|
101
|
+
"Counts",
|
|
102
|
+
"Date",
|
|
103
|
+
"Device",
|
|
104
|
+
"Download Type",
|
|
105
|
+
"Event",
|
|
106
|
+
"Page Title",
|
|
107
|
+
"Page Type",
|
|
108
|
+
"Platform Version",
|
|
109
|
+
"Source Info",
|
|
110
|
+
"Source Type",
|
|
111
|
+
"Territory",
|
|
112
|
+
"Unique Devices",
|
|
113
|
+
],
|
|
114
|
+
columns={
|
|
115
|
+
"Date": {"data_type": "date"},
|
|
116
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
117
|
+
"Counts": {"data_type": "bigint"},
|
|
118
|
+
"Unique Devices": {"data_type": "bigint"},
|
|
119
|
+
"App Download Date": {"data_type": "date"},
|
|
120
|
+
"processing_date": {"data_type": "date"},
|
|
121
|
+
},
|
|
122
|
+
report_name="App Store Installation and Deletion Detailed",
|
|
123
|
+
),
|
|
124
|
+
ResourceConfig(
|
|
125
|
+
name="app-store-purchases-detailed",
|
|
126
|
+
primary_key=[
|
|
127
|
+
"App Apple Identifier",
|
|
128
|
+
"App Download Date",
|
|
129
|
+
"App Name",
|
|
130
|
+
"Campaign",
|
|
131
|
+
"Content Apple Identifier",
|
|
132
|
+
"Content Name",
|
|
133
|
+
"Date",
|
|
134
|
+
"Device",
|
|
135
|
+
"Page Title",
|
|
136
|
+
"Page Type",
|
|
137
|
+
"Payment Method",
|
|
138
|
+
"Platform Version",
|
|
139
|
+
"Pre-Order",
|
|
140
|
+
"Purchase Type",
|
|
141
|
+
"Source Info",
|
|
142
|
+
"Source Type",
|
|
143
|
+
"Territory",
|
|
144
|
+
],
|
|
145
|
+
columns={
|
|
146
|
+
"Date": {"data_type": "date"},
|
|
147
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
148
|
+
"App Download Date": {"data_type": "date"},
|
|
149
|
+
"Content Apple Identifier": {"data_type": "bigint"},
|
|
150
|
+
"Purchases": {"data_type": "bigint"},
|
|
151
|
+
"Proceeds In USD": {"data_type": "double"},
|
|
152
|
+
"Sales In USD": {"data_type": "double"},
|
|
153
|
+
"Paying Users": {"data_type": "bigint"},
|
|
154
|
+
"processing_date": {"data_type": "date"},
|
|
155
|
+
},
|
|
156
|
+
report_name="App Store Purchases Detailed",
|
|
157
|
+
),
|
|
158
|
+
ResourceConfig(
|
|
159
|
+
name="app-crashes-expanded",
|
|
160
|
+
primary_key=[
|
|
161
|
+
"App Name",
|
|
162
|
+
"App Version",
|
|
163
|
+
"Build",
|
|
164
|
+
"Date",
|
|
165
|
+
"Device",
|
|
166
|
+
"Platform",
|
|
167
|
+
"Release Type",
|
|
168
|
+
"Territory",
|
|
169
|
+
],
|
|
170
|
+
columns={
|
|
171
|
+
"Date": {"data_type": "date"},
|
|
172
|
+
"processing_date": {"data_type": "date"},
|
|
173
|
+
"App Apple Identifier": {"data_type": "bigint"},
|
|
174
|
+
"Count": {"data_type": "bigint"},
|
|
175
|
+
"Unique Devices": {"data_type": "bigint"},
|
|
176
|
+
},
|
|
177
|
+
report_name="App Crashes Expanded",
|
|
178
|
+
),
|
|
179
|
+
]
|
ingestr/src/errors.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class MissingValueError(Exception):
|
|
2
|
+
def __init__(self, value, source):
|
|
3
|
+
super().__init__(f"{value} is required to connect to {source}")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UnsupportedResourceError(Exception):
|
|
7
|
+
def __init__(self, resource, source):
|
|
8
|
+
super().__init__(
|
|
9
|
+
f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
10
|
+
)
|
ingestr/src/factory.py
CHANGED
|
@@ -18,6 +18,7 @@ from ingestr.src.destinations import (
|
|
|
18
18
|
from ingestr.src.sources import (
|
|
19
19
|
AdjustSource,
|
|
20
20
|
AirtableSource,
|
|
21
|
+
AppleAppStoreSource,
|
|
21
22
|
AppsflyerSource,
|
|
22
23
|
ArrowMemoryMappedSource,
|
|
23
24
|
AsanaSource,
|
|
@@ -122,6 +123,7 @@ class SourceDestinationFactory:
|
|
|
122
123
|
"asana": AsanaSource,
|
|
123
124
|
"tiktok": TikTokSource,
|
|
124
125
|
"googleanalytics": GoogleAnalyticsSource,
|
|
126
|
+
"appstore": AppleAppStoreSource,
|
|
125
127
|
}
|
|
126
128
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
127
129
|
"bigquery": BigQueryDestination,
|
ingestr/src/sources.py
CHANGED
|
@@ -3,7 +3,7 @@ import csv
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
-
from datetime import date, datetime
|
|
6
|
+
from datetime import date, datetime, timedelta
|
|
7
7
|
from typing import (
|
|
8
8
|
Any,
|
|
9
9
|
Callable,
|
|
@@ -46,10 +46,16 @@ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
|
|
|
46
46
|
from ingestr.src.adjust.adjust_helpers import parse_filters
|
|
47
47
|
from ingestr.src.airtable import airtable_source
|
|
48
48
|
from ingestr.src.appsflyer._init_ import appsflyer_source
|
|
49
|
+
from ingestr.src.appstore import app_store
|
|
50
|
+
from ingestr.src.appstore.client import AppStoreConnectClient
|
|
49
51
|
from ingestr.src.arrow import memory_mapped_arrow
|
|
50
52
|
from ingestr.src.asana_source import asana_source
|
|
51
53
|
from ingestr.src.chess import source
|
|
52
54
|
from ingestr.src.dynamodb import dynamodb
|
|
55
|
+
from ingestr.src.errors import (
|
|
56
|
+
MissingValueError,
|
|
57
|
+
UnsupportedResourceError,
|
|
58
|
+
)
|
|
53
59
|
from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
|
|
54
60
|
from ingestr.src.filesystem import readers
|
|
55
61
|
from ingestr.src.filters import table_adapter_exclude_columns
|
|
@@ -1424,3 +1430,76 @@ class GitHubSource:
|
|
|
1424
1430
|
raise ValueError(
|
|
1425
1431
|
f"Resource '{table}' is not supported for GitHub source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
1426
1432
|
)
|
|
1433
|
+
|
|
1434
|
+
|
|
1435
|
+
class AppleAppStoreSource:
|
|
1436
|
+
def handles_incrementality(self) -> bool:
|
|
1437
|
+
return True
|
|
1438
|
+
|
|
1439
|
+
def init_client(
|
|
1440
|
+
self,
|
|
1441
|
+
key_id: str,
|
|
1442
|
+
issuer_id: str,
|
|
1443
|
+
key_path: Optional[List[str]],
|
|
1444
|
+
key_base64: Optional[List[str]],
|
|
1445
|
+
):
|
|
1446
|
+
key = None
|
|
1447
|
+
if key_path is not None:
|
|
1448
|
+
with open(key_path[0]) as f:
|
|
1449
|
+
key = f.read()
|
|
1450
|
+
else:
|
|
1451
|
+
key = base64.b64decode(key_base64[0]).decode() # type: ignore
|
|
1452
|
+
|
|
1453
|
+
return AppStoreConnectClient(key.encode(), key_id, issuer_id)
|
|
1454
|
+
|
|
1455
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
1456
|
+
if kwargs.get("incremental_key"):
|
|
1457
|
+
raise ValueError(
|
|
1458
|
+
"App Store takes care of incrementality on its own, you should not provide incremental_key"
|
|
1459
|
+
)
|
|
1460
|
+
parsed_uri = urlparse(uri)
|
|
1461
|
+
params = parse_qs(parsed_uri.query)
|
|
1462
|
+
|
|
1463
|
+
key_id = params.get("key_id")
|
|
1464
|
+
if key_id is None:
|
|
1465
|
+
raise MissingValueError("key_id", "App Store")
|
|
1466
|
+
|
|
1467
|
+
key_path = params.get("key_path")
|
|
1468
|
+
key_base64 = params.get("key_base64")
|
|
1469
|
+
key_available = any(
|
|
1470
|
+
map(
|
|
1471
|
+
lambda x: x is not None,
|
|
1472
|
+
[key_path, key_base64],
|
|
1473
|
+
)
|
|
1474
|
+
)
|
|
1475
|
+
if key_available is False:
|
|
1476
|
+
raise MissingValueError("key_path or key_base64", "App Store")
|
|
1477
|
+
|
|
1478
|
+
issuer_id = params.get("issuer_id")
|
|
1479
|
+
if issuer_id is None:
|
|
1480
|
+
raise MissingValueError("issuer_id", "App Store")
|
|
1481
|
+
|
|
1482
|
+
client = self.init_client(key_id[0], issuer_id[0], key_path, key_base64)
|
|
1483
|
+
|
|
1484
|
+
app_ids = params.get("app_id")
|
|
1485
|
+
if ":" in table:
|
|
1486
|
+
intended_table, app_ids_override = table.split(":", maxsplit=1)
|
|
1487
|
+
app_ids = app_ids_override.split(",")
|
|
1488
|
+
table = intended_table
|
|
1489
|
+
|
|
1490
|
+
if app_ids is None:
|
|
1491
|
+
raise MissingValueError("app_id", "App Store")
|
|
1492
|
+
|
|
1493
|
+
src = app_store(
|
|
1494
|
+
client,
|
|
1495
|
+
app_ids,
|
|
1496
|
+
start_date=kwargs.get(
|
|
1497
|
+
"interval_start", datetime.now() - timedelta(days=30)
|
|
1498
|
+
),
|
|
1499
|
+
end_date=kwargs.get("interval_end"),
|
|
1500
|
+
)
|
|
1501
|
+
|
|
1502
|
+
if table not in src.resources:
|
|
1503
|
+
raise UnsupportedResourceError(table, "AppStore")
|
|
1504
|
+
|
|
1505
|
+
return src.with_resources(table)
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.12.6"
|
|
1
|
+
__version__ = "0.12.8"
|
|
{ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.6
|
|
3
|
+
Version: 0.12.8
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -17,6 +17,7 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Requires-Dist: asana==3.2.3
|
|
18
18
|
Requires-Dist: confluent-kafka>=2.6.1
|
|
19
19
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
20
|
+
Requires-Dist: dataclasses-json==0.6.7
|
|
20
21
|
Requires-Dist: dlt==1.5.0
|
|
21
22
|
Requires-Dist: duckdb-engine==0.13.5
|
|
22
23
|
Requires-Dist: duckdb==1.1.3
|
|
{ingestr-0.12.6.dist-info → ingestr-0.12.8.dist-info}/RECORD
CHANGED
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
3
|
ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
|
|
4
|
-
ingestr/src/
|
|
4
|
+
ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
|
|
5
|
+
ingestr/src/factory.py,sha256=jjxieXpSK02tNcg7f_t5xxqs49EnI739smRLX8qLsUU,4582
|
|
5
6
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
6
|
-
ingestr/src/sources.py,sha256=
|
|
7
|
+
ingestr/src/sources.py,sha256=dMXTfykbAZTN8SNpOWJbtl10krdJfg12S13at3Z4L38,53647
|
|
7
8
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
8
9
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
9
|
-
ingestr/src/version.py,sha256=
|
|
10
|
-
ingestr/src/adjust/__init__.py,sha256=
|
|
11
|
-
ingestr/src/adjust/adjust_helpers.py,sha256
|
|
10
|
+
ingestr/src/version.py,sha256=F7xxYe0dXryqS1cGEXFikx8AI7-UsZzdi89hJdyx-b0,23
|
|
11
|
+
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
12
|
+
ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
|
|
12
13
|
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
13
14
|
ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
|
|
14
15
|
ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
|
|
16
|
+
ingestr/src/appstore/__init__.py,sha256=s39r3YUjdfStA6lBcPzqQzestiojC3U41LB3F6Y8gG0,4538
|
|
17
|
+
ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
|
|
18
|
+
ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
|
|
19
|
+
ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
|
|
20
|
+
ingestr/src/appstore/resources.py,sha256=DJxnNrBohVV0uSeruGV-N_e7UHSlhMhjhYNYdBuqECU,5375
|
|
15
21
|
ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
|
|
16
22
|
ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
|
|
17
23
|
ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
|
|
@@ -85,8 +91,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
85
91
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
86
92
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
87
93
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
88
|
-
ingestr-0.12.
|
|
89
|
-
ingestr-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
90
|
-
ingestr-0.12.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
91
|
-
ingestr-0.12.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
92
|
-
ingestr-0.12.6.dist-info/RECORD,,
|
|
94
|
+
ingestr-0.12.8.dist-info/METADATA,sha256=zbhdTjqZrWDsmnXTxy1tfC79Q75vzHc-7UWLM62vocQ,8024
|
|
95
|
+
ingestr-0.12.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
96
|
+
ingestr-0.12.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
97
|
+
ingestr-0.12.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
98
|
+
ingestr-0.12.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|