ingestr 0.12.6__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


ingestr/src/adjust/__init__.py CHANGED
@@ -56,6 +56,11 @@ def adjust_source(
             filters=filters,
         )
 
+    @dlt.resource(write_disposition="replace", primary_key="id")
+    def events():
+        adjust_api = AdjustAPI(api_key=api_key)
+        yield adjust_api.fetch_events()
+
     @dlt.resource(write_disposition="merge", merge_key="day")
     def creatives():
         adjust_api = AdjustAPI(api_key=api_key)
@@ -68,7 +73,7 @@ def adjust_source(
         )
 
     if not dimensions:
-        return campaigns, creatives
+        return campaigns, creatives, events
 
     merge_key = merge_key
     type_hints = {}
@@ -100,4 +105,4 @@ def adjust_source(
             filters=filters,
         )
 
-    return campaigns, creatives, custom
+    return campaigns, creatives, custom, events
ingestr/src/adjust/adjust_helpers.py CHANGED
@@ -28,10 +28,20 @@ DEFAULT_METRICS = [
 ]
 
 
+def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
+    return response.status_code == 429
+
+
 class AdjustAPI:
     def __init__(self, api_key):
         self.api_key = api_key
-        self.uri = "https://automate.adjust.com/reports-service/report"
+        self.request_client = Client(
+            request_timeout=8.0,
+            raise_for_status=False,
+            retry_condition=retry_on_limit,
+            request_max_attempts=12,
+            request_backoff_factor=2,
+        ).session
 
     def fetch_report_data(
         self,
@@ -62,20 +72,11 @@ class AdjustAPI:
                 f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
             )
 
-        def retry_on_limit(
-            response: requests.Response, exception: BaseException
-        ) -> bool:
-            return response.status_code == 429
-
-        request_client = Client(
-            request_timeout=8.0,
-            raise_for_status=False,
-            retry_condition=retry_on_limit,
-            request_max_attempts=12,
-            request_backoff_factor=2,
-        ).session
-
-        response = request_client.get(self.uri, headers=headers, params=params)
+        response = self.request_client.get(
+            "https://automate.adjust.com/reports-service/report",
+            headers=headers,
+            params=params,
+        )
         if response.status_code == 200:
             result = response.json()
             items = result.get("rows", [])
@@ -83,6 +84,17 @@ class AdjustAPI:
         else:
             raise HTTPError(f"Request failed with status code: {response.status_code}")
 
+    def fetch_events(self):
+        headers = {"Authorization": f"Bearer {self.api_key}"}
+        response = self.request_client.get(
+            "https://automate.adjust.com/reports-service/events", headers=headers
+        )
+        if response.status_code == 200:
+            result = response.json()
+            yield result
+        else:
+            raise HTTPError(f"Request failed with status code: {response.status_code}")
+
 
 def parse_filters(filters_raw: str) -> dict:
     # Parse filter string like "key1=value1,key2=value2,value3,value4"
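
The new events resource and the shared retry-aware session can be exercised directly through the helper above. A minimal sketch; the API token is a placeholder, and the import path assumes AdjustAPI lives in ingestr/src/adjust/adjust_helpers.py as the RECORD listing at the end of this diff suggests:

from ingestr.src.adjust.adjust_helpers import AdjustAPI

# The token below is a placeholder; AdjustAPI now builds one retrying session
# in __init__ (12 attempts, backoff factor 2, retry on HTTP 429).
api = AdjustAPI(api_key="YOUR_ADJUST_API_TOKEN")

# fetch_events() is a generator: it yields the decoded JSON returned by
# https://automate.adjust.com/reports-service/events, or raises HTTPError.
for events in api.fetch_events():
    print(events)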
ingestr/src/appstore/__init__.py ADDED
@@ -0,0 +1,137 @@
+import csv
+import gzip
+import os
+import tempfile
+from copy import deepcopy
+from datetime import datetime
+from typing import Iterable, List, Optional
+
+import dlt
+import requests
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+
+from .client import AppStoreConnectClientInterface
+from .errors import (
+    NoOngoingReportRequestsFoundError,
+    NoReportsFoundError,
+    NoSuchReportError,
+)
+from .models import AnalyticsReportInstancesResponse
+from .resources import RESOURCES
+
+
+@dlt.source
+def app_store(
+    client: AppStoreConnectClientInterface,
+    app_ids: List[str],
+    start_date: Optional[datetime] = None,
+    end_date: Optional[datetime] = None,
+) -> Iterable[DltResource]:
+    for resource in RESOURCES:
+        yield dlt.resource(
+            get_analytics_reports,
+            name=resource.name,
+            primary_key=resource.primary_key,
+            columns=resource.columns,
+        )(client, app_ids, resource.report_name, start_date, end_date)
+
+
+def filter_instances_by_date(
+    instances: AnalyticsReportInstancesResponse,
+    start_date: Optional[datetime],
+    end_date: Optional[datetime],
+) -> AnalyticsReportInstancesResponse:
+    instances = deepcopy(instances)
+    if start_date is not None:
+        instances.data = list(
+            filter(
+                lambda x: datetime.fromisoformat(x.attributes.processingDate)
+                >= start_date,
+                instances.data,
+            )
+        )
+    if end_date is not None:
+        instances.data = list(
+            filter(
+                lambda x: datetime.fromisoformat(x.attributes.processingDate)
+                <= end_date,
+                instances.data,
+            )
+        )
+
+    return instances
+
+
+def get_analytics_reports(
+    client: AppStoreConnectClientInterface,
+    app_ids: List[str],
+    report_name: str,
+    start_date: Optional[datetime],
+    end_date: Optional[datetime],
+    last_processing_date=dlt.sources.incremental("processing_date"),
+) -> Iterable[TDataItem]:
+    if last_processing_date.last_value:
+        start_date = datetime.fromisoformat(last_processing_date.last_value)
+    for app_id in app_ids:
+        yield from get_report(client, app_id, report_name, start_date, end_date)
+
+
+def get_report(
+    client: AppStoreConnectClientInterface,
+    app_id: str,
+    report_name: str,
+    start_date: Optional[datetime],
+    end_date: Optional[datetime],
+) -> Iterable[TDataItem]:
+    report_requests = client.list_analytics_report_requests(app_id)
+    ongoing_requests = list(
+        filter(
+            lambda x: x.attributes.accessType == "ONGOING"
+            and not x.attributes.stoppedDueToInactivity,
+            report_requests.data,
+        )
+    )
+
+    if len(ongoing_requests) == 0:
+        raise NoOngoingReportRequestsFoundError()
+
+    reports = client.list_analytics_reports(ongoing_requests[0].id, report_name)
+    if len(reports.data) == 0:
+        raise NoSuchReportError(report_name)
+
+    for report in reports.data:
+        instances = client.list_report_instances(report.id)
+
+        instances = filter_instances_by_date(instances, start_date, end_date)
+
+        if len(instances.data) == 0:
+            raise NoReportsFoundError()
+
+        for instance in instances.data:
+            segments = client.list_report_segments(instance.id)
+            with tempfile.TemporaryDirectory() as temp_dir:
+                files = []
+                for segment in segments.data:
+                    payload = requests.get(segment.attributes.url, stream=True)
+                    payload.raise_for_status()
+
+                    csv_path = os.path.join(
+                        temp_dir, f"{segment.attributes.checksum}.csv"
+                    )
+                    with open(csv_path, "wb") as f:
+                        for chunk in payload.iter_content(chunk_size=8192):
+                            f.write(chunk)
+                    files.append(csv_path)
+                for file in files:
+                    with gzip.open(file, "rt") as f:
+                        # TODO: infer delimiter from the file itself
+                        delimiter = (
+                            "," if report_name == "App Crashes Expanded" else "\t"
+                        )
+                        reader = csv.DictReader(f, delimiter=delimiter)
+                        for row in reader:
+                            yield {
+                                "processing_date": instance.attributes.processingDate,
+                                **row,
+                            }
ingestr/src/appstore/client.py ADDED
@@ -0,0 +1,126 @@
+import abc
+import time
+from typing import Optional
+
+import jwt
+import requests
+from requests.models import PreparedRequest
+
+from .models import (
+    AnalyticsReportInstancesResponse,
+    AnalyticsReportRequestsResponse,
+    AnalyticsReportResponse,
+    AnalyticsReportSegmentsResponse,
+)
+
+
+class AppStoreConnectClientInterface(abc.ABC):
+    @abc.abstractmethod
+    def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
+        pass
+
+    @abc.abstractmethod
+    def list_analytics_reports(
+        self, req_id: str, report_name: str
+    ) -> AnalyticsReportResponse:
+        pass
+
+    @abc.abstractmethod
+    def list_report_instances(
+        self,
+        report_id: str,
+        granularity: str = "DAILY",
+    ) -> AnalyticsReportInstancesResponse:
+        pass
+
+    @abc.abstractmethod
+    def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
+        pass
+
+
+class AppStoreConnectClient(AppStoreConnectClientInterface):
+    def __init__(self, key: bytes, key_id: str, issuer_id: str):
+        self.__key = key
+        self.__key_id = key_id
+        self.__issuer_id = issuer_id
+
+    def list_analytics_report_requests(self, app_id) -> AnalyticsReportRequestsResponse:
+        res = requests.get(
+            f"https://api.appstoreconnect.apple.com/v1/apps/{app_id}/analyticsReportRequests",
+            auth=self.auth,
+        )
+        res.raise_for_status()
+
+        return AnalyticsReportRequestsResponse.from_json(res.text)  # type: ignore
+
+    def list_analytics_reports(
+        self, req_id: str, report_name: str
+    ) -> AnalyticsReportResponse:
+        params = {"filter[name]": report_name}
+        res = requests.get(
+            f"https://api.appstoreconnect.apple.com/v1/analyticsReportRequests/{req_id}/reports",
+            auth=self.auth,
+            params=params,
+        )
+        res.raise_for_status()
+        return AnalyticsReportResponse.from_json(res.text)  # type: ignore
+
+    def list_report_instances(
+        self,
+        report_id: str,
+        granularity: str = "DAILY",
+    ) -> AnalyticsReportInstancesResponse:
+        data = []
+        url = f"https://api.appstoreconnect.apple.com/v1/analyticsReports/{report_id}/instances"
+        params: Optional[dict] = {"filter[granularity]": granularity}
+
+        while url:
+            res = requests.get(url, auth=self.auth, params=params)
+            res.raise_for_status()
+
+            response_data = AnalyticsReportInstancesResponse.from_json(res.text)  # type: ignore
+            data.extend(response_data.data)
+
+            url = response_data.links.next
+            params = None  # Clear params for subsequent requests
+
+        return AnalyticsReportInstancesResponse(
+            data=data,
+            links=response_data.links,
+            meta=response_data.meta,
+        )
+
+    def list_report_segments(self, instance_id: str) -> AnalyticsReportSegmentsResponse:
+        segments = []
+        url = f"https://api.appstoreconnect.apple.com/v1/analyticsReportInstances/{instance_id}/segments"
+
+        while url:
+            res = requests.get(url, auth=self.auth)
+            res.raise_for_status()
+
+            response_data = AnalyticsReportSegmentsResponse.from_json(res.text)  # type: ignore
+            segments.extend(response_data.data)
+
+            url = response_data.links.next
+
+        return AnalyticsReportSegmentsResponse(
+            data=segments, links=response_data.links, meta=response_data.meta
+        )
+
+    def auth(self, req: PreparedRequest) -> PreparedRequest:
+        headers = {
+            "alg": "ES256",
+            "kid": self.__key_id,
+        }
+        payload = {
+            "iss": self.__issuer_id,
+            "exp": int(time.time()) + 600,
+            "aud": "appstoreconnect-v1",
+        }
+        req.headers["Authorization"] = jwt.encode(
+            payload,
+            self.__key,
+            algorithm="ES256",
+            headers=headers,
+        )
+        return req
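
Putting the new source module and client together, a minimal sketch of loading one of the App Store reports through dlt; the key file path, key ID, issuer ID, app ID, and the duckdb destination are placeholders, not values from this diff:

from datetime import datetime, timedelta

import dlt

from ingestr.src.appstore import app_store
from ingestr.src.appstore.client import AppStoreConnectClient

# Read the App Store Connect API private key (.p8); path and IDs are placeholders.
with open("AuthKey_ABC123XYZ.p8", "rb") as f:
    key = f.read()

client = AppStoreConnectClient(key, "ABC123XYZ", "00000000-0000-0000-0000-000000000000")

# app_store() yields one dlt resource per entry in RESOURCES; pick one by name.
source = app_store(client, ["1234567890"], start_date=datetime.now() - timedelta(days=30))

pipeline = dlt.pipeline(
    pipeline_name="appstore_demo", destination="duckdb", dataset_name="app_store"
)
pipeline.run(source.with_resources("app-downloads-detailed"))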
ingestr/src/appstore/errors.py ADDED
@@ -0,0 +1,15 @@
+class NoReportsFoundError(Exception):
+    def __init__(self):
+        super().__init__("No Report instances found for the given date range")
+
+
+class NoOngoingReportRequestsFoundError(Exception):
+    def __init__(self):
+        super().__init__(
+            "No ONGOING report requests found (or they're stopped due to inactivity)"
+        )
+
+
+class NoSuchReportError(Exception):
+    def __init__(self, report_name):
+        super().__init__(f"No such report found: {report_name}")
ingestr/src/appstore/models.py ADDED
@@ -0,0 +1,117 @@
+from dataclasses import dataclass
+from typing import List, Optional
+
+from dataclasses_json import dataclass_json
+
+
+@dataclass_json
+@dataclass
+class Links:
+    self: str
+    next: Optional[str] = None
+
+
+@dataclass_json
+@dataclass
+class ReportRequestAttributes:
+    accessType: str
+    stoppedDueToInactivity: bool
+
+
+@dataclass_json
+@dataclass
+class ReportAttributes:
+    name: str
+    category: str
+
+
+@dataclass_json
+@dataclass
+class ReportInstanceAttributes:
+    granularity: str
+    processingDate: str
+
+
+@dataclass_json
+@dataclass
+class ReportSegmentAttributes:
+    checksum: str
+    url: str
+    sizeInBytes: int
+
+
+@dataclass_json
+@dataclass
+class ReportRequest:
+    type: str
+    id: str
+    attributes: ReportRequestAttributes
+
+
+@dataclass_json
+@dataclass
+class Report:
+    type: str
+    id: str
+    attributes: ReportAttributes
+
+
+@dataclass_json
+@dataclass
+class ReportInstance:
+    type: str
+    id: str
+    attributes: ReportInstanceAttributes
+
+
+@dataclass_json
+@dataclass
+class ReportSegment:
+    type: str
+    id: str
+    attributes: ReportSegmentAttributes
+
+
+@dataclass_json
+@dataclass
+class PagingMeta:
+    total: int
+    limit: int
+
+
+@dataclass_json
+@dataclass
+class Meta:
+    paging: PagingMeta
+
+
+@dataclass_json
+@dataclass
+class AnalyticsReportRequestsResponse:
+    data: List[ReportRequest]
+    meta: Meta
+    links: Links
+
+
+@dataclass_json
+@dataclass
+class AnalyticsReportResponse:
+    data: List[Report]
+    meta: Meta
+    links: Links
+
+
+@dataclass_json
+@dataclass
+class AnalyticsReportInstancesResponse:
+    data: List[ReportInstance]
+    meta: Meta
+    links: Links
+
+
+@dataclass_json
+@dataclass
+class AnalyticsReportSegmentsResponse:
+    data: List[ReportSegment]
+    meta: Meta
+    links: Links
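
These models are deserialized in client.py through the from_json() classmethod that the dataclasses_json decorator generates; a small illustrative sketch with a made-up payload:

from ingestr.src.appstore.models import AnalyticsReportRequestsResponse

# dataclasses_json adds .from_json()/.to_json() to each decorated dataclass and
# decodes nested models (Meta, PagingMeta, Links) along the way.
raw = (
    '{"data": [], '
    '"meta": {"paging": {"total": 0, "limit": 200}}, '
    '"links": {"self": "https://api.appstoreconnect.apple.com/v1/apps/123/analyticsReportRequests"}}'
)
resp = AnalyticsReportRequestsResponse.from_json(raw)
print(resp.meta.paging.total)  # 0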
ingestr/src/appstore/resources.py ADDED
@@ -0,0 +1,179 @@
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass
+class ResourceConfig:
+    name: str
+    primary_key: List[str]
+    columns: dict
+    report_name: str
+
+
+RESOURCES: List[ResourceConfig] = [
+    ResourceConfig(
+        name="app-downloads-detailed",
+        primary_key=[
+            "App Apple Identifier",
+            "App Name",
+            "App Version",
+            "Campaign",
+            "Date",
+            "Device",
+            "Download Type",
+            "Page Title",
+            "Page Type",
+            "Platform Version",
+            "Pre-Order",
+            "Source Info",
+            "Source Type",
+            "Territory",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "Counts": {"data_type": "bigint"},
+            "processing_date": {"data_type": "date"},
+        },
+        report_name="App Downloads Detailed",
+    ),
+    ResourceConfig(
+        name="app-store-discovery-and-engagement-detailed",
+        primary_key=[
+            "App Apple Identifier",
+            "App Name",
+            "Campaign",
+            "Date",
+            "Device",
+            "Engagement Type",
+            "Event",
+            "Page Title",
+            "Page Type",
+            "Platform Version",
+            "Source Info",
+            "Source Type",
+            "Territory",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "Counts": {"data_type": "bigint"},
+            "Unique Counts": {"data_type": "bigint"},
+            "processing_date": {"data_type": "date"},
+        },
+        report_name="App Store Discovery and Engagement Detailed",
+    ),
+    ResourceConfig(
+        name="app-sessions-detailed",
+        primary_key=[
+            "Date",
+            "App Name",
+            "App Apple Identifier",
+            "App Version",
+            "Device",
+            "Platform Version",
+            "Source Type",
+            "Source Info",
+            "Campaign",
+            "Page Type",
+            "Page Title",
+            "App Download Date",
+            "Territory",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "Sessions": {"data_type": "bigint"},
+            "Total Session Duration": {"data_type": "bigint"},
+            "Unique Devices": {"data_type": "bigint"},
+            "processing_date": {"data_type": "date"},
+        },
+        report_name="App Sessions Detailed",
+    ),
+    ResourceConfig(
+        name="app-store-installation-and-deletion-detailed",
+        primary_key=[
+            "App Apple Identifier",
+            "App Download Date",
+            "App Name",
+            "App Version",
+            "Campaign",
+            "Counts",
+            "Date",
+            "Device",
+            "Download Type",
+            "Event",
+            "Page Title",
+            "Page Type",
+            "Platform Version",
+            "Source Info",
+            "Source Type",
+            "Territory",
+            "Unique Devices",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "Counts": {"data_type": "bigint"},
+            "Unique Devices": {"data_type": "bigint"},
+            "App Download Date": {"data_type": "date"},
+            "processing_date": {"data_type": "date"},
+        },
+        report_name="App Store Installation and Deletion Detailed",
+    ),
+    ResourceConfig(
+        name="app-store-purchases-detailed",
+        primary_key=[
+            "App Apple Identifier",
+            "App Download Date",
+            "App Name",
+            "Campaign",
+            "Content Apple Identifier",
+            "Content Name",
+            "Date",
+            "Device",
+            "Page Title",
+            "Page Type",
+            "Payment Method",
+            "Platform Version",
+            "Pre-Order",
+            "Purchase Type",
+            "Source Info",
+            "Source Type",
+            "Territory",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "App Download Date": {"data_type": "date"},
+            "Content Apple Identifier": {"data_type": "bigint"},
+            "Purchases": {"data_type": "bigint"},
+            "Proceeds In USD": {"data_type": "double"},
+            "Sales In USD": {"data_type": "double"},
+            "Paying Users": {"data_type": "bigint"},
+            "processing_date": {"data_type": "date"},
+        },
+        report_name="App Store Purchases Detailed",
+    ),
+    ResourceConfig(
+        name="app-crashes-expanded",
+        primary_key=[
+            "App Name",
+            "App Version",
+            "Build",
+            "Date",
+            "Device",
+            "Platform",
+            "Release Type",
+            "Territory",
+        ],
+        columns={
+            "Date": {"data_type": "date"},
+            "processing_date": {"data_type": "date"},
+            "App Apple Identifier": {"data_type": "bigint"},
+            "Count": {"data_type": "bigint"},
+            "Unique Devices": {"data_type": "bigint"},
+        },
+        report_name="App Crashes Expanded",
+    ),
+]
ingestr/src/errors.py ADDED
@@ -0,0 +1,10 @@
+class MissingValueError(Exception):
+    def __init__(self, value, source):
+        super().__init__(f"{value} is required to connect to {source}")
+
+
+class UnsupportedResourceError(Exception):
+    def __init__(self, resource, source):
+        super().__init__(
+            f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+        )
ingestr/src/factory.py CHANGED
@@ -18,6 +18,7 @@ from ingestr.src.destinations import (
 from ingestr.src.sources import (
     AdjustSource,
     AirtableSource,
+    AppleAppStoreSource,
     AppsflyerSource,
     ArrowMemoryMappedSource,
     AsanaSource,
@@ -122,6 +123,7 @@ class SourceDestinationFactory:
         "asana": AsanaSource,
         "tiktok": TikTokSource,
         "googleanalytics": GoogleAnalyticsSource,
+        "appstore": AppleAppStoreSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
ingestr/src/sources.py CHANGED
@@ -3,7 +3,7 @@ import csv
 import json
 import os
 import re
-from datetime import date, datetime
+from datetime import date, datetime, timedelta
 from typing import (
     Any,
     Callable,
@@ -46,10 +46,16 @@ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
 from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
 from ingestr.src.appsflyer._init_ import appsflyer_source
+from ingestr.src.appstore import app_store
+from ingestr.src.appstore.client import AppStoreConnectClient
 from ingestr.src.arrow import memory_mapped_arrow
 from ingestr.src.asana_source import asana_source
 from ingestr.src.chess import source
 from ingestr.src.dynamodb import dynamodb
+from ingestr.src.errors import (
+    MissingValueError,
+    UnsupportedResourceError,
+)
 from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
 from ingestr.src.filesystem import readers
 from ingestr.src.filters import table_adapter_exclude_columns
@@ -1424,3 +1430,76 @@ class GitHubSource:
         raise ValueError(
             f"Resource '{table}' is not supported for GitHub source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
         )
+
+
+class AppleAppStoreSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def init_client(
+        self,
+        key_id: str,
+        issuer_id: str,
+        key_path: Optional[List[str]],
+        key_base64: Optional[List[str]],
+    ):
+        key = None
+        if key_path is not None:
+            with open(key_path[0]) as f:
+                key = f.read()
+        else:
+            key = base64.b64decode(key_base64[0]).decode()  # type: ignore
+
+        return AppStoreConnectClient(key.encode(), key_id, issuer_id)
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "App Store takes care of incrementality on its own, you should not provide incremental_key"
+            )
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        key_id = params.get("key_id")
+        if key_id is None:
+            raise MissingValueError("key_id", "App Store")
+
+        key_path = params.get("key_path")
+        key_base64 = params.get("key_base64")
+        key_available = any(
+            map(
+                lambda x: x is not None,
+                [key_path, key_base64],
+            )
+        )
+        if key_available is False:
+            raise MissingValueError("key_path or key_base64", "App Store")
+
+        issuer_id = params.get("issuer_id")
+        if issuer_id is None:
+            raise MissingValueError("issuer_id", "App Store")
+
+        client = self.init_client(key_id[0], issuer_id[0], key_path, key_base64)
+
+        app_ids = params.get("app_id")
+        if ":" in table:
+            intended_table, app_ids_override = table.split(":", maxsplit=1)
+            app_ids = app_ids_override.split(",")
+            table = intended_table
+
+        if app_ids is None:
+            raise MissingValueError("app_id", "App Store")
+
+        src = app_store(
+            client,
+            app_ids,
+            start_date=kwargs.get(
+                "interval_start", datetime.now() - timedelta(days=30)
+            ),
+            end_date=kwargs.get("interval_end"),
+        )
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "AppStore")
+
+        return src.with_resources(table)
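
Based on the query parameters parsed in dlt_source() above, a source URI for the new connector can be sketched as follows; the scheme mirrors the "appstore" entry registered in factory.py, while the key path, IDs, and app ID are placeholders:

from ingestr.src.sources import AppleAppStoreSource

# key_path can be swapped for key_base64; the app_id query parameter can also be
# overridden by a table suffix such as "app-downloads-detailed:123,456".
uri = (
    "appstore://?key_id=ABC123XYZ"
    "&issuer_id=00000000-0000-0000-0000-000000000000"
    "&key_path=/path/to/AuthKey_ABC123XYZ.p8"
    "&app_id=1234567890"
)
source = AppleAppStoreSource().dlt_source(uri, "app-downloads-detailed")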
ingestr/src/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.12.6"
+__version__ = "0.12.8"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.12.6
+Version: 0.12.8
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -17,6 +17,7 @@ Requires-Python: >=3.9
 Requires-Dist: asana==3.2.3
 Requires-Dist: confluent-kafka>=2.6.1
 Requires-Dist: databricks-sql-connector==2.9.3
+Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: dlt==1.5.0
 Requires-Dist: duckdb-engine==0.13.5
 Requires-Dist: duckdb==1.1.3
@@ -1,17 +1,23 @@
 ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
-ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
+ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
+ingestr/src/factory.py,sha256=jjxieXpSK02tNcg7f_t5xxqs49EnI739smRLX8qLsUU,4582
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/sources.py,sha256=GIskUoVL82x_mLerU9cgdixBNNhzBnDN-_MDraqK7hY,51166
+ingestr/src/sources.py,sha256=dMXTfykbAZTN8SNpOWJbtl10krdJfg12S13at3Z4L38,53647
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
-ingestr/src/version.py,sha256=vb8hPdq1CrFlRl6aBYGOWE4MPv-N84JJm1f3KFvG8o4,23
-ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
-ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
+ingestr/src/version.py,sha256=F7xxYe0dXryqS1cGEXFikx8AI7-UsZzdi89hJdyx-b0,23
+ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
+ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
+ingestr/src/appstore/__init__.py,sha256=s39r3YUjdfStA6lBcPzqQzestiojC3U41LB3F6Y8gG0,4538
+ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
+ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
+ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
+ingestr/src/appstore/resources.py,sha256=DJxnNrBohVV0uSeruGV-N_e7UHSlhMhjhYNYdBuqECU,5375
 ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
 ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
 ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
@@ -85,8 +91,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.12.6.dist-info/METADATA,sha256=y-o_BL8nj7pVQU3sSaz9UJ9XsNVUi8Rjf5G0vNGi6io,7985
-ingestr-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.12.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.12.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.12.6.dist-info/RECORD,,
+ingestr-0.12.8.dist-info/METADATA,sha256=zbhdTjqZrWDsmnXTxy1tfC79Q75vzHc-7UWLM62vocQ,8024
+ingestr-0.12.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.12.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.12.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.12.8.dist-info/RECORD,,