ingestr 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr has been flagged as potentially problematic; consult the registry's advisory for details.

ingestr/main.py CHANGED
@@ -323,10 +323,12 @@ def ingest(
323
323
  else "Platform-specific"
324
324
  )
325
325
 
326
+ source_table_print = source_table.split(":")[0]
327
+
326
328
  print()
327
329
  print("[bold green]Initiated the pipeline with the following:[/bold green]")
328
330
  print(
329
- f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table}"
331
+ f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table_print}"
330
332
  )
331
333
  print(
332
334
  f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
@@ -0,0 +1,100 @@
from typing import Optional, Sequence

import dlt
import pendulum
from dlt.sources import DltResource

from .adjust_helpers import DEFAULT_DIMENSIONS, DEFAULT_METRICS, AdjustAPI

# Time-granularity dimensions: a custom report must contain at least one of
# these so merge-based incremental loading has a date-like key to merge on.
REQUIRED_CUSTOM_DIMENSIONS = [
    "hour",
    "day",
    "week",
    "month",
    "quarter",
    "year",
]

# dlt column type hints for the Adjust fields we recognize; applied to the
# schema of custom reports so columns get stable types across loads.
KNOWN_TYPE_HINTS = {
    "hour": {"data_type": "timestamp"},
    "day": {"data_type": "date"},
    "week": {"data_type": "text"},
    "month": {"data_type": "text"},
    "quarter": {"data_type": "text"},
    "year": {"data_type": "text"},
    "campaign": {"data_type": "text"},
    "adgroup": {"data_type": "text"},
    "creative": {"data_type": "text"},
    # metrics
    "installs": {"data_type": "bigint"},
    "clicks": {"data_type": "bigint"},
    "cost": {"data_type": "decimal"},
    "network_cost": {"data_type": "decimal"},
    "impressions": {"data_type": "bigint"},
    "ad_revenue": {"data_type": "decimal"},
    "all_revenue": {"data_type": "decimal"},
}


@dlt.source(max_table_nesting=0)
def adjust_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    api_key: str,
    dimensions: Optional[list[str]] = None,
    metrics: Optional[list[str]] = None,
    merge_key: Optional[str] = None,
    filters: Optional[dict] = None,
) -> Sequence[DltResource]:
    """dlt source exposing Adjust report data as three resources.

    Resources:
        campaigns: report over the default dimensions/metrics, merged on "day".
        creatives: same report extended with "adgroup"/"creative" dimensions.
        custom: caller-defined report; ``dimensions`` and ``metrics`` are only
            meaningful when this resource is selected.

    Args:
        start_date: inclusive start of the reporting window.
        end_date: inclusive end of the reporting window.
        api_key: Adjust API token used for Bearer authentication.
        dimensions: dimension list for the ``custom`` resource (may be None
            when only the built-in resources are used).
        metrics: metric list for the ``custom`` resource (may be None).
        merge_key: fallback merge key for ``custom``; overridden by the first
            time-granularity dimension found in ``dimensions``.
        filters: extra query parameters forwarded to the Adjust API.
    """

    @dlt.resource(write_disposition="merge", merge_key="day")
    def campaigns():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS,
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    @dlt.resource(write_disposition="merge", merge_key="day")
    def creatives():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS + ["adgroup", "creative"],
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    # BUG FIX: dimensions/metrics default to None (the built-in campaigns and
    # creatives resources don't use them), but the original iterated them
    # unconditionally — raising TypeError for every non-custom table.
    safe_dimensions = dimensions or []
    safe_metrics = metrics or []

    # Prefer the finest time-granularity dimension present as the merge key;
    # otherwise keep the caller-provided merge_key.
    for dimension in REQUIRED_CUSTOM_DIMENSIONS:
        if dimension in safe_dimensions:
            merge_key = dimension
            break

    # Collect type hints only for the fields we know about; unknown fields
    # fall back to dlt's schema inference.
    type_hints = {
        field: KNOWN_TYPE_HINTS[field]
        for field in safe_dimensions + safe_metrics
        if field in KNOWN_TYPE_HINTS
    }

    @dlt.resource(
        write_disposition={"disposition": "merge", "strategy": "delete+insert"},
        merge_key=merge_key,
        primary_key=dimensions,
        columns=type_hints,
    )
    def custom():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=dimensions,
            metrics=metrics,
            filters=filters,
        )

    return campaigns, creatives, custom
@@ -1,3 +1,6 @@
1
+ from typing import Optional
2
+
3
+ import pendulum
1
4
  import requests
2
5
  from dlt.sources.helpers.requests import Client
3
6
  from requests.exceptions import HTTPError
@@ -32,33 +35,32 @@ class AdjustAPI:
32
35
 
33
36
  def fetch_report_data(
34
37
  self,
35
- start_date,
36
- end_date,
38
+ start_date: pendulum.DateTime,
39
+ end_date: pendulum.DateTime,
37
40
  dimensions=DEFAULT_DIMENSIONS,
38
41
  metrics=DEFAULT_METRICS,
39
- utc_offset="+00:00",
40
- ad_spend_mode="network",
41
- attribution_source="first",
42
- attribution_type="all",
43
- cohort_maturity="immature",
44
- reattributed="all",
45
- sandbox="false",
42
+ filters: Optional[dict] = None,
46
43
  ):
47
44
  headers = {"Authorization": f"Bearer {self.api_key}"}
48
- comma_separated_dimensions = ",".join(dimensions)
49
- comma_separated_metrics = ",".join(metrics)
50
- params = {
51
- "date_period": f"{start_date}:{end_date}",
52
- "dimensions": comma_separated_dimensions,
53
- "metrics": comma_separated_metrics,
54
- "utc_offset": utc_offset,
55
- "ad_spend_mode": ad_spend_mode,
56
- "attribution_source": attribution_source,
57
- "attribution_type": attribution_type,
58
- "cohort_maturity": cohort_maturity,
59
- "reattributed": reattributed,
60
- "sandbox": sandbox,
61
- }
45
+ params = {}
46
+
47
+ if filters:
48
+ for key, value in filters.items():
49
+ if isinstance(value, list):
50
+ params[key] = ",".join(value)
51
+ else:
52
+ params[key] = value
53
+
54
+ params["date_period"] = (
55
+ f"{start_date.format('YYYY-MM-DD')}:{end_date.format('YYYY-MM-DD')}"
56
+ )
57
+ params["dimensions"] = ",".join(dimensions)
58
+ params["metrics"] = ",".join(metrics)
59
+
60
+ if start_date > end_date:
61
+ raise ValueError(
62
+ f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
63
+ )
62
64
 
63
65
  def retry_on_limit(
64
66
  response: requests.Response, exception: BaseException
@@ -80,3 +82,24 @@ class AdjustAPI:
80
82
  yield items
81
83
  else:
82
84
  raise HTTPError(f"Request failed with status code: {response.status_code}")
85
+
86
+
87
def parse_filters(filters_raw: str) -> dict:
    """Parse a filter string like ``key1=value1,key2=value2,value3,value4``.

    Comma-separated items containing ``=`` start a new key/value pair; items
    without ``=`` are appended as additional values to the most recently seen
    key (so ``key2`` above maps to ``["value2", "value3", "value4"]``). Keys
    that end up with a single value map to that value directly; keys with
    several values map to a list. Leading bare items with no preceding key
    are ignored.

    Args:
        filters_raw: raw filter expression, e.g. from the URI/table spec.

    Returns:
        Mapping of filter name to a scalar value or a list of values.
    """
    filters: dict = {}
    current_key = None

    for item in filters_raw.split(","):
        if "=" in item:
            # Start of a new key-value pair. BUG FIX: split only on the first
            # "=" so values that themselves contain "=" (base64 tokens, URLs)
            # no longer raise "too many values to unpack".
            key, value = item.split("=", 1)
            filters[key] = [value]  # always collect into a list first
            current_key = key
        elif current_key is not None:
            # A bare item extends the value list of the current key.
            filters[current_key].append(item)

    # Collapse single-item lists to scalars for a friendlier shape.
    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
ingestr/src/sources.py CHANGED
@@ -1,15 +1,18 @@
1
1
  import base64
2
2
  import csv
3
3
  import json
4
- from datetime import date, datetime
4
+ from datetime import date
5
5
  from typing import Any, Callable, Optional
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
8
  import dlt
9
+ import pendulum
9
10
  from dlt.common.configuration.specs import AwsCredentials
11
+ from dlt.common.time import ensure_pendulum_datetime
10
12
  from dlt.common.typing import TSecretStrValue
11
13
 
12
- from ingestr.src.adjust._init_ import adjust_source
14
+ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
15
+ from ingestr.src.adjust.adjust_helpers import parse_filters
13
16
  from ingestr.src.airtable import airtable_source
14
17
  from ingestr.src.appsflyer._init_ import appsflyer_source
15
18
  from ingestr.src.arrow import memory_mapped_arrow
@@ -719,10 +722,10 @@ class KafkaSource:
719
722
 
720
723
  class AdjustSource:
721
724
  def handles_incrementality(self) -> bool:
722
- return True
725
+ return False
723
726
 
724
727
  def dlt_source(self, uri: str, table: str, **kwargs):
725
- if kwargs.get("incremental_key"):
728
+ if kwargs.get("incremental_key") and not table.startswith("custom:"):
726
729
  raise ValueError(
727
730
  "Adjust takes care of incrementality on its own, you should not provide incremental_key"
728
731
  )
@@ -734,25 +737,62 @@ class AdjustSource:
734
737
  if not api_key:
735
738
  raise ValueError("api_key in the URI is required to connect to Adjust")
736
739
 
737
- interval_start = kwargs.get("interval_start")
738
- interval_end = kwargs.get("interval_end")
740
+ lookback_days = int(source_params.get("lookback_days", [30])[0])
739
741
 
740
742
  start_date = (
741
- interval_start.strftime("%Y-%m-%d") if interval_start else "2000-01-01"
742
- )
743
- end_date = (
744
- interval_end.strftime("%Y-%m-%d")
745
- if interval_end
746
- else datetime.now().strftime("%Y-%m-%d")
743
+ pendulum.now()
744
+ .replace(hour=0, minute=0, second=0, microsecond=0)
745
+ .subtract(days=lookback_days)
747
746
  )
747
+ if kwargs.get("interval_start"):
748
+ start_date = (
749
+ ensure_pendulum_datetime(str(kwargs.get("interval_start")))
750
+ .replace(hour=0, minute=0, second=0, microsecond=0)
751
+ .subtract(days=lookback_days)
752
+ )
748
753
 
749
- Endpoint = None
750
- if table in ["campaigns", "creatives"]:
751
- Endpoint = table
754
+ end_date = pendulum.now()
755
+ if kwargs.get("interval_end"):
756
+ end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
757
+
758
+ dimensions = None
759
+ metrics = None
760
+ filters = []
761
+ if table.startswith("custom:"):
762
+ fields = table.split(":")
763
+ if len(fields) != 3 and len(fields) != 4:
764
+ raise ValueError(
765
+ "Invalid Adjust custom table format. Expected format: custom:<dimensions>,<metrics> or custom:<dimensions>:<metrics>:<filters>"
766
+ )
767
+
768
+ dimensions = fields[1].split(",")
769
+ metrics = fields[2].split(",")
770
+ table = "custom"
771
+
772
+ found = False
773
+ for dimension in dimensions:
774
+ if dimension in REQUIRED_CUSTOM_DIMENSIONS:
775
+ found = True
776
+ break
777
+
778
+ if not found:
779
+ raise ValueError(
780
+ f"At least one of the required dimensions is missing for custom Adjust report: {REQUIRED_CUSTOM_DIMENSIONS}"
781
+ )
782
+
783
+ if len(fields) == 4:
784
+ filters_raw = fields[3]
785
+ filters = parse_filters(filters_raw)
752
786
 
753
787
  return adjust_source(
754
- start_date=start_date, end_date=end_date, api_key=api_key[0]
755
- ).with_resources(Endpoint)
788
+ start_date=start_date,
789
+ end_date=end_date,
790
+ api_key=api_key[0],
791
+ dimensions=dimensions,
792
+ metrics=metrics,
793
+ merge_key=kwargs.get("merge_key"),
794
+ filters=filters,
795
+ ).with_resources(table)
756
796
 
757
797
 
758
798
  class AppsflyerSource:
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.9.2"
1
+ __version__ = "0.9.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -51,7 +51,7 @@ Description-Content-Type: text/markdown
51
51
 
52
52
  <div align="center">
53
53
  <img src="https://github.com/bruin-data/ingestr/blob/main/resources/ingestr.svg?raw=true" width="500" />
54
- <p>Ingest & copy data from any source to any destination without any code</p>
54
+ <p>Copy data from any source to any destination without any code</p>
55
55
  <img src="https://github.com/bruin-data/ingestr/blob/main/resources/demo.gif?raw=true" width="750" />
56
56
  </div>
57
57
 
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
63
63
 
64
64
  ---
65
65
 
66
- Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
66
+ ingestr is a command-line app that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
67
67
 
68
68
  - ✨ copy data from your database into any destination
69
69
  - ➕ incremental loading: `append`, `merge` or `delete+insert`
@@ -89,10 +89,10 @@ ingestr ingest \
89
89
 
90
90
  That's it.
91
91
 
92
- This command will:
92
+ This command:
93
93
 
94
- - get the table `public.some_data` from the Postgres instance.
95
- - upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
94
+ - gets the table `public.some_data` from the Postgres instance.
95
+ - uploads this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
96
96
 
97
97
  ## Documentation
98
98
 
@@ -102,7 +102,7 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
102
102
 
103
103
  Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
104
104
 
105
- ## Supported Sources & Destinations
105
+ ## Supported sources & destinations
106
106
 
107
107
  <table>
108
108
  <tr>
@@ -256,7 +256,7 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
256
256
  </tr>
257
257
  </table>
258
258
 
259
- More to come soon!
259
+ Feel free to create an issue if you'd like to see support for another source or destination.
260
260
 
261
261
  ## Acknowledgements
262
262
 
@@ -1,12 +1,12 @@
1
- ingestr/main.py,sha256=U66TM57ePv-RdoAftQ0pFZx8woZUQnLepKa50C-bA5I,17655
1
+ ingestr/main.py,sha256=B8TAQotJoYSvmaQQm33o2lv99OVLYNz-1Aw_fgQahwE,17718
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
4
4
  ingestr/src/factory.py,sha256=ft81B-YJgvEROkHAZjMjTIS7IYvle-uZQv45b7-Wfk0,4947
5
- ingestr/src/sources.py,sha256=iZbCY-pzv6jbgdHOh0Vdsl3cBoC71eiFZgu_a5RoaDE,32188
5
+ ingestr/src/sources.py,sha256=0eLrkd3oVsK9bWG7rp2Asu5QNrAUP2ZeMIIkduEuLL8,33756
6
6
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
7
- ingestr/src/version.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
8
- ingestr/src/adjust/_init_.py,sha256=_jJE3Ywvv-YyJ7ywICdht_X2Gnd1cKm6F1wAfnpXuWM,890
9
- ingestr/src/adjust/helpers.py,sha256=kkYC3MqMHLNucuQ50klZWrvd3o8VfUysNtZTQSsKZ_c,2588
7
+ ingestr/src/version.py,sha256=e56AvHfJCtG2ZwwINqsxINVbehWdKxMYgIDbjd7P-II,22
8
+ ingestr/src/adjust/__init__.py,sha256=oTM7XozDcMuUiCZ0w4gWEBXuCCtMZ0iBfkKdd2pVa1E,3007
9
+ ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
10
10
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
11
11
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
12
12
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
@@ -74,8 +74,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
74
74
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
75
75
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
76
76
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
77
- ingestr-0.9.2.dist-info/METADATA,sha256=ZnA32SeV-3jKU3g13UsczdNiKtvu1lXvF-Gb2mZUlzw,7004
78
- ingestr-0.9.2.dist-info/WHEEL,sha256=wukiCwsxxsuzcQTdnC_ZWHZECE4wwOh3xCCrap6i6Ts,87
79
- ingestr-0.9.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
80
- ingestr-0.9.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
81
- ingestr-0.9.2.dist-info/RECORD,,
77
+ ingestr-0.9.4.dist-info/METADATA,sha256=mQKVpJ_Z7Ur3CLMbieKRmUrTEV0KIUe3MSeH2A1AUoE,7058
78
+ ingestr-0.9.4.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
79
+ ingestr-0.9.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
80
+ ingestr-0.9.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
81
+ ingestr-0.9.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.26.0
2
+ Generator: hatchling 1.26.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,31 +0,0 @@
1
- from typing import Sequence
2
-
3
- import dlt
4
- from dlt.sources import DltResource
5
-
6
- from .helpers import DEFAULT_DIMENSIONS, AdjustAPI
7
-
8
-
9
- @dlt.source(max_table_nesting=0)
10
- def adjust_source(
11
- start_date: str,
12
- end_date: str,
13
- api_key: str,
14
- ) -> Sequence[DltResource]:
15
- @dlt.resource(write_disposition="merge", merge_key="day")
16
- def campaigns():
17
- adjust_api = AdjustAPI(api_key=api_key)
18
- yield from adjust_api.fetch_report_data(
19
- start_date=start_date,
20
- end_date=end_date,
21
- )
22
-
23
- @dlt.resource(write_disposition="merge", merge_key="day")
24
- def creatives():
25
- dimensions = DEFAULT_DIMENSIONS + ["adgroup", "creative"]
26
- adjust_api = AdjustAPI(api_key=api_key)
27
- yield from adjust_api.fetch_report_data(
28
- start_date=start_date, end_date=end_date, dimensions=dimensions
29
- )
30
-
31
- return campaigns, creatives