ingestr 0.13.58__py3-none-any.whl → 0.13.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.58"
1
+ version = "v0.13.59"
ingestr/src/factory.py CHANGED
@@ -42,6 +42,7 @@ from ingestr.src.sources import (
42
42
  GoogleSheetsSource,
43
43
  GorgiasSource,
44
44
  HubspotSource,
45
+ IsocPulseSource,
45
46
  KafkaSource,
46
47
  KinesisSource,
47
48
  KlaviyoSource,
@@ -52,6 +53,7 @@ from ingestr.src.sources import (
52
53
  NotionSource,
53
54
  PersonioSource,
54
55
  PhantombusterSource,
56
+ PinterestSource,
55
57
  PipedriveSource,
56
58
  QuickBooksSource,
57
59
  S3Source,
@@ -172,8 +174,10 @@ class SourceDestinationFactory:
172
174
  "attio": AttioSource,
173
175
  "solidgate": SolidgateSource,
174
176
  "quickbooks": QuickBooksSource,
177
+ "isoc-pulse": IsocPulseSource,
175
178
  "smartsheet": SmartsheetSource,
176
179
  "sftp": SFTPSource,
180
+ "pinterest": PinterestSource,
177
181
  }
178
182
  destinations: Dict[str, Type[DestinationProtocol]] = {
179
183
  "bigquery": BigQueryDestination,
@@ -0,0 +1,159 @@
1
+ import math
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Any, Dict, Iterable, List, Optional
5
+
6
+ import dlt
7
+ from dlt.sources.rest_api import RESTAPIConfig, rest_api_resources
8
+
9
# Supported metric names mapped to the endpoint path used for each one.
# The path is relative to the API base URL configured in pulse_source().
METRICS: Dict[str, str] = {
    # DNSSEC metrics. "dnssec_adoption" and "dnssec_tld_adoption" share the
    # same base path; get_metric_cfg() appends a different sub-path for each
    # when options are supplied.
    "dnssec_adoption": "dnssec/adoption",
    "dnssec_tld_adoption": "dnssec/adoption",
    "dnssec_validation": "dnssec/validation",
    # Web protocol metrics.
    "http": "http",
    "http3": "http3",
    "https": "https",
    "ipv6": "ipv6",
    # The metric name uses an underscore, the endpoint path uses a hyphen.
    "net_loss": "net-loss",
    "resilience": "resilience",
    # Routing security metrics.
    "roa": "roa",
    "rov": "rov",
    # TLS metrics.
    "tls": "tls",
    "tls13": "tls13",
}
24
+
25
+
26
@dlt.source
def pulse_source(
    token: str,
    start_date: str,
    metric: str,
    opts: List[str],
    end_date: Optional[str] = None,
) -> Iterable[dlt.sources.DltResource]:
    """Yield the REST API resource(s) for a single Pulse metric.

    Args:
        token: API token, sent as a ``Bearer`` value in the
            ``Authorization`` header.
        start_date: Initial value of the incremental ``date`` cursor. It is
            also handed to ``get_metric_cfg`` and, for ``net_loss``, stamped
            onto every item.
        metric: Metric name; it is also used as the resource name.
        opts: Metric options, checked by ``validate`` and interpreted by
            ``get_metric_cfg``.
        end_date: Optional end value of the incremental cursor. When given it
            is also sent as the ``end_date`` query parameter.

    Raises:
        ValueError: Propagated from ``validate`` or ``get_metric_cfg``.
    """
    validate(metric, opts)
    metric_cfg = get_metric_cfg(metric, opts, start_date)

    # Key order matters only for overrides: metric-specific params are applied
    # after the start_date placeholder, end_date (if any) goes last.
    query: Dict[str, Any] = {"start_date": "{incremental.start_value}"}
    query.update(metric_cfg.params)
    if end_date is not None:
        query["end_date"] = end_date

    incremental: Dict[str, Any] = {
        "cursor_path": "date",
        "start_param": "start_date",
        "end_param": "end_date",
        "initial_value": start_date,
        "end_value": end_date,
        "range_start": "closed",
        "range_end": "closed",
    }

    endpoint: Dict[str, Any] = {
        "path": metric_cfg.path,
        "params": query,
        "incremental": incremental,
        "paginator": "single_page",
    }

    metric_resource = {
        "name": metric,
        "write_disposition": "merge",
        "primary_key": "date",
        "columns": {"date": {"data_type": "date"}},
        "endpoint": endpoint,
    }

    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://pulse.internetsociety.org/api/",
            "headers": {"Authorization": f"Bearer {token}"},
        },
        "resource_defaults": {
            "write_disposition": "merge",
            "primary_key": "date",
        },
        "resources": [metric_resource],  # type:ignore
    }

    built = rest_api_resources(config)
    if metric == "net_loss":
        # Every net_loss item gets its "date" field set to start_date.
        # NOTE(review): presumably the net-loss payload carries no usable
        # date of its own - confirm against the API response.
        built[0].add_map(add_date(start_date))
    yield from built
82
+
83
+
84
@dataclass
class MetricCfg:
    """Endpoint path and query parameters resolved for one metric request."""

    # Endpoint path, relative to the API base URL.
    path: str
    # Extra query parameters to send with the request (may be empty).
    params: Dict[str, Any]
88
+
89
+
90
def get_metric_cfg(metric: str, opts: List[str], start_date: str) -> MetricCfg:
    """Resolve the endpoint path and query parameters for a metric.

    Without options the metric's base path from ``METRICS`` is used with no
    extra parameters. With options the interpretation depends on the metric:

    * ``https``: last option is the country; ``topsites`` is true when the
      literal option ``"topsites"`` is present.
    * ``dnssec_validation`` / ``dnssec_tld_adoption``: last option is the
      country.
    * ``dnssec_adoption``: last option is appended under ``/domains/``.
    * ``ipv6``: ``"topsites"`` selects the top-sites view, otherwise the last
      option is the country.
    * ``roa``: one option is the IP version; with more, the last is the
      country and the one before it the IP version.
    * ``net_loss``: the last two options are shutdown type and country.
    * ``resilience``: last option is the country; year and quarter are
      derived from ``start_date``.

    Args:
        metric: Metric name; must be a key of ``METRICS``.
        opts: Options in the order supplied by the caller.
        start_date: Date in ``YYYY-MM-DD`` form; only parsed for
            ``resilience``.

    Returns:
        The resolved ``MetricCfg``.

    Raises:
        ValueError: If the metric is unknown, does not accept options, or
            ``net_loss`` is given fewer than two options. ``strptime`` also
            raises ``ValueError`` for a malformed ``start_date``.
    """
    path = METRICS.get(metric)
    if path is None:
        raise ValueError(f"Unknown metric '{metric}'.")
    if not opts:
        return MetricCfg(path=path, params={})

    if metric == "https":
        return MetricCfg(
            path=f"{path}/country/{opts[-1]}",
            params={"topsites": "topsites" in opts},
        )
    if metric in ("dnssec_validation", "dnssec_tld_adoption"):
        return MetricCfg(path=f"{path}/country/{opts[-1]}", params={})
    if metric == "dnssec_adoption":
        return MetricCfg(path=f"{path}/domains/{opts[-1]}", params={})
    if metric == "ipv6":
        if "topsites" in opts:
            return MetricCfg(path=path, params={"topsites": True})
        return MetricCfg(path=f"{path}/country/{opts[-1]}", params={})
    if metric == "roa":
        if len(opts) > 1:
            return MetricCfg(
                path=f"{path}/country/{opts[-1]}",
                params={"ip_version": opts[-2]},
            )
        return MetricCfg(path=path, params={"ip_version": opts[-1]})
    if metric == "net_loss":
        # Fail with a clear message instead of an IndexError on opts[-2].
        if len(opts) < 2:
            raise ValueError(
                "For 'net_loss' metric, two options are required: "
                "'shutdown_type' and 'country'."
            )
        return MetricCfg(
            path=path,
            params={
                "country": opts[-1],
                "shutdown_type": opts[-2],
            },
        )
    if metric == "resilience":
        date = datetime.strptime(start_date, "%Y-%m-%d")
        return MetricCfg(
            path=path,
            params={
                "country": opts[-1],
                "year": date.year,
                # Calendar quarter: Jan-Mar -> 1, Apr-Jun -> 2, Jul-Sep -> 3,
                # Oct-Dec -> 4. The earlier floor(month / 4) + 1 put July in
                # quarter 2 and October/November in quarter 3.
                "quarter": (date.month - 1) // 3 + 1,
            },
        )

    raise ValueError(
        f"Unsupported metric '{metric}' with options {opts}. "
        "Please check the metric and options."
    )
141
+
142
+
143
def add_date(start_date: str):
    """Return a mapper that writes ``start_date`` into each item's ``"date"``.

    The returned callable mutates the item it receives and returns that same
    object.
    """

    def _set_date(record: dict):
        record.update({"date": start_date})
        return record

    return _set_date
149
+
150
+
151
def validate(metric: str, opts: List[str]) -> None:
    """Check that the number of options is acceptable for ``metric``.

    Raises:
        ValueError: If ``net_loss`` is not given exactly two options, or if
            ``http``, ``http3``, ``tls``, ``tls13`` or ``rov`` is given any
            option at all.
    """
    count = len(opts)

    if metric == "net_loss":
        if count != 2:
            raise ValueError(
                "For 'net_loss' metric, two options are required: "
                "'shutdown_type' and 'country'."
            )
        return

    takes_no_options = ("http", "http3", "tls", "tls13", "rov")
    if metric in takes_no_options and count > 0:
        raise ValueError(f"metric '{metric}' does not support options. ")
@@ -0,0 +1,82 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.time import ensure_pendulum_datetime
6
+ from dlt.common.typing import TDataItem
7
+ from dlt.sources import DltResource
8
+ from dlt.sources.helpers import requests
9
+
10
+
11
@dlt.source(name="pinterest", max_table_nesting=0)
def pinterest_source(
    start_date: pendulum.DateTime,
    access_token: str,
    page_size: int = 200,
    end_date: pendulum.DateTime | None = None,
) -> Iterable[DltResource]:
    """Build the ``pins`` and ``boards`` resources for the Pinterest v5 API.

    Args:
        start_date: Initial value of the ``created_at`` cursor; items created
            at or before the effective start are skipped.
        access_token: Token sent as a ``Bearer`` ``Authorization`` header.
        page_size: Value of the ``page_size`` query parameter.
        end_date: Optional end value of the cursor. When omitted, the current
            UTC time at extraction is used as the upper bound.

    Returns:
        The ``pins`` and ``boards`` resources.
    """
    api_root = "https://api.pinterest.com/v5"
    http = requests.Session()
    http.headers.update({"Authorization": f"Bearer {access_token}"})

    def fetch_data(
        endpoint: str,
        start_dt: pendulum.DateTime,
        end_dt: pendulum.DateTime,
    ) -> Iterable[TDataItem]:
        # Walk every page of the endpoint, following the "bookmark" value
        # returned with each page until none is returned.
        url = f"{api_root}/{endpoint}"
        query = {"page_size": page_size}
        while True:
            response = http.get(url, params=query)
            response.raise_for_status()
            payload = response.json()

            for entry in payload.get("items") or []:
                created = ensure_pendulum_datetime(entry["created_at"])
                # Keep only items with start_dt < created_at <= end_dt.
                if created <= start_dt or created > end_dt:
                    continue
                entry["created_at"] = created
                yield entry

            next_bookmark = payload.get("bookmark")
            if not next_bookmark:
                return
            query["bookmark"] = next_bookmark

    def _window(cursor):
        # Lower bound: the cursor's last value, else the configured start.
        # Upper bound: "now" in UTC when no end_date was configured.
        lower = cursor.last_value or start_date
        upper = pendulum.now("UTC") if end_date is None else cursor.end_value
        return lower, upper

    @dlt.resource(write_disposition="merge", primary_key="id")
    def pins(
        datetime=dlt.sources.incremental(
            "created_at",
            initial_value=start_date,
            end_value=end_date,
        ),
    ) -> Iterable[TDataItem]:
        since, until = _window(datetime)
        yield from fetch_data("pins", since, until)

    @dlt.resource(write_disposition="merge", primary_key="id")
    def boards(
        datetime=dlt.sources.incremental(
            "created_at",
            initial_value=start_date,
            end_value=end_date,
        ),
    ) -> Iterable[TDataItem]:
        since, until = _window(datetime)
        yield from fetch_data("boards", since, until)

    return pins, boards
ingestr/src/sources.py CHANGED
@@ -699,9 +699,7 @@ class StripeAnalyticsSource:
699
699
  )
700
700
 
701
701
  if incremental and not sync:
702
- raise ValueError(
703
- "incremental loads must be used with sync loading"
704
- )
702
+ raise ValueError("incremental loads must be used with sync loading")
705
703
 
706
704
  if incremental:
707
705
  from ingestr.src.stripe_analytics import incremental_stripe_source
@@ -2783,3 +2781,72 @@ class QuickBooksSource:
2783
2781
  minor_version=minor_version[0],
2784
2782
  object=table_name,
2785
2783
  ).with_resources(table_name)
2784
+
2785
+
2786
class IsocPulseSource:
    """Source adapter that builds the Internet Society Pulse dlt source."""

    def handles_incrementality(self) -> bool:
        # Always True. NOTE(review): presumably tells the caller that this
        # source applies its own incremental loading - confirm in the factory.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Pulse source restricted to the requested metric.

        Args:
            uri: Source URI; the ``token`` query parameter is required.
            table: Metric name, optionally followed by ``:``-separated
                options, e.g. ``metric:opt1:opt2``.
            **kwargs: ``interval_start`` and ``interval_end`` are read.
                ``interval_start`` defaults to 30 days before the current
                UTC time.

        Raises:
            MissingValueError: If ``token`` is absent or blank.
        """
        parsed_uri = urlparse(uri)
        params = parse_qs(parsed_uri.query)
        token = params.get("token")
        if not token or not token[0].strip():
            raise MissingValueError("token", "Internet Society Pulse")

        start_date = kwargs.get("interval_start")
        if start_date is None:
            start_date = pendulum.now().in_tz("UTC").subtract(days=30)

        end_date = kwargs.get("interval_end")

        # Strip every part, so the metric name is cleaned the same way as
        # its options (previously only the options were stripped).
        metric, *opts = (part.strip() for part in table.split(":"))

        from ingestr.src.isoc_pulse import pulse_source

        src = pulse_source(
            token=token[0],
            start_date=start_date.strftime("%Y-%m-%d"),
            # Use the same YYYY-MM-DD form as start_date; str(end_date) would
            # produce a full timestamp that does not match the start format.
            end_date=end_date.strftime("%Y-%m-%d") if end_date else None,
            metric=metric,
            opts=opts,
        )
        return src.with_resources(metric)
2819
+
2820
+
2821
class PinterestSource:
    """Source adapter that builds the Pinterest dlt source."""

    def handles_incrementality(self) -> bool:
        # Always True. NOTE(review): presumably tells the caller that this
        # source applies its own incremental loading - confirm in the factory.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Pinterest source restricted to ``table``.

        Args:
            uri: Source URI; the ``access_token`` query parameter is required.
            table: Resource to load, either ``"pins"`` or ``"boards"``.
            **kwargs: ``interval_start`` and ``interval_end`` are read.
                ``interval_start`` defaults to 2020-01-01 UTC.

        Raises:
            MissingValueError: If ``access_token`` is absent or blank.
            UnsupportedResourceError: If ``table`` is not a supported
                resource.
        """
        parsed = urlparse(uri)
        params = parse_qs(parsed.query)
        access_token = params.get("access_token")

        # Reject a whitespace-only token as well, matching the token check
        # done by IsocPulseSource.
        if not access_token or not access_token[0].strip():
            raise MissingValueError("access_token", "Pinterest")

        start_date = kwargs.get("interval_start")
        if start_date is not None:
            start_date = ensure_pendulum_datetime(start_date)
        else:
            start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")

        end_date = kwargs.get("interval_end")
        if end_date is not None:
            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")

        # Check the table before the lazy import so an unsupported name fails
        # without loading the Pinterest module.
        if table not in {"pins", "boards"}:
            raise UnsupportedResourceError(table, "Pinterest")

        from ingestr.src.pinterest import pinterest_source

        return pinterest_source(
            access_token=access_token[0],
            start_date=start_date,
            end_date=end_date,
        ).with_resources(table)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.58
3
+ Version: 0.13.59
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
2
  ingestr/main.py,sha256=GkC1hdq8AVGrvolc95zMfjmibI95p2pmFkbgCOVf-Og,26311
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=dxeGdxKLnJ0k4MJj-q8e8oIgp6bKcINiIoXXhkYwxxg,21
5
+ ingestr/src/buildinfo.py,sha256=RkwXqGVjCaeOi85qaAT-2wI5-IYpZt76x8qkp2dVM1o,21
6
6
  ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=R7KzGRQ9tYZ_N-daD9OtnEp0K-DrsP8bUyXWdv4LV4A,6200
8
+ ingestr/src/factory.py,sha256=OKqjYqvHhgaOF48-eSNSabcfXt4Gmr1yZ8cFGizXh0g,6319
9
9
  ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
13
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=YiVKP36JM9EuAdXV2SQy5o9rhtIcVvVfsBnEoscvh6E,96824
14
+ ingestr/src/sources.py,sha256=C2qPplmvRQdm1nzSPvGbMpYG6oGCbGZMTlVtVS48n6k,98977
15
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,6 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
73
73
  ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
74
74
  ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
75
75
  ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
76
+ ingestr/src/isoc_pulse/__init__.py,sha256=WDgKBn15gyQheXE6oJ_2OuMUQwKPbAjflKAsnucu7u8,4647
76
77
  ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
77
78
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
78
79
  ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -96,6 +97,7 @@ ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV
96
97
  ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
97
98
  ingestr/src/phantombuster/__init__.py,sha256=8AQTiA8fp1NT8TellQQqwBCl6vGvGwUBLif6LIzgAik,1786
98
99
  ingestr/src/phantombuster/client.py,sha256=9zx58sFunXjUNh6jeEYLNfwNxGxX9odifwAmS0E9AaY,3018
100
+ ingestr/src/pinterest/__init__.py,sha256=5xTLNE2Vn_00PXMLKjY41Fh1LsyzM7UnBSKxKPITUl0,2581
99
101
  ingestr/src/pipedrive/__init__.py,sha256=iRrxeMwo8_83ptgGnTFTNHV1nYvIsFfg0a3XzugPYeI,6982
100
102
  ingestr/src/pipedrive/settings.py,sha256=q119Fy4C5Ip1rMoCILX2BkHV3bwiXC_dW58KIiDUzsY,708
101
103
  ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvzoE,69
@@ -141,8 +143,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
141
143
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
142
144
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
143
145
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
144
- ingestr-0.13.58.dist-info/METADATA,sha256=jwClpXi-k8pLn-LgxoUt1Ohc22YHmb3P7ZVEBrqUEZo,14993
145
- ingestr-0.13.58.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
146
- ingestr-0.13.58.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
147
- ingestr-0.13.58.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
148
- ingestr-0.13.58.dist-info/RECORD,,
146
+ ingestr-0.13.59.dist-info/METADATA,sha256=8yM2vLMiUV_zBq15gg_1Vf6UHgtwoRzawb_tcT_K3Wc,14993
147
+ ingestr-0.13.59.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
148
+ ingestr-0.13.59.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
149
+ ingestr-0.13.59.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
150
+ ingestr-0.13.59.dist-info/RECORD,,