ingestr 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/factory.py CHANGED
@@ -22,6 +22,7 @@ from ingestr.src.sources import (
22
22
  NotionSource,
23
23
  ShopifySource,
24
24
  SqlSource,
25
+ StripeAnalyticsSource,
25
26
  )
26
27
 
27
28
  SQL_SOURCE_SCHEMES = [
@@ -102,6 +103,9 @@ class SourceDestinationFactory:
102
103
  return ShopifySource()
103
104
  elif self.source_scheme == "gorgias":
104
105
  return GorgiasSource()
106
+ elif self.source_scheme == "stripe":
107
+ return StripeAnalyticsSource()
108
+
105
109
  else:
106
110
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
107
111
 
ingestr/src/sources.py CHANGED
@@ -12,6 +12,7 @@ from ingestr.src.mongodb import mongodb_collection
12
12
  from ingestr.src.notion import notion_databases
13
13
  from ingestr.src.shopify import shopify_source
14
14
  from ingestr.src.sql_database import sql_table
15
+ from ingestr.src.stripe_analytics import stripe_source
15
16
  from ingestr.src.table_definition import table_string_to_dataclass
16
17
 
17
18
 
@@ -295,3 +296,57 @@ class GoogleSheetsSource:
295
296
  range_names=[table_fields.dataset],
296
297
  get_named_ranges=False,
297
298
  )
299
+
300
+
301
class StripeAnalyticsSource:
    """Build the dlt source that loads one Stripe resource for ingestr.

    The source URI must carry the Stripe secret key as the ``api_key`` query
    parameter; the requested table selects which Stripe resource is loaded.
    """

    # Supported Stripe resources keyed by their lower-cased spelling, so the
    # user-supplied table name can be matched case-insensitively and mapped
    # back to the exact name handed to ``stripe_source``.
    _ENDPOINTS_BY_KEY = {
        name.lower(): name
        for name in (
            "Subscription",
            "Account",
            "Coupon",
            "Customer",
            "Product",
            "Price",
            "BalanceTransaction",
            "Invoice",
            "Event",
        )
    }

    def handles_incrementality(self) -> bool:
        """Return True: this source does not accept a user-supplied incremental key."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the dlt source restricted to the requested Stripe resource.

        Args:
            uri: Source URI; its query string must contain ``api_key``.
            table: Name of the Stripe resource. Matching ignores case and
                underscores, so ``customer``, ``BalanceTransaction`` and
                ``balance_transaction`` are all accepted.
            **kwargs: Optional ``interval_start`` / ``interval_end`` values are
                forwarded as ``start_date`` / ``end_date``. ``incremental_key``
                must not be set.

        Returns:
            The result of ``stripe_source(...)`` narrowed to the single
            selected resource via ``with_resources``.

        Raises:
            ValueError: If ``incremental_key`` is given, if the URI has no
                ``api_key``, or if the resource is not supported.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Stripe takes care of incrementality on its own, you should not provide incremental_key"
            )

        # parse_qs maps each parameter to a list of values; the first one is used.
        api_key = parse_qs(urlparse(uri).query).get("api_key")
        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Stripe")

        # str.capitalize() would lower-case everything after the first letter
        # ("BalanceTransaction" -> "Balancetransaction"), so multi-word
        # resources could never match. Look the name up case-insensitively.
        endpoint = self._ENDPOINTS_BY_KEY.get(table.replace("_", "").lower())
        if endpoint is None:
            raise ValueError(
                f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        # Only forward the interval bounds that were actually provided.
        date_args = {}
        if kwargs.get("interval_start"):
            date_args["start_date"] = kwargs.get("interval_start")

        if kwargs.get("interval_end"):
            date_args["end_date"] = kwargs.get("interval_end")

        return stripe_source(
            endpoints=[
                endpoint,
            ],
            stripe_secret_key=api_key[0],
            **date_args,
        ).with_resources(endpoint)
@@ -0,0 +1,99 @@
1
+ """This source uses Stripe API and dlt to load data such as Customer, Subscription, Event etc. to the database and to calculate the MRR and churn rate."""
2
+
3
+ from typing import Any, Dict, Generator, Iterable, Optional, Tuple
4
+
5
+ import dlt
6
+ import stripe
7
+ from dlt.sources import DltResource
8
+ from pendulum import DateTime
9
+
10
+ from .helpers import pagination, transform_date
11
+ from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
12
+
13
+
14
@dlt.source
def stripe_source(
    endpoints: Tuple[str, ...] = ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Yield one dlt resource per requested Stripe endpoint, loaded in 'replace' mode.

    Stripe API responses do not provide an "updated" key, so these resources
    are declared with the 'replace' write disposition. Endpoints such as
    'Event' or 'Invoice' can be loaded here as well, although they can also be
    loaded incrementally (see incremental_stripe_source).

    Args:
        endpoints (Tuple[str, ...]): Names of the endpoints to load. Defaults to ENDPOINTS.
        stripe_secret_key (str): The API key used for authentication. Defaults to the value in the `dlt.secrets` object.
        start_date (Optional[DateTime]): Optional lower bound passed to pagination. Defaults to None.
        end_date (Optional[DateTime]): Optional upper bound passed to pagination. Defaults to None.

    Returns:
        Iterable[DltResource]: One resource for each name in `endpoints`.
    """
    # The stripe client is configured through module-level attributes.
    stripe.api_key = stripe_secret_key
    stripe.api_version = "2022-11-15"

    def stripe_resource(
        endpoint: str,
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Stream every page returned for this endpoint within the date bounds.
        yield from pagination(endpoint, start_date, end_date)

    for endpoint_name in endpoints:
        build_resource = dlt.resource(
            stripe_resource,
            name=endpoint_name,
            write_disposition="replace",
        )
        yield build_resource(endpoint_name)
52
+
53
+
54
@dlt.source
def incremental_stripe_source(
    endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    initial_start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Yield one dlt resource per endpoint, loaded incrementally in 'append' mode.

    Stripe API responses do not include an "updated" key, so incremental
    loading is only meaningful for endpoints whose objects are never edited.
    Each resource tracks the "created" field through dlt.sources.incremental
    and uses "id" as its primary key.

    Args:
        endpoints (tuple): Names of the endpoints to load. Defaults to INCREMENTAL_ENDPOINTS.
        stripe_secret_key (str): The API key used for authentication. Defaults to the value in the `dlt.secrets` object.
        initial_start_date (Optional[DateTime]): Initial value for the "created" incremental.
            When None, -1 is used as the initial value. Defaults to None.
        end_date (Optional[DateTime]): Optional upper bound passed to pagination. Defaults to None.

    Returns:
        Iterable[DltResource]: One resource for each name in `endpoints`.
    """
    # The stripe client is configured through module-level attributes.
    stripe.api_key = stripe_secret_key
    stripe.api_version = "2022-11-15"

    if initial_start_date is None:
        start_date_unix = -1
    else:
        start_date_unix = transform_date(initial_start_date)

    def incremental_resource(
        endpoint: str,
        created: Optional[Any] = dlt.sources.incremental(
            "created", initial_value=start_date_unix
        ),
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Resume from the last "created" value recorded by the incremental.
        resume_from = created.last_value
        yield from pagination(endpoint, start_date=resume_from, end_date=end_date)

    for endpoint_name in endpoints:
        build_resource = dlt.resource(
            incremental_resource,
            name=endpoint_name,
            write_disposition="append",
            primary_key="id",
        )
        yield build_resource(endpoint_name)
@@ -0,0 +1,68 @@
1
+ """Stripe analytics source helpers"""
2
+
3
+ from typing import Any, Dict, Iterable, Optional, Union
4
+
5
+ import stripe
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItem
8
+ from pendulum import DateTime
9
+
10
+
11
def pagination(
    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
) -> Iterable[TDataItem]:
    """
    Yield the pages of an endpoint listing, one list of items per request.

    Args:
        endpoint (str): The endpoint to retrieve data from.
        start_date (Optional[Any]): Optional lower bound forwarded to stripe_get_data. Defaults to None.
        end_date (Optional[Any]): Optional upper bound forwarded to stripe_get_data. Defaults to None.

    Returns:
        Iterable[TDataItem]: The "data" list of every response, in request order.
    """
    cursor = None
    has_more = True
    while has_more:
        response = stripe_get_data(
            endpoint,
            start_date=start_date,
            end_date=end_date,
            starting_after=cursor,
        )
        page = response["data"]

        # The id of the last item on a page is the cursor for the next request.
        if len(page) > 0:
            cursor = page[-1]["id"]
        yield page

        has_more = response["has_more"]
40
+
41
+
42
def transform_date(date: Union[str, "DateTime", int]) -> int:
    """Convert a date-like value to a Unix timestamp in whole seconds.

    Args:
        date: An ISO 8601 string (e.g. "2024-01-01T00:00:00Z"), a datetime
            (pendulum ``DateTime`` or standard-library ``datetime``), or a
            value that is already a Unix timestamp.

    Returns:
        The timestamp as an int. Values that are neither strings nor
        datetimes are returned unchanged.
    """
    # Local import: keeps this helper independent of the module import block.
    from datetime import datetime

    if isinstance(date, str):
        # pendulum.from_format() expects pendulum tokens (YYYY-MM-DD...), not
        # strftime "%" directives, so the previous "%Y-%m-%dT%H:%M:%SZ"
        # pattern could never match. parse() understands ISO 8601 directly.
        date = pendulum.parse(date)
    if isinstance(date, datetime):
        # pendulum's DateTime subclasses datetime, so this branch also covers
        # standard-library datetimes, which previously fell through
        # unconverted. Naive datetimes follow datetime.timestamp() semantics
        # (interpreted in the local timezone).
        date = int(date.timestamp())
    return date
49
+
50
+
51
def stripe_get_data(
    resource: str,
    start_date: Optional[Any] = None,
    end_date: Optional[Any] = None,
    **kwargs: Any,
) -> Dict[Any, Any]:
    """Fetch one page (up to 100 items) of a Stripe resource listing.

    Args:
        resource: Name of the attribute on the ``stripe`` module to list,
            e.g. "Customer".
        start_date: Optional inclusive lower bound on "created"; truthy
            values are converted with transform_date.
        end_date: Optional exclusive upper bound on "created"; truthy values
            are converted with transform_date.
        **kwargs: Extra arguments forwarded to the resource's ``list`` call
            (for example ``starting_after``).

    Returns:
        The listing response converted to a plain dict.
    """
    created_from = transform_date(start_date) if start_date else start_date
    created_until = transform_date(end_date) if end_date else end_date

    if resource == "Subscription":
        # Request subscriptions of every status.
        kwargs["status"] = "all"

    api_resource = getattr(stripe, resource)
    listing = api_resource.list(
        created={"gte": created_from, "lt": created_until}, limit=100, **kwargs
    )
    return dict(listing)
@@ -0,0 +1,14 @@
1
+ """Stripe analytics source settings and constants"""
2
+
3
# Default endpoints: the most popular Stripe resources.
# Full list of the Stripe API endpoints you can find here: https://stripe.com/docs/api.
ENDPOINTS = (
    "Subscription",
    "Account",
    "Coupon",
    "Customer",
    "Product",
    "Price",
)

# Default endpoints for incremental loading.
INCREMENTAL_ENDPOINTS = (
    "Event",
    "Invoice",
    "BalanceTransaction",
)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.7.4"
1
+ __version__ = "0.7.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.4
3
+ Version: 0.7.5
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -38,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
38
38
  Requires-Dist: sqlalchemy-redshift==0.8.14
39
39
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
40
40
  Requires-Dist: sqlalchemy==1.4.52
41
+ Requires-Dist: stripe==10.7.0
41
42
  Requires-Dist: tqdm==4.66.2
42
43
  Requires-Dist: typer==0.12.3
43
44
  Description-Content-Type: text/markdown
@@ -175,22 +176,27 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
175
176
  <tr>
176
177
  <td>Gorgias</td>
177
178
  <td>✅</td>
178
- <td>❌</td>
179
+ <td>-</td>
179
180
  </tr>
180
181
  <tr>
181
182
  <td>Google Sheets</td>
182
183
  <td>✅</td>
183
- <td>❌</td>
184
+ <td>-</td>
184
185
  </tr>
185
186
  <tr>
186
187
  <td>Notion</td>
187
188
  <td>✅</td>
188
- <td>❌</td>
189
+ <td>-</td>
189
190
  </tr>
190
191
  <tr>
191
192
  <td>Shopify</td>
192
193
  <td>✅</td>
193
- <td>❌</td>
194
+ <td>-</td>
195
+ </tr>
196
+ <tr>
197
+ <td>Stripe</td>
198
+ <td>✅</td>
199
+ <td>-</td>
194
200
  </tr>
195
201
  </table>
196
202
 
@@ -1,9 +1,9 @@
1
1
  ingestr/main.py,sha256=j0pscsPbeJ9oYJiTCvymneZwg4Lc7KaR3GAMX0GG4To,16432
2
2
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
3
- ingestr/src/factory.py,sha256=XuT_8LvWd7gBxOjoD_NiG-jtPvHNQ9nqOeoCJzhRb6Y,3630
4
- ingestr/src/sources.py,sha256=QbSvECvGbHJKOpE9_dbq11343pA5ajsS9BPPPab1ivw,10007
3
+ ingestr/src/factory.py,sha256=nDbeOwhjsXGDr2yzeBSq2Pi7KNl5JADOOFwIm0KpGD4,3746
4
+ ingestr/src/sources.py,sha256=DF10NUwhPlHujGrNy2umK0VpDi_vl4v1KdjQwWexueU,11691
5
5
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
6
- ingestr/src/version.py,sha256=A6fZ_oURo3l_Fa_K29LgV21A4Onqu3NquwGYzL05E1Y,22
6
+ ingestr/src/version.py,sha256=6qL_qyowXO9Pc6v11Zx2s-yd28_548ZZC-OsfzO_Pjc,22
7
7
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
8
8
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
9
9
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -27,6 +27,9 @@ ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5
27
27
  ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
28
28
  ingestr/src/sql_database/override.py,sha256=xbKGDztCzvrhJ5kJTXERal3LA56bEeVug4_rrTs8DgA,333
29
29
  ingestr/src/sql_database/schema_types.py,sha256=qXTanvFPE8wMCSDzQWPDi5yqaO-llfrFXjiGJALI4NA,5013
30
+ ingestr/src/stripe_analytics/__init__.py,sha256=8yy6i4DAhUqY4ZForetQ0DWc_YQrY0FBH6yk0Z3m-Mw,4493
31
+ ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
32
+ ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
30
33
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
31
34
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
32
35
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
@@ -37,8 +40,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
37
40
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
38
41
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
39
42
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
40
- ingestr-0.7.4.dist-info/METADATA,sha256=VLL2Um1BU3x6Oz89Gx6d48O9ukAk4Ro7uy2dFIPTIo8,5829
41
- ingestr-0.7.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
42
- ingestr-0.7.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
43
- ingestr-0.7.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
44
- ingestr-0.7.4.dist-info/RECORD,,
43
+ ingestr-0.7.5.dist-info/METADATA,sha256=UiIrc0eVS5xIqjw_T6AiJ1hftWlms432WJTqgccyn_U,5934
44
+ ingestr-0.7.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
45
+ ingestr-0.7.5.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
46
+ ingestr-0.7.5.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
47
+ ingestr-0.7.5.dist-info/RECORD,,