ingestr 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import hashlib
2
2
  from datetime import datetime
3
3
  from enum import Enum
4
+ import tempfile
4
5
  from typing import Optional
5
6
 
6
7
  import dlt
@@ -236,6 +237,13 @@ def ingest(
236
237
  envvar="SCHEMA_NAMING",
237
238
  ),
238
239
  ] = SchemaNaming.default, # type: ignore
240
+ pipelines_dir: Annotated[
241
+ Optional[str],
242
+ typer.Option(
243
+ help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
244
+ envvar="PIPELINES_DIR",
245
+ ),
246
+ ] = None, # type: ignore
239
247
  ):
240
248
  track(
241
249
  "command_triggered",
@@ -280,13 +288,18 @@ def ingest(
280
288
  if progress == Progress.log:
281
289
  progressInstance = LogCollector(dump_system_stats=False)
282
290
 
291
+ is_pipelines_dir_temp = False
292
+ if pipelines_dir is None:
293
+ pipelines_dir = tempfile.mkdtemp()
294
+ is_pipelines_dir_temp = True
295
+
283
296
  pipeline = dlt.pipeline(
284
297
  pipeline_name=m.hexdigest(),
285
298
  destination=destination.dlt_dest(
286
299
  uri=dest_uri,
287
300
  ),
288
301
  progress=progressInstance,
289
- pipelines_dir="pipeline_data",
302
+ pipelines_dir=pipelines_dir,
290
303
  refresh="drop_resources" if full_refresh else None,
291
304
  )
292
305
 
@@ -362,6 +375,8 @@ def ingest(
362
375
  if incremental_strategy != IncrementalStrategy.none:
363
376
  write_disposition = incremental_strategy.value
364
377
 
378
+ start_time = datetime.now()
379
+
365
380
  run_info: LoadInfo = pipeline.run(
366
381
  dlt_source,
367
382
  **destination.dlt_run_params(
@@ -389,11 +404,17 @@ def ingest(
389
404
 
390
405
  destination.post_load()
391
406
 
407
+ end_time = datetime.now()
392
408
  elapsedHuman = ""
393
409
  if run_info.started_at:
394
- elapsed = run_info.finished_at - run_info.started_at
410
+ elapsed = end_time - start_time
395
411
  elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
396
412
 
413
+ # remove the pipelines_dir folder if it was created by ingestr
414
+ if is_pipelines_dir_temp:
415
+ import shutil
416
+ shutil.rmtree(pipelines_dir)
417
+
397
418
  print(
398
419
  f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
399
420
  )
@@ -0,0 +1,166 @@
1
+ """A source loading player profiles and games from chess.com api"""
2
+
3
from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence
4
+
5
+ import dlt
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItem
8
+ from dlt.sources import DltResource
9
+ from dlt.sources.helpers import requests
10
+
11
+ from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
12
+ from .settings import UNOFFICIAL_CHESS_API_URL
13
+
14
+
15
@dlt.source(name="chess")
def source(
    players: List[str],
    start_month: Optional[str] = None,
    end_month: Optional[str] = None,
) -> Sequence[DltResource]:
    """
    A dlt source for the chess.com api. It groups several resources (in this case chess.com API endpoints) containing
    various types of data: user profiles or chess match results

    Args:
        players (List[str]): A list of the player usernames for which to get the data.
        start_month (str, optional): Filters out all the matches happening before `start_month`. Defaults to None.
        end_month (str, optional): Filters out all the matches happening after `end_month`. Defaults to None.

    Returns:
        Sequence[DltResource]: A sequence of resources that can be selected from including players_profiles,
            players_archives, players_games, players_online_status
    """
    # Note: annotations use Optional[str] explicitly — a bare `str = None`
    # default is invalid per PEP 484 (implicit Optional is disallowed).
    return (
        players_profiles(players),
        players_archives(players),
        # only players_games understands the month window; the other
        # resources always load the full current state
        players_games(players, start_month=start_month, end_month=end_month),
        players_online_status(players),
    )
36
+
37
+
38
@dlt.resource(
    write_disposition="replace",
    columns={
        "last_online": {"data_type": "timestamp"},
        "joined": {"data_type": "timestamp"},
    },
)
def players_profiles(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield one chess.com profile per requested username.

    Args:
        players (List[str]): List of player usernames to retrieve profiles for.

    Yields:
        Iterator[TDataItem]: An iterator over player profiles data.
    """

    @dlt.defer
    def _fetch_profile(username: str) -> TDataItem:
        # deferred so the per-player HTTP requests run in parallel
        return get_path_with_retry(f"player/{username}")

    yield from (_fetch_profile(name) for name in players)
61
+
62
+
63
@dlt.resource(write_disposition="replace", selected=False)
def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
    """
    Yield, for every requested player, the URLs of their monthly game archives.

    Args:
        players (List[str]): List of player usernames to retrieve archives for.

    Yields:
        Iterator[List[TDataItem]]: One list of archive URLs per player.
    """
    for username in players:
        payload = get_path_with_retry(f"player/{username}/games/archives")
        # a player with no games has no "archives" key; yield an empty list then
        yield payload.get("archives", [])
75
+
76
+
77
@dlt.resource(
    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
)
def players_games(
    players: List[str],
    start_month: Optional[str] = None,
    end_month: Optional[str] = None,
) -> Iterator[Callable[[], List[TDataItem]]]:
    """
    Yields `players` games that happened between `start_month` and `end_month`.

    Args:
        players (List[str]): List of player usernames to retrieve games for.
        start_month (str, optional): The starting month in the format "YYYY/MM". Defaults to None.
        end_month (str, optional): The ending month in the format "YYYY/MM". Defaults to None.

    Yields:
        Iterator[Callable[[], List[TDataItem]]]: An iterator over callables that return a list of games for each player.
    """
    # do a simple validation to prevent common mistakes in month format
    validate_month_string(start_month)
    validate_month_string(end_month)

    # get a list of already checked archives
    # from your point of view, the state is python dictionary that will have the same content the next time this function is called
    checked_archives = dlt.current.resource_state().setdefault("archives", [])
    # get player archives, note that you can call the resource like any other function and just iterate it like a list
    archives = players_archives(players)

    # get archives in parallel by decorating the http request with defer
    @dlt.defer
    def _get_archive(url: str) -> List[TDataItem]:
        try:
            games = get_url_with_retry(url).get("games", [])
            return games  # type: ignore
        except requests.HTTPError as http_err:
            # sometimes archives are not available and the error seems to be permanent
            if http_err.response.status_code == 404:
                return []
            raise

    # enumerate the archives
    for url in archives:
        # the `url` format is https://api.chess.com/pub/player/{username}/games/{YYYY}/{MM},
        # so the trailing 7 characters are the "YYYY/MM" month and compare
        # lexicographically with the validated month bounds
        if start_month and url[-7:] < start_month:
            continue
        if end_month and url[-7:] > end_month:
            continue
        # do not download archive again
        if url in checked_archives:
            continue
        checked_archives.append(url)
        # get the filtered archive
        yield _get_archive(url)
126
+
127
+
128
@dlt.resource(write_disposition="append")
def players_online_status(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield the current online status for every requested player.

    Args:
        players (List[str]): List of player usernames to check online status for.

    Yields:
        Iterator[TDataItem]: An iterator over the online status of each player.
    """
    # the official status endpoint seems to be removed, so the unofficial one is used
    for player in players:
        status = get_url_with_retry(f"{UNOFFICIAL_CHESS_API_URL}user/popup/{player}")
        # keep only the relevant fields of the response
        record = {"username": player}
        record["onlineStatus"] = status["onlineStatus"]
        record["lastLoginDate"] = status["lastLoginDate"]
        record["check_time"] = pendulum.now()  # dlt can deal with native python dates
        yield record
147
+
148
+
149
@dlt.source
def chess_dlt_config_example(
    secret_str: str = dlt.secrets.value,
    secret_dict: Dict[str, Any] = dlt.secrets.value,
    config_int: int = dlt.config.value,
) -> DltResource:
    """
    An example of a source that uses dlt to provide secrets and config values.

    Args:
        secret_str (str, optional): Secret string provided by dlt.secrets.value. Defaults to dlt.secrets.value.
        secret_dict (Dict[str, Any], optional): Secret dictionary provided by dlt.secrets.value. Defaults to dlt.secrets.value.
        config_int (int, optional): Config integer provided by dlt.config.value. Defaults to dlt.config.value.

    Returns:
        DltResource: Returns a resource yielding the configured values.
    """
    # just a test: expose the injected values back as a resource
    injected_values = [secret_str, secret_dict, config_int]
    return dlt.resource(injected_values, name="config_values")
@@ -0,0 +1,21 @@
1
+ """Chess source helpers"""
2
+
3
+ from dlt.common.typing import StrAny
4
+ from dlt.sources.helpers import requests
5
+
6
+ from .settings import OFFICIAL_CHESS_API_URL
7
+
8
+
9
def get_url_with_retry(url: str) -> StrAny:
    """Fetch *url* and return the decoded JSON payload.

    NOTE(review): retries are presumably provided by dlt's requests helper
    (hence the name) — confirm against dlt.sources.helpers.requests.
    """
    response = requests.get(url)
    return response.json()  # type: ignore
12
+
13
+
14
def get_path_with_retry(path: str) -> StrAny:
    """Fetch *path* relative to the official chess.com API root and return JSON."""
    url = f"{OFFICIAL_CHESS_API_URL}{path}"
    return get_url_with_retry(url)
16
+
17
+
18
def validate_month_string(string: Optional[str]) -> None:
    """Validate that *string* is in "YYYY/MM" format.

    Falsy values (None, "") are accepted, matching the optional month
    filters of `players_games`. Month strings are compared lexicographically
    against the 7-character `url[-7:]` suffix of archive URLs, so the value
    must be exactly 7 characters with "/" at index 4.

    Raises:
        ValueError: If the string is non-empty and not in "YYYY/MM" shape.
            (Previously a string shorter than 5 characters raised an
            opaque IndexError instead.)
    """
    if not string:
        return
    if len(string) != 7 or string[4] != "/":
        raise ValueError(string)
@@ -0,0 +1,4 @@
1
+ """Chess source settings and constants"""
2
+
3
+ OFFICIAL_CHESS_API_URL = "https://api.chess.com/pub/"
4
+ UNOFFICIAL_CHESS_API_URL = "https://www.chess.com/callback/"
ingestr/src/factory.py CHANGED
@@ -15,13 +15,16 @@ from ingestr.src.destinations import (
15
15
  SynapseDestination,
16
16
  )
17
17
  from ingestr.src.sources import (
18
+ ChessSource,
18
19
  GoogleSheetsSource,
19
20
  GorgiasSource,
21
+ HubspotSource,
20
22
  LocalCsvSource,
21
23
  MongoDbSource,
22
24
  NotionSource,
23
25
  ShopifySource,
24
26
  SqlSource,
27
+ StripeAnalyticsSource,
25
28
  )
26
29
 
27
30
  SQL_SOURCE_SCHEMES = [
@@ -102,6 +105,12 @@ class SourceDestinationFactory:
102
105
  return ShopifySource()
103
106
  elif self.source_scheme == "gorgias":
104
107
  return GorgiasSource()
108
+ elif self.source_scheme == "chess":
109
+ return ChessSource()
110
+ elif self.source_scheme == "stripe":
111
+ return StripeAnalyticsSource()
112
+ elif self.source_scheme == "hubspot":
113
+ return HubspotSource()
105
114
  else:
106
115
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
107
116
 
@@ -0,0 +1,281 @@
1
+ """
2
+ This is a module that provides a DLT source to retrieve data from multiple endpoints of the HubSpot API using a specified API key. The retrieved data is returned as a tuple of Dlt resources, one for each endpoint.
3
+
4
+ The source retrieves data from the following endpoints:
5
+ - CRM Companies
6
+ - CRM Contacts
7
+ - CRM Deals
8
+ - CRM Tickets
9
+ - CRM Products
10
+ - CRM Quotes
11
+ - Web Analytics Events
12
+
13
+ For each endpoint, a resource and transformer function are defined to retrieve data and transform it to a common format.
14
+ The resource functions yield the raw data retrieved from the API, while the transformer functions are used to retrieve
15
+ additional information from the Web Analytics Events endpoint.
16
+
17
+ The source also supports enabling Web Analytics Events for each endpoint by setting the corresponding enable flag to True.
18
+
19
+ Example:
20
+ To retrieve data from all endpoints, use the following code:
21
+
22
+ python
23
+
24
+ >>> resources = hubspot(api_key="your_api_key")
25
+ """
26
+
27
+ from typing import Any, Dict, Iterator, List, Literal, Sequence
28
+ from urllib.parse import quote
29
+
30
+ import dlt
31
+ from dlt.common import pendulum
32
+ from dlt.common.typing import TDataItems
33
+ from dlt.sources import DltResource
34
+
35
+ from .helpers import _get_property_names, fetch_data, fetch_property_history
36
+ from .settings import (
37
+ ALL,
38
+ CRM_OBJECT_ENDPOINTS,
39
+ DEFAULT_COMPANY_PROPS,
40
+ DEFAULT_CONTACT_PROPS,
41
+ DEFAULT_DEAL_PROPS,
42
+ DEFAULT_PRODUCT_PROPS,
43
+ DEFAULT_QUOTE_PROPS,
44
+ DEFAULT_TICKET_PROPS,
45
+ OBJECT_TYPE_PLURAL,
46
+ STARTDATE,
47
+ WEB_ANALYTICS_EVENTS_ENDPOINT,
48
+ )
49
+
50
+ THubspotObjectType = Literal["company", "contact", "deal", "ticket", "product", "quote"]
51
+
52
+
53
@dlt.source(name="hubspot")
def hubspot(
    api_key: str = dlt.secrets.value,
    include_history: bool = False,
    include_custom_props: bool = True,
) -> Sequence[DltResource]:
    """
    A DLT source that retrieves data from the HubSpot API using the
    specified API key.

    This function retrieves data for several HubSpot API endpoints,
    including companies, contacts, deals, tickets, products and web
    analytics events. It returns a tuple of Dlt resources, one for
    each endpoint.

    Args:
        api_key (Optional[str]):
            The API key used to authenticate with the HubSpot API. Defaults
            to dlt.secrets.value.
        include_history (Optional[bool]):
            Whether to load history of property changes along with entities.
            The history entries are loaded to separate tables.
        include_custom_props (Optional[bool]):
            Whether to also request custom (non-"hs_"-prefixed) properties
            in addition to the per-object default property lists.

    Returns:
        Sequence[DltResource]: Dlt resources, one for each HubSpot API endpoint.

    Notes:
        This function uses the `fetch_data` function to retrieve data from the
        HubSpot CRM API. The API key is passed to `fetch_data` as the
        `api_key` argument.
    """
    # Each inner resource captures the outer arguments as its own parameter
    # defaults; all of them delegate to crm_objects with the matching
    # CRM object-type key.

    @dlt.resource(name="companies", write_disposition="replace")
    def companies(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_COMPANY_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot companies resource"""
        # NOTE(review): this resource passes keyword arguments while the
        # sibling resources below pass the same values positionally — the
        # effect is identical, but consider unifying the style.
        yield from crm_objects(
            "company",
            api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="contacts", write_disposition="replace")
    def contacts(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_CONTACT_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot contacts resource"""
        yield from crm_objects(
            "contact",
            api_key,
            include_history,
            props,
            include_custom_props,
        )

    @dlt.resource(name="deals", write_disposition="replace")
    def deals(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_DEAL_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot deals resource"""
        yield from crm_objects(
            "deal",
            api_key,
            include_history,
            props,
            include_custom_props,
        )

    @dlt.resource(name="tickets", write_disposition="replace")
    def tickets(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_TICKET_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot tickets resource"""
        yield from crm_objects(
            "ticket",
            api_key,
            include_history,
            props,
            include_custom_props,
        )

    @dlt.resource(name="products", write_disposition="replace")
    def products(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot products resource"""
        yield from crm_objects(
            "product",
            api_key,
            include_history,
            props,
            include_custom_props,
        )

    @dlt.resource(name="quotes", write_disposition="replace")
    def quotes(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_QUOTE_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot quotes resource"""
        yield from crm_objects(
            "quote",
            api_key,
            include_history,
            props,
            include_custom_props,
        )

    return companies, contacts, deals, tickets, products, quotes
182
+
183
+
184
def crm_objects(
    object_type: str,
    api_key: str = dlt.secrets.value,
    include_history: bool = False,
    props: Sequence[str] = None,
    include_custom_props: bool = True,
) -> Iterator[TDataItems]:
    """Building blocks for CRM resources.

    Args:
        object_type: CRM object key (e.g. "company"), looked up in CRM_OBJECT_ENDPOINTS.
        api_key: The HubSpot API key used for authentication.
        include_history: Also yield property-change history, marked for a
            separate "<plural>_property_history" table.
        props: Properties to request; the ALL sentinel requests every
            property known for the object type.
        include_custom_props: Additionally request all properties whose name
            does not start with "hs_".

    Yields:
        Pages (lists) of CRM objects from the paginated endpoint.

    Raises:
        ValueError: If the joined property list exceeds HubSpot's 2000-symbol
            query length limit.
    """
    if props == ALL:
        props = list(_get_property_names(api_key, object_type))
    elif props is None:
        # be robust against a missing property list; previously this crashed
        # below with `None + list` when include_custom_props was True
        props = []

    if include_custom_props:
        all_props = _get_property_names(api_key, object_type)
        # NOTE(review): anything not prefixed "hs_" is treated as a custom property
        custom_props = [prop for prop in all_props if not prop.startswith("hs_")]
        props = list(props) + custom_props

    # HubSpot expects a comma-separated, deduplicated property list
    props = ",".join(sorted(set(props)))

    if len(props) > 2000:
        raise ValueError(
            "Your request to Hubspot is too long to process. "
            "Maximum allowed query length is 2000 symbols, while "
            f"your list of properties `{props[:200]}`... is {len(props)} "
            "symbols long. Use the `props` argument of the resource to "
            "set the list of properties to extract from the endpoint."
        )

    params = {"properties": props, "limit": 100}

    yield from fetch_data(CRM_OBJECT_ENDPOINTS[object_type], api_key, params=params)
    if include_history:
        # Get history separately, as requesting both all properties and history together
        # is likely to hit hubspot's URL length limit
        for history_entries in fetch_property_history(
            CRM_OBJECT_ENDPOINTS[object_type],
            api_key,
            props,
        ):
            yield dlt.mark.with_table_name(
                history_entries,
                OBJECT_TYPE_PLURAL[object_type] + "_property_history",
            )
226
+
227
+
228
@dlt.resource
def hubspot_events_for_objects(
    object_type: THubspotObjectType,
    object_ids: List[str],
    api_key: str = dlt.secrets.value,
    start_date: pendulum.DateTime = STARTDATE,
) -> DltResource:
    """
    A standalone DLT resources that retrieves web analytics events from the HubSpot API for a particular object type and list of object ids.

    Args:
        object_type(THubspotObjectType, required): One of the hubspot object types see definition of THubspotObjectType literal
        object_ids: (List[THubspotObjectType], required): List of object ids to track events
        api_key (str, optional): The API key used to authenticate with the HubSpot API. Defaults to dlt.secrets.value.
        start_date (datetime, optional): The initial date time from which start getting events, default to STARTDATE

    Returns:
        incremental dlt resource to track events for objects from the list
    """

    # The upper bound of the event window is frozen at resource-creation
    # time; events occurring later are picked up by the next pipeline run
    # via the incremental cursor below.
    end_date = pendulum.now().isoformat()
    name = object_type + "_events"

    def get_web_analytics_events(
        occurred_at: dlt.sources.incremental[str],
    ) -> Iterator[List[Dict[str, Any]]]:
        """
        A helper function that retrieves web analytics events for a given object type from the HubSpot API.

        Args:
            object_type (str): The type of object for which to retrieve web analytics events.

        Yields:
            dict: A dictionary representing a web analytics event.
        """
        # occurred_at.last_value is the incremental cursor: the newest
        # "occurredAt" seen in previous runs (initially start_date)
        for object_id in object_ids:
            yield from fetch_data(
                WEB_ANALYTICS_EVENTS_ENDPOINT.format(
                    objectType=object_type,
                    objectId=object_id,
                    occurredAfter=quote(occurred_at.last_value),
                    occurredBefore=quote(end_date),
                ),
                api_key=api_key,
            )

    # Wrap the generator as a dynamically named resource and bind the
    # incremental cursor on "occurredAt"; table_name routes each event to a
    # per-eventType table such as "<object_type>_events_<eventType>".
    return dlt.resource(
        get_web_analytics_events,
        name=name,
        primary_key="id",
        write_disposition="append",
        selected=True,
        table_name=lambda e: name + "_" + str(e["eventType"]),
    )(dlt.sources.incremental("occurredAt", initial_value=start_date.isoformat()))
@@ -0,0 +1,188 @@
1
+ """Hubspot source helpers"""
2
+
3
+ import urllib.parse
4
+ from typing import Any, Dict, Iterator, List, Optional
5
+
6
+ from dlt.sources.helpers import requests
7
+
8
+ from .settings import OBJECT_TYPE_PLURAL
9
+
10
+ BASE_URL = "https://api.hubapi.com/"
11
+
12
+
13
def get_url(endpoint: str) -> str:
    """Resolve *endpoint* against the HubSpot API base and return the absolute URL."""
    absolute_url = urllib.parse.urljoin(BASE_URL, endpoint)
    return absolute_url
16
+
17
+
18
+ def _get_headers(api_key: str) -> Dict[str, str]:
19
+ """
20
+ Return a dictionary of HTTP headers to use for API requests, including the specified API key.
21
+
22
+ Args:
23
+ api_key (str): The API key to use for authentication, as a string.
24
+
25
+ Returns:
26
+ dict: A dictionary of HTTP headers to include in API requests, with the `Authorization` header
27
+ set to the specified API key in the format `Bearer {api_key}`.
28
+
29
+ """
30
+ # Construct the dictionary of HTTP headers to use for API requests
31
+ return dict(authorization=f"Bearer {api_key}")
32
+
33
+
34
def extract_property_history(objects: List[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Flatten `propertiesWithHistory` of CRM objects into one dict per change.

    Each yielded entry carries the owning object's id, the property name and
    the raw change payload.

    Note: an object without history is now skipped with `continue`; the
    previous `return` aborted the whole iteration on the first such object,
    silently dropping the history of every object after it in the page.
    """
    for item in objects:
        history = item.get("propertiesWithHistory")
        if not history:
            continue
        # Yield a flat list of property history entries
        for key, changes in history.items():
            if not changes:
                continue
            for entry in changes:
                yield {"object_id": item["id"], "property_name": key, **entry}
45
+
46
+
47
def fetch_property_history(
    endpoint: str,
    api_key: str,
    props: str,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Fetch property history from the given CRM endpoint.

    Args:
        endpoint: The endpoint to fetch data from, as a string.
        api_key: The API key to use for authentication, as a string.
        props: A comma separated list of properties to retrieve the history for
        params: Optional dict of query params to include in the request

    Yields:
        List of property history entries (dicts)
    """
    url = get_url(endpoint)
    headers = _get_headers(api_key)

    # never mutate the caller's params dict
    query = dict(params or {})
    query["propertiesWithHistory"] = props
    query["limit"] = 50

    page = requests.get(url, headers=headers, params=query).json()
    while page is not None:
        if "results" in page:
            yield list(extract_property_history(page["results"]))

        # Follow pagination links if they exist
        next_page = page.get("paging", {}).get("next", None)
        if next_page:
            page = requests.get(next_page["link"], headers=headers).json()
        else:
            page = None
90
+
91
+
92
def fetch_data(
    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[List[Dict[str, Any]]]:
    """
    Fetch data from HUBSPOT endpoint using a specified API key and yield the properties of each result.
    For paginated endpoint this function yields item from all pages.

    Args:
        endpoint (str): The endpoint to fetch data from, as a string.
        api_key (str): The API key to use for authentication, as a string.
        params: Optional dict of query params to include in the request

    Yields:
        A List of CRM object dicts

    Raises:
        requests.exceptions.HTTPError: If the API returns an HTTP error status code.

    Notes:
        This function uses the `requests` library to make a GET request to the specified endpoint, with
        the API key included in the headers. If the API returns a non-successful HTTP status code (e.g.
        404 Not Found), a `requests.exceptions.HTTPError` exception will be raised.

        The `endpoint` argument should be a relative URL, which will be appended to the base URL for the
        API. The `params` argument is used to pass additional query parameters to the request

        This function also includes a retry decorator that will automatically retry the API call up to
        3 times with a 5-second delay between retries, using an exponential backoff strategy.
        NOTE(review): no retry decorator is visible here — retries presumably come from
        dlt's requests helper; confirm the docstring claim.
    """
    # Construct the URL and headers for the API request
    url = get_url(endpoint)
    headers = _get_headers(api_key)

    # Make the API request
    r = requests.get(url, headers=headers, params=params)
    # Parse the API response and yield the properties of each result
    # Parse the response JSON data
    _data = r.json()
    # Yield the properties of each result in the API response
    while _data is not None:
        if "results" in _data:
            _objects: List[Dict[str, Any]] = []
            for _result in _data["results"]:
                # flatten: the "properties" payload becomes the object itself
                _obj = _result.get("properties", _result)
                if "id" not in _obj and "id" in _result:
                    # Move id from properties to top level
                    _obj["id"] = _result["id"]
                if "associations" in _result:
                    for association in _result["associations"]:
                        # build (own hs_object_id, associated id) link rows
                        # NOTE(review): assumes "hs_object_id" was among the
                        # requested properties — confirm against crm_objects defaults
                        __values = [
                            {
                                "value": _obj["hs_object_id"],
                                f"{association}_id": __r["id"],
                            }
                            for __r in _result["associations"][association]["results"]
                        ]

                        # remove duplicates from list of dicts
                        __values = [
                            dict(t) for t in {tuple(d.items()) for d in __values}
                        ]

                        _obj[association] = __values
                _objects.append(_obj)
            yield _objects

        # Follow pagination links if they exist
        _next = _data.get("paging", {}).get("next", None)
        if _next:
            next_url = _next["link"]
            # Get the next page response
            r = requests.get(next_url, headers=headers)
            _data = r.json()
        else:
            _data = None
167
+
168
+
169
def _get_property_names(api_key: str, object_type: str) -> List[str]:
    """
    Retrieve all property names defined for a CRM object type.

    Args:
        api_key: The API key to use for authentication.
        object_type: Singular CRM object key (mapped to its plural endpoint).

    Returns:
        A list of property names.

    Raises:
        Exception: If an error occurs during the API request.
    """
    endpoint = f"/crm/v3/properties/{OBJECT_TYPE_PLURAL[object_type]}"

    names: List[str] = []
    for page in fetch_data(endpoint, api_key):
        names.extend(prop["name"] for prop in page)
    return names
@@ -0,0 +1,99 @@
1
+ """Hubspot source settings and constants"""
2
+
3
+ from dlt.common import pendulum
4
+
5
+ STARTDATE = pendulum.datetime(year=2000, month=1, day=1)
6
+
7
+ CRM_CONTACTS_ENDPOINT = (
8
+ "/crm/v3/objects/contacts?associations=deals,products,tickets,quotes"
9
+ )
10
+ CRM_COMPANIES_ENDPOINT = (
11
+ "/crm/v3/objects/companies?associations=contacts,deals,products,tickets,quotes"
12
+ )
13
+ CRM_DEALS_ENDPOINT = "/crm/v3/objects/deals"
14
+ CRM_PRODUCTS_ENDPOINT = "/crm/v3/objects/products"
15
+ CRM_TICKETS_ENDPOINT = "/crm/v3/objects/tickets"
16
+ CRM_QUOTES_ENDPOINT = "/crm/v3/objects/quotes"
17
+
18
+ CRM_OBJECT_ENDPOINTS = {
19
+ "contact": CRM_CONTACTS_ENDPOINT,
20
+ "company": CRM_COMPANIES_ENDPOINT,
21
+ "deal": CRM_DEALS_ENDPOINT,
22
+ "product": CRM_PRODUCTS_ENDPOINT,
23
+ "ticket": CRM_TICKETS_ENDPOINT,
24
+ "quote": CRM_QUOTES_ENDPOINT,
25
+ }
26
+
27
+ WEB_ANALYTICS_EVENTS_ENDPOINT = "/events/v3/events?objectType={objectType}&objectId={objectId}&occurredAfter={occurredAfter}&occurredBefore={occurredBefore}&sort=-occurredAt"
28
+
29
+ OBJECT_TYPE_SINGULAR = {
30
+ "companies": "company",
31
+ "contacts": "contact",
32
+ "deals": "deal",
33
+ "tickets": "ticket",
34
+ "products": "product",
35
+ "quotes": "quote",
36
+ }
37
+
38
+ OBJECT_TYPE_PLURAL = {v: k for k, v in OBJECT_TYPE_SINGULAR.items()}
39
+
40
+ DEFAULT_DEAL_PROPS = [
41
+ "amount",
42
+ "closedate",
43
+ "createdate",
44
+ "dealname",
45
+ "dealstage",
46
+ "hs_lastmodifieddate",
47
+ "hs_object_id",
48
+ "pipeline",
49
+ ]
50
+
51
+ DEFAULT_COMPANY_PROPS = [
52
+ "createdate",
53
+ "domain",
54
+ "hs_lastmodifieddate",
55
+ "hs_object_id",
56
+ "name",
57
+ ]
58
+
59
+ DEFAULT_CONTACT_PROPS = [
60
+ "createdate",
61
+ "email",
62
+ "firstname",
63
+ "hs_object_id",
64
+ "lastmodifieddate",
65
+ "lastname",
66
+ ]
67
+
68
+ DEFAULT_TICKET_PROPS = [
69
+ "createdate",
70
+ "content",
71
+ "hs_lastmodifieddate",
72
+ "hs_object_id",
73
+ "hs_pipeline",
74
+ "hs_pipeline_stage",
75
+ "hs_ticket_category",
76
+ "hs_ticket_priority",
77
+ "subject",
78
+ ]
79
+
80
+ DEFAULT_PRODUCT_PROPS = [
81
+ "createdate",
82
+ "description",
83
+ "hs_lastmodifieddate",
84
+ "hs_object_id",
85
+ "name",
86
+ "price",
87
+ ]
88
+
89
+ DEFAULT_QUOTE_PROPS = [
90
+ "hs_createdate",
91
+ "hs_expiration_date",
92
+ "hs_lastmodifieddate",
93
+ "hs_object_id",
94
+ "hs_public_url_key",
95
+ "hs_status",
96
+ "hs_title",
97
+ ]
98
+
99
+ ALL = ("ALL",)
ingestr/src/sources.py CHANGED
@@ -1,17 +1,21 @@
1
1
  import base64
2
2
  import csv
3
3
  import json
4
+ from datetime import date
4
5
  from typing import Any, Callable, Optional
5
6
  from urllib.parse import parse_qs, urlparse
6
7
 
7
8
  import dlt
8
9
 
10
+ from ingestr.src.chess import source
9
11
  from ingestr.src.google_sheets import google_spreadsheet
10
12
  from ingestr.src.gorgias import gorgias_source
13
+ from ingestr.src.hubspot import hubspot
11
14
  from ingestr.src.mongodb import mongodb_collection
12
15
  from ingestr.src.notion import notion_databases
13
16
  from ingestr.src.shopify import shopify_source
14
17
  from ingestr.src.sql_database import sql_table
18
+ from ingestr.src.stripe_analytics import stripe_source
15
19
  from ingestr.src.table_definition import table_string_to_dataclass
16
20
 
17
21
 
@@ -295,3 +299,135 @@ class GoogleSheetsSource:
295
299
  range_names=[table_fields.dataset],
296
300
  get_named_ranges=False,
297
301
  )
302
+
303
+
304
class ChessSource:
    """Source adapter for the Chess.com API.

    URI format: ``chess://?players=john,peter``
    """

    def handles_incrementality(self) -> bool:
        # The chess source tracks its own incremental state internally.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build a dlt source for one chess resource.

        Raises ValueError when an incremental_key is supplied or the table
        is not one of: profiles, games, archives.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Chess takes care of incrementality on its own, you should not provide incremental_key"
            )

        parsed_uri = urlparse(uri)
        query = parse_qs(parsed_uri.query)
        if "players" in query:
            players = query["players"][0].split(",")
        else:
            # Fall back to a handful of well-known players when none are given.
            players = [
                "MagnusCarlsen",
                "HikaruNakamura",
                "ArjunErigaisi",
                "IanNepomniachtchi",
            ]

        # Month bounds are only forwarded when both interval ends are dates.
        date_args = {}
        interval_start = kwargs.get("interval_start")
        interval_end = kwargs.get("interval_end")
        if interval_start and interval_end:
            if isinstance(interval_start, date) and isinstance(interval_end, date):
                date_args = {
                    "start_month": interval_start.strftime("%Y/%m"),
                    "end_month": interval_end.strftime("%Y/%m"),
                }

        resource_by_table = {
            "profiles": "players_profiles",
            "games": "players_games",
            "archives": "players_archives",
        }
        if table not in resource_by_table:
            raise ValueError(
                f"Resource '{table}' is not supported for Chess source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        return source(players=players, **date_args).with_resources(
            resource_by_table[table]
        )
348
+
349
+
350
class StripeAnalyticsSource:
    """Source adapter for the Stripe API.

    URI format: ``stripe://?api_key=<api_key>``
    """

    def handles_incrementality(self) -> bool:
        # Stripe resources manage their own incremental state internally.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build a dlt source for a single Stripe endpoint.

        Args:
            uri: Connection URI carrying the ``api_key`` query parameter.
            table: Stripe resource name, matched case-insensitively
                (e.g. "customer", "balancetransaction").
            **kwargs: Optional ``interval_start`` / ``interval_end`` bounds.

        Raises:
            ValueError: If an incremental_key is given, the api_key is
                missing, or the table is not a supported Stripe resource.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Stripe takes care of incrementality on its own, you should not provide incremental_key"
            )

        source_field = urlparse(uri)
        source_params = parse_qs(source_field.query)
        api_key = source_params.get("api_key")

        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Stripe")

        # BUG FIX: the previous code normalized with str.capitalize(), which
        # lowercases everything after the first character — that made the
        # multi-word endpoint "BalanceTransaction" impossible to select under
        # any spelling. Match the table name case-insensitively instead.
        supported_endpoints = [
            "Subscription",
            "Account",
            "Coupon",
            "Customer",
            "Product",
            "Price",
            "BalanceTransaction",
            "Invoice",
            "Event",
        ]
        endpoint = next(
            (e for e in supported_endpoints if e.lower() == table.lower()), None
        )
        if endpoint is None:
            raise ValueError(
                f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        date_args = {}
        if kwargs.get("interval_start"):
            date_args["start_date"] = kwargs.get("interval_start")

        if kwargs.get("interval_end"):
            date_args["end_date"] = kwargs.get("interval_end")

        return stripe_source(
            endpoints=[
                endpoint,
            ],
            stripe_secret_key=api_key[0],
            **date_args,
        ).with_resources(endpoint)
402
+
403
+
404
class HubspotSource:
    """Source adapter for the HubSpot CRM API.

    URI format: ``hubspot://?api_key=<api_key>``
    """

    def handles_incrementality(self) -> bool:
        # The hubspot source tracks its own incremental state internally.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build a dlt source for one HubSpot CRM object type.

        Raises ValueError when an incremental_key is supplied, the api_key
        is missing, or the table is not a supported CRM object type.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Hubspot takes care of incrementality on its own, you should not provide incremental_key"
            )

        parsed = urlparse(uri)
        params = parse_qs(parsed.query)
        api_key = params.get("api_key")

        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Hubspot")

        supported = ("contacts", "companies", "deals", "tickets", "products", "quotes")
        if table not in supported:
            raise ValueError(
                f"Resource '{table}' is not supported for Hubspot source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        return hubspot(api_key=api_key[0]).with_resources(table)
@@ -0,0 +1,99 @@
1
+ """This source uses Stripe API and dlt to load data such as Customer, Subscription, Event etc. to the database and to calculate the MRR and churn rate."""
2
+
3
+ from typing import Any, Dict, Generator, Iterable, Optional, Tuple
4
+
5
+ import dlt
6
+ import stripe
7
+ from dlt.sources import DltResource
8
+ from pendulum import DateTime
9
+
10
+ from .helpers import pagination, transform_date
11
+ from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
12
+
13
+
14
@dlt.source
def stripe_source(
    endpoints: Tuple[str, ...] = ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Retrieves data from the Stripe API for the specified endpoints.

    For all endpoints, Stripe API responses do not provide the key "updated",
    so in most cases, we are forced to load the data in 'replace' mode.
    This source is suitable for all types of endpoints, including 'Events', 'Invoice', etc.
    but these endpoints can also be loaded in incremental mode (see source incremental_stripe_source).

    Args:
        endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from. Defaults to most popular Stripe API endpoints.
        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.
        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.

    Returns:
        Iterable[DltResource]: Resources with data that was created during the period greater than or equal to 'start_date' and less than 'end_date'.
    """
    # Credentials are set on the module-level stripe client; the API version
    # is pinned so response schemas stay stable across Stripe updates.
    stripe.api_key = stripe_secret_key
    stripe.api_version = "2022-11-15"

    def stripe_resource(
        endpoint: str,
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Each yielded item is a full page (a list of objects) from the endpoint.
        yield from pagination(endpoint, start_date, end_date)

    for endpoint in endpoints:
        # `endpoint` is bound immediately by calling the resource factory, so
        # the usual late-binding-closure pitfall does not apply here.
        yield dlt.resource(
            stripe_resource,
            name=endpoint,
            write_disposition="replace",
        )(endpoint)
52
+
53
+
54
@dlt.source
def incremental_stripe_source(
    endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    initial_start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    As Stripe API does not include the "updated" key in its responses,
    we are only able to perform incremental downloads from endpoints where all objects are uneditable.
    This source yields the resources with incremental loading based on "append" mode.
    You will load only the newest data without duplicating and without downloading a huge amount of data each time.

    Args:
        endpoints (tuple): A tuple of endpoint names to retrieve data from. Defaults to Stripe API endpoints with uneditable data.
        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
        initial_start_date (Optional[DateTime]): An optional parameter that specifies the initial value for dlt.sources.incremental.
            If parameter is not None, then load only data that were created after initial_start_date on the first run.
            Defaults to None. Format: datetime(YYYY, MM, DD).
        end_date (Optional[DateTime]): An optional end date to limit the data retrieved.
            Defaults to None. Format: datetime(YYYY, MM, DD).
    Returns:
        Iterable[DltResource]: Resources with only that data has not yet been loaded.
    """
    stripe.api_key = stripe_secret_key
    # Pin the API version so response schemas stay stable across Stripe updates.
    stripe.api_version = "2022-11-15"
    # -1 acts as "no lower bound" on the first run when no start date is given.
    start_date_unix = (
        transform_date(initial_start_date) if initial_start_date is not None else -1
    )

    def incremental_resource(
        endpoint: str,
        # NOTE: dlt.sources.incremental as a default argument is the dlt idiom
        # for declaring incremental state (dlt injects the stored cursor here);
        # it is evaluated once on purpose and is not a mutable-default bug.
        created: Optional[Any] = dlt.sources.incremental(
            "created", initial_value=start_date_unix
        ),
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Resume from the highest "created" timestamp seen in previous runs.
        start_value = created.last_value
        yield from pagination(endpoint, start_date=start_value, end_date=end_date)

    for endpoint in endpoints:
        yield dlt.resource(
            incremental_resource,
            name=endpoint,
            write_disposition="append",
            primary_key="id",
        )(endpoint)
@@ -0,0 +1,68 @@
1
+ """Stripe analytics source helpers"""
2
+
3
+ from typing import Any, Dict, Iterable, Optional, Union
4
+
5
+ import stripe
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItem
8
+ from pendulum import DateTime
9
+
10
+
11
def pagination(
    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
) -> Iterable[TDataItem]:
    """
    Retrieves data from an endpoint with pagination.

    Args:
        endpoint (str): The endpoint to retrieve data from.
        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to None.
        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to None.

    Returns:
        Iterable[TDataItem]: Data items retrieved from the endpoint.
    """
    # Stripe uses cursor pagination: each request resumes after the id of the
    # last object in the previous page.
    cursor = None
    has_more = True
    while has_more:
        page = stripe_get_data(
            endpoint,
            start_date=start_date,
            end_date=end_date,
            starting_after=cursor,
        )

        records = page["data"]
        if len(records) > 0:
            cursor = records[-1]["id"]
            # Yield the whole page (a list of objects), not individual items.
            yield records

        has_more = page["has_more"]
40
+
41
+
42
def transform_date(date: "Union[str, DateTime, int]") -> int:
    """Normalize *date* to an integer unix timestamp.

    Accepts an ISO-8601 UTC string ("YYYY-MM-DDTHH:MM:SSZ"), a datetime
    (including pendulum.DateTime, which subclasses it), or an integer
    timestamp, which is returned unchanged.
    """
    from datetime import datetime, timezone

    if isinstance(date, str):
        # BUG FIX: the previous pendulum.from_format call used strptime-style
        # "%Y-..." directives, but pendulum expects token formats such as
        # "YYYY-MM-DD", so every string input raised. Parse with stdlib
        # strptime instead, pinning UTC so .timestamp() is deterministic.
        date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=timezone.utc
        )
    if isinstance(date, datetime):
        # convert to unix timestamp (pendulum.DateTime is handled here too)
        date = int(date.timestamp())
    return date
49
+
50
+
51
def stripe_get_data(
    resource: str,
    start_date: Optional[Any] = None,
    end_date: Optional[Any] = None,
    **kwargs: Any,
) -> Dict[Any, Any]:
    """Fetch one page (up to 100 objects) of *resource* from the Stripe API,
    filtered to objects created in [start_date, end_date)."""
    # Normalize truthy date bounds to unix timestamps; leave falsy values as-is.
    if start_date:
        start_date = transform_date(start_date)
    if end_date:
        end_date = transform_date(end_date)

    if resource == "Subscription":
        # By default Stripe lists only active subscriptions; request them all.
        kwargs.update({"status": "all"})

    # Resolve the resource class dynamically, e.g. stripe.Customer.list(...).
    api_object = getattr(stripe, resource)
    response = api_object.list(
        created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
    )
    return dict(response)
@@ -0,0 +1,14 @@
1
+ """Stripe analytics source settings and constants"""
2
+
3
+ # the most popular endpoints
4
+ # Full list of the Stripe API endpoints you can find here: https://stripe.com/docs/api.
5
+ ENDPOINTS = (
6
+ "Subscription",
7
+ "Account",
8
+ "Coupon",
9
+ "Customer",
10
+ "Product",
11
+ "Price",
12
+ )
13
+ # possible incremental endpoints
14
+ INCREMENTAL_ENDPOINTS = ("Event", "Invoice", "BalanceTransaction")
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.7.4"
1
+ __version__ = "0.7.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.4
3
+ Version: 0.7.6
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -38,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
38
38
  Requires-Dist: sqlalchemy-redshift==0.8.14
39
39
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
40
40
  Requires-Dist: sqlalchemy==1.4.52
41
+ Requires-Dist: stripe==10.7.0
41
42
  Requires-Dist: tqdm==4.66.2
42
43
  Requires-Dist: typer==0.12.3
43
44
  Description-Content-Type: text/markdown
@@ -172,25 +173,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
172
173
  <tr>
173
174
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
174
175
  </tr>
176
+ <tr>
177
+ <td>Chess.com</td>
178
+ <td>✅</td>
179
+ <td>-</td>
180
+ </tr>
175
181
  <tr>
176
182
  <td>Gorgias</td>
177
183
  <td>✅</td>
178
- <td>❌</td>
184
+ <td>-</td>
179
185
  </tr>
180
186
  <tr>
181
187
  <td>Google Sheets</td>
182
188
  <td>✅</td>
183
- <td>❌</td>
189
+ <td>-</td>
190
+ </tr>
191
+ <tr>
192
+ <td>HubSpot</td>
193
+ <td>✅</td>
194
+ <td>-</td>
184
195
  </tr>
185
196
  <tr>
186
197
  <td>Notion</td>
187
198
  <td>✅</td>
188
- <td>❌</td>
199
+ <td>-</td>
189
200
  </tr>
190
201
  <tr>
191
202
  <td>Shopify</td>
192
203
  <td>✅</td>
193
- <td>❌</td>
204
+ <td>-</td>
205
+ </tr>
206
+ <tr>
207
+ <td>Stripe</td>
208
+ <td>✅</td>
209
+ <td>-</td>
194
210
  </tr>
195
211
  </table>
196
212
 
@@ -1,9 +1,12 @@
1
- ingestr/main.py,sha256=j0pscsPbeJ9oYJiTCvymneZwg4Lc7KaR3GAMX0GG4To,16432
1
+ ingestr/main.py,sha256=0J_bMCWLZT0tdRW0df8iKw05In55LJ_vUoN3X2TgXlc,17183
2
2
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
3
- ingestr/src/factory.py,sha256=XuT_8LvWd7gBxOjoD_NiG-jtPvHNQ9nqOeoCJzhRb6Y,3630
4
- ingestr/src/sources.py,sha256=QbSvECvGbHJKOpE9_dbq11343pA5ajsS9BPPPab1ivw,10007
3
+ ingestr/src/factory.py,sha256=Fp_MaeiAhU7IHT6RMLTEhvXgmDyu6j1IHGnjC4qsPLI,3939
4
+ ingestr/src/sources.py,sha256=7dy_KvoGI9vZSJwpkW3iNh8M1tjh1pBSz8qLpo0GTIo,14589
5
5
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
6
- ingestr/src/version.py,sha256=A6fZ_oURo3l_Fa_K29LgV21A4Onqu3NquwGYzL05E1Y,22
6
+ ingestr/src/version.py,sha256=wu65dmVM9fKR1rBHH263ls8Ca2FZzb0ejYcrP_Ld0iY,22
7
+ ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
8
+ ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
9
+ ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
7
10
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
8
11
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
9
12
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -11,6 +14,9 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
11
14
  ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
12
15
  ingestr/src/gorgias/__init__.py,sha256=BzX9X1Yc_1Mch6NP1pn26hjRIiaadErgHxkdJHw4P3o,21227
13
16
  ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
17
+ ingestr/src/hubspot/__init__.py,sha256=eSD_lEIEd16YijAtUATFG8FGO8YGPm-MtAk94KKsx6o,9740
18
+ ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
19
+ ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
14
20
  ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
15
21
  ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
16
22
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -27,6 +33,9 @@ ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5
27
33
  ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
28
34
  ingestr/src/sql_database/override.py,sha256=xbKGDztCzvrhJ5kJTXERal3LA56bEeVug4_rrTs8DgA,333
29
35
  ingestr/src/sql_database/schema_types.py,sha256=qXTanvFPE8wMCSDzQWPDi5yqaO-llfrFXjiGJALI4NA,5013
36
+ ingestr/src/stripe_analytics/__init__.py,sha256=8yy6i4DAhUqY4ZForetQ0DWc_YQrY0FBH6yk0Z3m-Mw,4493
37
+ ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
38
+ ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
30
39
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
31
40
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
32
41
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
@@ -37,8 +46,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
37
46
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
38
47
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
39
48
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
40
- ingestr-0.7.4.dist-info/METADATA,sha256=VLL2Um1BU3x6Oz89Gx6d48O9ukAk4Ro7uy2dFIPTIo8,5829
41
- ingestr-0.7.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
42
- ingestr-0.7.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
43
- ingestr-0.7.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
44
- ingestr-0.7.4.dist-info/RECORD,,
49
+ ingestr-0.7.6.dist-info/METADATA,sha256=5QLxKZE65TQH_z6dH0g52FZmseLc2hIJBipDwcuCg5g,6104
50
+ ingestr-0.7.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
51
+ ingestr-0.7.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
52
+ ingestr-0.7.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
53
+ ingestr-0.7.6.dist-info/RECORD,,