ingestr 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +23 -2
- ingestr/src/chess/__init__.py +166 -0
- ingestr/src/chess/helpers.py +21 -0
- ingestr/src/chess/settings.py +4 -0
- ingestr/src/factory.py +9 -0
- ingestr/src/hubspot/__init__.py +281 -0
- ingestr/src/hubspot/helpers.py +188 -0
- ingestr/src/hubspot/settings.py +99 -0
- ingestr/src/sources.py +136 -0
- ingestr/src/stripe_analytics/__init__.py +99 -0
- ingestr/src/stripe_analytics/helpers.py +68 -0
- ingestr/src/stripe_analytics/settings.py +14 -0
- ingestr/src/version.py +1 -1
- {ingestr-0.7.4.dist-info → ingestr-0.7.6.dist-info}/METADATA +21 -5
- {ingestr-0.7.4.dist-info → ingestr-0.7.6.dist-info}/RECORD +18 -9
- {ingestr-0.7.4.dist-info → ingestr-0.7.6.dist-info}/WHEEL +0 -0
- {ingestr-0.7.4.dist-info → ingestr-0.7.6.dist-info}/entry_points.txt +0 -0
- {ingestr-0.7.4.dist-info → ingestr-0.7.6.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from enum import Enum
|
|
4
|
+
import tempfile
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
6
7
|
import dlt
|
|
@@ -236,6 +237,13 @@ def ingest(
|
|
|
236
237
|
envvar="SCHEMA_NAMING",
|
|
237
238
|
),
|
|
238
239
|
] = SchemaNaming.default, # type: ignore
|
|
240
|
+
pipelines_dir: Annotated[
|
|
241
|
+
Optional[str],
|
|
242
|
+
typer.Option(
|
|
243
|
+
help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
|
|
244
|
+
envvar="PIPELINES_DIR",
|
|
245
|
+
),
|
|
246
|
+
] = None, # type: ignore
|
|
239
247
|
):
|
|
240
248
|
track(
|
|
241
249
|
"command_triggered",
|
|
@@ -280,13 +288,18 @@ def ingest(
|
|
|
280
288
|
if progress == Progress.log:
|
|
281
289
|
progressInstance = LogCollector(dump_system_stats=False)
|
|
282
290
|
|
|
291
|
+
is_pipelines_dir_temp = False
|
|
292
|
+
if pipelines_dir is None:
|
|
293
|
+
pipelines_dir = tempfile.mkdtemp()
|
|
294
|
+
is_pipelines_dir_temp = True
|
|
295
|
+
|
|
283
296
|
pipeline = dlt.pipeline(
|
|
284
297
|
pipeline_name=m.hexdigest(),
|
|
285
298
|
destination=destination.dlt_dest(
|
|
286
299
|
uri=dest_uri,
|
|
287
300
|
),
|
|
288
301
|
progress=progressInstance,
|
|
289
|
-
pipelines_dir=
|
|
302
|
+
pipelines_dir=pipelines_dir,
|
|
290
303
|
refresh="drop_resources" if full_refresh else None,
|
|
291
304
|
)
|
|
292
305
|
|
|
@@ -362,6 +375,8 @@ def ingest(
|
|
|
362
375
|
if incremental_strategy != IncrementalStrategy.none:
|
|
363
376
|
write_disposition = incremental_strategy.value
|
|
364
377
|
|
|
378
|
+
start_time = datetime.now()
|
|
379
|
+
|
|
365
380
|
run_info: LoadInfo = pipeline.run(
|
|
366
381
|
dlt_source,
|
|
367
382
|
**destination.dlt_run_params(
|
|
@@ -389,11 +404,17 @@ def ingest(
|
|
|
389
404
|
|
|
390
405
|
destination.post_load()
|
|
391
406
|
|
|
407
|
+
end_time = datetime.now()
|
|
392
408
|
elapsedHuman = ""
|
|
393
409
|
if run_info.started_at:
|
|
394
|
-
elapsed =
|
|
410
|
+
elapsed = end_time - start_time
|
|
395
411
|
elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
|
|
396
412
|
|
|
413
|
+
# remove the pipelines_dir folder if it was created by ingestr
|
|
414
|
+
if is_pipelines_dir_temp:
|
|
415
|
+
import shutil
|
|
416
|
+
shutil.rmtree(pipelines_dir)
|
|
417
|
+
|
|
397
418
|
print(
|
|
398
419
|
f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
|
|
399
420
|
)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""A source loading player profiles and games from chess.com api"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable, Dict, Iterator, List, Sequence
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.common import pendulum
|
|
7
|
+
from dlt.common.typing import TDataItem
|
|
8
|
+
from dlt.sources import DltResource
|
|
9
|
+
from dlt.sources.helpers import requests
|
|
10
|
+
|
|
11
|
+
from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
|
|
12
|
+
from .settings import UNOFFICIAL_CHESS_API_URL
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.source(name="chess")
def source(
    players: List[str], start_month: str = None, end_month: str = None
) -> Sequence[DltResource]:
    """
    Bundle the chess.com resources of this module into a single dlt source.

    Args:
        players (List[str]): Usernames of the players to load data for.
        start_month (str, optional): Forwarded to `players_games`; archives for months
            before it are skipped. Defaults to None (no lower bound).
        end_month (str, optional): Forwarded to `players_games`; archives for months
            after it are skipped. Defaults to None (no upper bound).
    Returns:
        Sequence[DltResource]: The resources, in this order: players_profiles,
        players_archives, players_games, players_online_status.
    """
    profiles = players_profiles(players)
    archives = players_archives(players)
    games = players_games(players, start_month=start_month, end_month=end_month)
    online_status = players_online_status(players)
    return (profiles, archives, games, online_status)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dlt.resource(
    write_disposition="replace",
    columns={
        "last_online": {"data_type": "timestamp"},
        "joined": {"data_type": "timestamp"},
    },
)
def players_profiles(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield one (deferred) profile lookup per requested player.

    Args:
        players (List[str]): Usernames whose profiles should be fetched.
    Yields:
        Iterator[TDataItem]: The result of calling the `dlt.defer`-wrapped profile
        request for each username, in the order the usernames were given.
    """

    # the HTTP request is wrapped with `dlt.defer` so the profile downloads
    # can be evaluated in parallel (per the original author's note)
    @dlt.defer
    def _fetch_profile(player_name: str) -> TDataItem:
        profile_path = f"player/{player_name}"
        return get_path_with_retry(profile_path)

    yield from (_fetch_profile(player_name) for player_name in players)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dlt.resource(write_disposition="replace", selected=False)
def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
    """
    Yield the list of game-archive URLs of every requested player.

    Args:
        players (List[str]): Usernames whose archive listings should be fetched.
    Yields:
        Iterator[List[TDataItem]]: For each player, the value stored under the
        "archives" key of the API response, or an empty list when the key is absent.
    """
    for player_name in players:
        response = get_path_with_retry(f"player/{player_name}/games/archives")
        archive_urls = response.get("archives", [])
        yield archive_urls
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dlt.resource(
    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
)
def players_games(
    players: List[str], start_month: str = None, end_month: str = None
) -> Iterator[Callable[[], List[TDataItem]]]:
    """
    Yield the games of `players` played between `start_month` and `end_month`.

    Args:
        players (List[str]): Usernames whose games should be fetched.
        start_month (str, optional): First month to include, in "YYYY/MM" format. Defaults to None.
        end_month (str, optional): Last month to include, in "YYYY/MM" format. Defaults to None.
    Yields:
        Iterator[Callable[[], List[TDataItem]]]: One deferred download per archive that is
        inside the month window and was not recorded in the resource state before.
    """
    # cheap sanity check that catches the most common month-format mistakes early
    validate_month_string(start_month)
    validate_month_string(end_month)

    # URLs of archives that were already handled; kept in the dlt resource state,
    # which (per the original author's note) has the same content on the next run
    seen_archives = dlt.current.resource_state().setdefault("archives", [])
    # a resource can be called like a function and iterated like a list
    archive_urls = players_archives(players)

    # the download is wrapped with `dlt.defer` so archives can be fetched in parallel
    @dlt.defer
    def _download_archive(archive_url: str) -> List[TDataItem]:
        try:
            payload = get_url_with_retry(archive_url)
        except requests.HTTPError as http_err:
            # some archives are permanently unavailable: treat 404 as "no games"
            if http_err.response.status_code != 404:
                raise
            return []
        return payload.get("games", [])  # type: ignore

    for archive_url in archive_urls:
        # archive URLs look like https://api.chess.com/pub/player/{username}/games/{YYYY}/{MM},
        # so the last 7 characters are the month and compare correctly as strings
        before_window = start_month and archive_url[-7:] < start_month
        after_window = end_month and archive_url[-7:] > end_month
        if before_window or after_window:
            continue
        # never download the same archive twice
        if archive_url in seen_archives:
            continue
        seen_archives.append(archive_url)
        yield _download_archive(archive_url)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dlt.resource(write_disposition="append")
def players_online_status(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield the current online status of every requested player.

    Args:
        players (List[str]): Usernames whose online status should be checked.
    Yields:
        Iterator[TDataItem]: One dict per player with the keys "username",
        "onlineStatus", "lastLoginDate" and "check_time".
    """
    # the unofficial endpoint is used here; the official one seems to have been removed
    for player in players:
        popup = get_url_with_retry(f"{UNOFFICIAL_CHESS_API_URL}user/popup/{player}")
        # keep only the fields of interest from the response
        record = {"username": player}
        for field in ("onlineStatus", "lastLoginDate"):
            record[field] = popup[field]
        # dlt can deal with native python dates
        record["check_time"] = pendulum.now()
        yield record
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dlt.source
def chess_dlt_config_example(
    secret_str: str = dlt.secrets.value,
    secret_dict: Dict[str, Any] = dlt.secrets.value,
    config_int: int = dlt.config.value,
) -> DltResource:
    """
    Example source showing how dlt injects secrets and config values.

    Args:
        secret_str (str, optional): Secret string. Defaults to dlt.secrets.value.
        secret_dict (Dict[str, Any], optional): Secret dictionary. Defaults to dlt.secrets.value.
        config_int (int, optional): Config integer. Defaults to dlt.config.value.
    Returns:
        DltResource: A resource named "config_values" built from the three received values.
    """
    # only a test helper: the resource simply hands back the values it was configured with
    configured_values = [secret_str, secret_dict, config_int]
    return dlt.resource(configured_values, name="config_values")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Chess source helpers"""
|
|
2
|
+
|
|
3
|
+
from dlt.common.typing import StrAny
|
|
4
|
+
from dlt.sources.helpers import requests
|
|
5
|
+
|
|
6
|
+
from .settings import OFFICIAL_CHESS_API_URL
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_url_with_retry(url: str) -> StrAny:
    """GET `url` with the `requests` helper imported from dlt and return the decoded JSON body.

    No retry loop is implemented in this function itself; any retrying is
    whatever the imported `requests` helper provides.
    """
    return requests.get(url).json()  # type: ignore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_path_with_retry(path: str) -> StrAny:
    """Fetch `path` relative to `OFFICIAL_CHESS_API_URL` and return the decoded JSON body."""
    full_url = f"{OFFICIAL_CHESS_API_URL}{path}"
    return get_url_with_retry(full_url)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def validate_month_string(string: str) -> None:
    """Validate that `string`, when provided, is a month in "YYYY/MM" format.

    Args:
        string (str): The month to validate. `None` and the empty string mean
            "no bound given" and are accepted without further checks.

    Raises:
        ValueError: If `string` is not exactly four digits, a slash and two
            digits. Inputs shorter than five characters are rejected with
            ValueError as well, instead of failing with an IndexError.
    """
    if not string:
        return
    # slices never raise, so short inputs fall through to the ValueError below
    year, separator, month = string[:4], string[4:5], string[5:]
    well_formed = (
        len(string) == 7
        and separator == "/"
        and year.isdigit()
        and month.isdigit()
    )
    if not well_formed:
        raise ValueError(f"Invalid month {string!r}: expected the format YYYY/MM")
|
ingestr/src/factory.py
CHANGED
|
@@ -15,13 +15,16 @@ from ingestr.src.destinations import (
|
|
|
15
15
|
SynapseDestination,
|
|
16
16
|
)
|
|
17
17
|
from ingestr.src.sources import (
|
|
18
|
+
ChessSource,
|
|
18
19
|
GoogleSheetsSource,
|
|
19
20
|
GorgiasSource,
|
|
21
|
+
HubspotSource,
|
|
20
22
|
LocalCsvSource,
|
|
21
23
|
MongoDbSource,
|
|
22
24
|
NotionSource,
|
|
23
25
|
ShopifySource,
|
|
24
26
|
SqlSource,
|
|
27
|
+
StripeAnalyticsSource,
|
|
25
28
|
)
|
|
26
29
|
|
|
27
30
|
SQL_SOURCE_SCHEMES = [
|
|
@@ -102,6 +105,12 @@ class SourceDestinationFactory:
|
|
|
102
105
|
return ShopifySource()
|
|
103
106
|
elif self.source_scheme == "gorgias":
|
|
104
107
|
return GorgiasSource()
|
|
108
|
+
elif self.source_scheme == "chess":
|
|
109
|
+
return ChessSource()
|
|
110
|
+
elif self.source_scheme == "stripe":
|
|
111
|
+
return StripeAnalyticsSource()
|
|
112
|
+
elif self.source_scheme == "hubspot":
|
|
113
|
+
return HubspotSource()
|
|
105
114
|
else:
|
|
106
115
|
raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
|
|
107
116
|
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a module that provides a DLT source to retrieve data from multiple endpoints of the HubSpot API using a specified API key. The retrieved data is returned as a tuple of Dlt resources, one for each endpoint.
|
|
3
|
+
|
|
4
|
+
The source retrieves data from the following endpoints:
|
|
5
|
+
- CRM Companies
|
|
6
|
+
- CRM Contacts
|
|
7
|
+
- CRM Deals
|
|
8
|
+
- CRM Tickets
|
|
9
|
+
- CRM Products
|
|
10
|
+
- CRM Quotes
|
|
11
|
+
- Web Analytics Events
|
|
12
|
+
|
|
13
|
+
For each endpoint, a resource and transformer function are defined to retrieve data and transform it to a common format.
|
|
14
|
+
The resource functions yield the raw data retrieved from the API, while the transformer functions are used to retrieve
|
|
15
|
+
additional information from the Web Analytics Events endpoint.
|
|
16
|
+
|
|
17
|
+
The source also supports enabling Web Analytics Events for each endpoint by setting the corresponding enable flag to True.
|
|
18
|
+
|
|
19
|
+
Example:
|
|
20
|
+
To retrieve data from all endpoints, use the following code:
|
|
21
|
+
|
|
22
|
+
python
|
|
23
|
+
|
|
24
|
+
>>> resources = hubspot(api_key="your_api_key")
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from typing import Any, Dict, Iterator, List, Literal, Sequence
|
|
28
|
+
from urllib.parse import quote
|
|
29
|
+
|
|
30
|
+
import dlt
|
|
31
|
+
from dlt.common import pendulum
|
|
32
|
+
from dlt.common.typing import TDataItems
|
|
33
|
+
from dlt.sources import DltResource
|
|
34
|
+
|
|
35
|
+
from .helpers import _get_property_names, fetch_data, fetch_property_history
|
|
36
|
+
from .settings import (
|
|
37
|
+
ALL,
|
|
38
|
+
CRM_OBJECT_ENDPOINTS,
|
|
39
|
+
DEFAULT_COMPANY_PROPS,
|
|
40
|
+
DEFAULT_CONTACT_PROPS,
|
|
41
|
+
DEFAULT_DEAL_PROPS,
|
|
42
|
+
DEFAULT_PRODUCT_PROPS,
|
|
43
|
+
DEFAULT_QUOTE_PROPS,
|
|
44
|
+
DEFAULT_TICKET_PROPS,
|
|
45
|
+
OBJECT_TYPE_PLURAL,
|
|
46
|
+
STARTDATE,
|
|
47
|
+
WEB_ANALYTICS_EVENTS_ENDPOINT,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
THubspotObjectType = Literal["company", "contact", "deal", "ticket", "product", "quote"]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dlt.source(name="hubspot")
def hubspot(
    api_key: str = dlt.secrets.value,
    include_history: bool = False,
    include_custom_props: bool = True,
) -> Sequence[DltResource]:
    """
    A DLT source exposing the HubSpot CRM objects as resources.

    Six resources are defined (companies, contacts, deals, tickets, products
    and quotes). Each of them delegates to `crm_objects` with its own object
    type and default property list, and uses the "replace" write disposition.

    Args:
        api_key (Optional[str]):
            The API key used to authenticate with the HubSpot API. Defaults
            to dlt.secrets.value.
        include_history (Optional[bool]):
            Whether to load history of property changes along with entities.
            The history entries are loaded to separate tables.
        include_custom_props (Optional[bool]):
            Default for the resource argument of the same name, which is
            forwarded unchanged to `crm_objects`.

    Returns:
        Sequence[DltResource]: The resources in the order companies, contacts,
        deals, tickets, products, quotes.

    Notes:
        The arguments of this function become the default values of the
        corresponding arguments of every resource, so they can still be
        overridden per resource.
    """

    @dlt.resource(name="companies", write_disposition="replace")
    def companies(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_COMPANY_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot companies resource"""
        yield from crm_objects(
            "company",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="contacts", write_disposition="replace")
    def contacts(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_CONTACT_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot contacts resource"""
        yield from crm_objects(
            "contact",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="deals", write_disposition="replace")
    def deals(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_DEAL_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot deals resource"""
        yield from crm_objects(
            "deal",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="tickets", write_disposition="replace")
    def tickets(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_TICKET_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot tickets resource"""
        yield from crm_objects(
            "ticket",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="products", write_disposition="replace")
    def products(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot products resource"""
        yield from crm_objects(
            "product",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    @dlt.resource(name="quotes", write_disposition="replace")
    def quotes(
        api_key: str = api_key,
        include_history: bool = include_history,
        props: Sequence[str] = DEFAULT_QUOTE_PROPS,
        include_custom_props: bool = include_custom_props,
    ) -> Iterator[TDataItems]:
        """Hubspot quotes resource"""
        yield from crm_objects(
            "quote",
            api_key=api_key,
            include_history=include_history,
            props=props,
            include_custom_props=include_custom_props,
        )

    return (companies, contacts, deals, tickets, products, quotes)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def crm_objects(
    object_type: str,
    api_key: str = dlt.secrets.value,
    include_history: bool = False,
    props: Sequence[str] = None,
    include_custom_props: bool = True,
) -> Iterator[TDataItems]:
    """Building blocks for CRM resources.

    Args:
        object_type (str): Key into `CRM_OBJECT_ENDPOINTS` / `OBJECT_TYPE_PLURAL`
            selecting which CRM object to load.
        api_key (str): The API key used to authenticate with the HubSpot API.
        include_history (bool): When true, property history entries are yielded
            as well, routed to the table `<plural object type>_property_history`.
        props (Sequence[str], optional): Property names to request. The special
            value `ALL` requests every property reported by the API. `None`
            (the default) is treated as an empty selection.
        include_custom_props (bool): When true, every property whose name does
            not start with "hs_" is added to the selection.

    Yields:
        Pages of CRM objects from `fetch_data`, followed (if requested) by
        pages of property history entries.

    Raises:
        ValueError: If the comma-joined property list is longer than 2000 symbols.
    """
    # property names are fetched lazily and at most once per call
    all_props = None

    if props == ALL:
        all_props = list(_get_property_names(api_key, object_type))
        selected = list(all_props)
    else:
        # copy into a list so that None, tuples and other sequences all work
        # with the extension below and the caller's sequence is never mutated
        selected = list(props or [])

    if include_custom_props:
        if all_props is None:
            all_props = _get_property_names(api_key, object_type)
        selected.extend(prop for prop in all_props if not prop.startswith("hs_"))

    # de-duplicate and sort so the generated query string is stable
    props = ",".join(sorted(set(selected)))

    if len(props) > 2000:
        raise ValueError(
            "Your request to Hubspot is too long to process. "
            "Maximum allowed query length is 2000 symbols, while "
            f"your list of properties `{props[:200]}`... is {len(props)} "
            "symbols long. Use the `props` argument of the resource to "
            "set the list of properties to extract from the endpoint."
        )

    params = {"properties": props, "limit": 100}

    yield from fetch_data(CRM_OBJECT_ENDPOINTS[object_type], api_key, params=params)
    if include_history:
        # Get history separately, as requesting both all properties and history together
        # is likely to hit hubspot's URL length limit
        for history_entries in fetch_property_history(
            CRM_OBJECT_ENDPOINTS[object_type],
            api_key,
            props,
        ):
            yield dlt.mark.with_table_name(
                history_entries,
                OBJECT_TYPE_PLURAL[object_type] + "_property_history",
            )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@dlt.resource
def hubspot_events_for_objects(
    object_type: THubspotObjectType,
    object_ids: List[str],
    api_key: str = dlt.secrets.value,
    start_date: pendulum.DateTime = STARTDATE,
) -> DltResource:
    """
    A standalone DLT resource that retrieves web analytics events from the HubSpot API
    for a particular object type and list of object ids.

    Args:
        object_type (THubspotObjectType, required): One of the hubspot object types, see the THubspotObjectType literal
        object_ids (List[str], required): Ids of the objects whose events should be tracked
        api_key (str, optional): The API key used to authenticate with the HubSpot API. Defaults to dlt.secrets.value.
        start_date (datetime, optional): Initial value of the incremental "occurredAt" cursor. Defaults to STARTDATE.

    Returns:
        A dlt resource named `<object_type>_events` with "append" write disposition whose
        rows are routed to a table per event type (`<object_type>_events_<eventType>`).
    """

    # upper bound of the requested time window: the moment the resource is created
    window_end = pendulum.now().isoformat()
    resource_name = object_type + "_events"

    def get_web_analytics_events(
        occurred_at: dlt.sources.incremental[str],
    ) -> Iterator[List[Dict[str, Any]]]:
        """
        Yield pages of web analytics events for every id in `object_ids`.

        Args:
            occurred_at: Incremental cursor; its `last_value` is used as the
                lower bound of the requested time window.

        Yields:
            Pages (lists) of web analytics event dicts as produced by `fetch_data`.
        """
        quoted_end = quote(window_end)
        for object_id in object_ids:
            # `last_value` is read again for every object id, exactly when its request is built
            events_endpoint = WEB_ANALYTICS_EVENTS_ENDPOINT.format(
                objectType=object_type,
                objectId=object_id,
                occurredAfter=quote(occurred_at.last_value),
                occurredBefore=quoted_end,
            )
            yield from fetch_data(events_endpoint, api_key=api_key)

    def _table_for_event(event: Dict[str, Any]) -> str:
        # one destination table per event type
        return resource_name + "_" + str(event["eventType"])

    events_resource = dlt.resource(
        get_web_analytics_events,
        name=resource_name,
        primary_key="id",
        write_disposition="append",
        selected=True,
        table_name=_table_for_event,
    )
    cursor = dlt.sources.incremental("occurredAt", initial_value=start_date.isoformat())
    return events_resource(cursor)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""Hubspot source helpers"""
|
|
2
|
+
|
|
3
|
+
import urllib.parse
|
|
4
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
5
|
+
|
|
6
|
+
from dlt.sources.helpers import requests
|
|
7
|
+
|
|
8
|
+
from .settings import OBJECT_TYPE_PLURAL
|
|
9
|
+
|
|
10
|
+
BASE_URL = "https://api.hubapi.com/"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_url(endpoint: str) -> str:
    """Resolve `endpoint` against `BASE_URL` and return the absolute hubspot URL."""
    absolute_url = urllib.parse.urljoin(BASE_URL, endpoint)
    return absolute_url
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_headers(api_key: str) -> Dict[str, str]:
|
|
19
|
+
"""
|
|
20
|
+
Return a dictionary of HTTP headers to use for API requests, including the specified API key.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
api_key (str): The API key to use for authentication, as a string.
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
dict: A dictionary of HTTP headers to include in API requests, with the `Authorization` header
|
|
27
|
+
set to the specified API key in the format `Bearer {api_key}`.
|
|
28
|
+
|
|
29
|
+
"""
|
|
30
|
+
# Construct the dictionary of HTTP headers to use for API requests
|
|
31
|
+
return dict(authorization=f"Bearer {api_key}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def extract_property_history(objects: List[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Flatten the property history of CRM objects into individual entries.

    Args:
        objects: CRM object dicts; each may carry a "propertiesWithHistory"
            mapping of property name to a list of change entries.

    Yields:
        One dict per change entry: the entry itself, extended with the
        "object_id" of the owning object and the "property_name" it belongs to.
    """
    for item in objects:
        history = item.get("propertiesWithHistory")
        if not history:
            # Skip only this object. Returning here would silently drop the
            # history of every object that follows it in the list.
            continue
        # Yield a flat list of property history entries
        for key, changes in history.items():
            if not changes:
                continue
            for entry in changes:
                yield {"object_id": item["id"], "property_name": key, **entry}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def fetch_property_history(
    endpoint: str,
    api_key: str,
    props: str,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Fetch property history from the given CRM endpoint, page by page.

    Args:
        endpoint: The endpoint to fetch data from, as a string.
        api_key: The API key to use for authentication, as a string.
        props: A comma separated list of properties to retrieve the history for
        params: Optional dict of query params to include in the first request;
            "propertiesWithHistory" and "limit" are always overwritten.

    Yields:
        List of property history entries (dicts), one list per response page
        that contains a "results" key.
    """
    url = get_url(endpoint)
    headers = _get_headers(api_key)

    # work on a copy so the caller's dict is left untouched
    query = {**(params or {}), "propertiesWithHistory": props, "limit": 50}

    page = requests.get(url, headers=headers, params=query).json()
    while page is not None:
        if "results" in page:
            yield list(extract_property_history(page["results"]))

        # stop when the response does not advertise a further page
        next_page = page.get("paging", {}).get("next", None)
        if not next_page:
            break
        # the pagination link is requested as-is, without the query params
        page = requests.get(next_page["link"], headers=headers).json()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def fetch_data(
    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[List[Dict[str, Any]]]:
    """
    Fetch data from a HUBSPOT endpoint and yield the results page by page.
    For a paginated endpoint this function yields the items of all pages.

    Args:
        endpoint (str): The endpoint to fetch data from, as a string. It is
            resolved against the API base URL by `get_url`.
        api_key (str): The API key to use for authentication, as a string.
        params: Optional dict of query params to include in the first request.
            Follow-up pages are requested through the pagination link as-is.

    Yields:
        A List of CRM object dicts, one list per response page that contains a
        "results" key. Each object is the "properties" dict of a result (or the
        result itself when it has no "properties"), with the result "id" copied
        in when missing and one key per association holding a list of
        `{"value": <hs_object_id>, "<association>_id": <associated id>}` dicts.

    Notes:
        This function contains no retry or HTTP status handling of its own;
        both are left to the imported `requests` helper.
        NOTE(review): presumably that helper raises on non-successful status
        codes and retries transient failures - confirm against its documentation.
    """
    # Construct the URL and headers for the API request
    url = get_url(endpoint)
    headers = _get_headers(api_key)

    response = requests.get(url, headers=headers, params=params)
    page = response.json()

    while page is not None:
        if "results" in page:
            objects: List[Dict[str, Any]] = []
            for result in page["results"]:
                obj = result.get("properties", result)
                if "id" not in obj and "id" in result:
                    # Move id from the result to the top level of the object
                    obj["id"] = result["id"]
                if "associations" in result:
                    for association in result["associations"]:
                        related = result["associations"][association]["results"]
                        # dict.fromkeys drops duplicate links while keeping the
                        # order of first appearance, so the output is the same
                        # on every run (a set would not guarantee that)
                        unique_links = dict.fromkeys(
                            (obj["hs_object_id"], entry["id"]) for entry in related
                        )
                        obj[association] = [
                            {"value": value, f"{association}_id": related_id}
                            for value, related_id in unique_links
                        ]
                objects.append(obj)
            yield objects

        # Follow pagination links if they exist
        next_page = page.get("paging", {}).get("next", None)
        if next_page:
            response = requests.get(next_page["link"], headers=headers)
            page = response.json()
        else:
            page = None
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _get_property_names(api_key: str, object_type: str) -> List[str]:
    """
    Collect the names of all properties HubSpot reports for an object type.

    Args:
        api_key: The HubSpot API key, passed through to `fetch_data`.
        object_type: A key of `OBJECT_TYPE_PLURAL` (e.g. "contact"); its
            plural form is used to build the properties endpoint.

    Returns:
        A list with the "name" entry of every property, in the order the
        pages are produced by `fetch_data`.

    Raises:
        KeyError: If `object_type` is not a key of `OBJECT_TYPE_PLURAL`.
    """
    plural_name = OBJECT_TYPE_PLURAL[object_type]
    pages = fetch_data(f"/crm/v3/properties/{plural_name}", api_key)

    # Flatten the pages into a single list of property names.
    return [prop["name"] for page in pages for prop in page]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Hubspot source settings and constants"""
|
|
2
|
+
|
|
3
|
+
from dlt.common import pendulum
|
|
4
|
+
|
|
5
|
+
# NOTE(review): presumably the earliest date considered when no explicit start
# is supplied - confirm against the callers in the hubspot package.
STARTDATE = pendulum.datetime(year=2000, month=1, day=1)

# CRM object endpoints. Only the contacts and companies endpoints carry an
# `associations` query parameter; the remaining ones are plain object paths.
CRM_CONTACTS_ENDPOINT = (
    "/crm/v3/objects/contacts?associations=deals,products,tickets,quotes"
)
CRM_COMPANIES_ENDPOINT = (
    "/crm/v3/objects/companies?associations=contacts,deals,products,tickets,quotes"
)
CRM_DEALS_ENDPOINT = "/crm/v3/objects/deals"
CRM_PRODUCTS_ENDPOINT = "/crm/v3/objects/products"
CRM_TICKETS_ENDPOINT = "/crm/v3/objects/tickets"
CRM_QUOTES_ENDPOINT = "/crm/v3/objects/quotes"

# Singular object type name -> CRM endpoint defined above.
CRM_OBJECT_ENDPOINTS = {
    "contact": CRM_CONTACTS_ENDPOINT,
    "company": CRM_COMPANIES_ENDPOINT,
    "deal": CRM_DEALS_ENDPOINT,
    "product": CRM_PRODUCTS_ENDPOINT,
    "ticket": CRM_TICKETS_ENDPOINT,
    "quote": CRM_QUOTES_ENDPOINT,
}

# Template with four placeholders (objectType, objectId, occurredAfter,
# occurredBefore) that must be filled in with `str.format` before use.
WEB_ANALYTICS_EVENTS_ENDPOINT = "/events/v3/events?objectType={objectType}&objectId={objectId}&occurredAfter={occurredAfter}&occurredBefore={occurredBefore}&sort=-occurredAt"

# Plural object type name -> singular object type name.
OBJECT_TYPE_SINGULAR = {
    "companies": "company",
    "contacts": "contact",
    "deals": "deal",
    "tickets": "ticket",
    "products": "product",
    "quotes": "quote",
}

# Inverse of OBJECT_TYPE_SINGULAR: singular name -> plural name.
OBJECT_TYPE_PLURAL = {v: k for k, v in OBJECT_TYPE_SINGULAR.items()}

# Per-object-type property name lists.
# NOTE(review): presumably the properties requested when the caller does not
# pass an explicit list - confirm against the hubspot source.
DEFAULT_DEAL_PROPS = [
    "amount",
    "closedate",
    "createdate",
    "dealname",
    "dealstage",
    "hs_lastmodifieddate",
    "hs_object_id",
    "pipeline",
]

DEFAULT_COMPANY_PROPS = [
    "createdate",
    "domain",
    "hs_lastmodifieddate",
    "hs_object_id",
    "name",
]

# Unlike the other lists, this one uses "lastmodifieddate" rather than
# "hs_lastmodifieddate".
DEFAULT_CONTACT_PROPS = [
    "createdate",
    "email",
    "firstname",
    "hs_object_id",
    "lastmodifieddate",
    "lastname",
]

DEFAULT_TICKET_PROPS = [
    "createdate",
    "content",
    "hs_lastmodifieddate",
    "hs_object_id",
    "hs_pipeline",
    "hs_pipeline_stage",
    "hs_ticket_category",
    "hs_ticket_priority",
    "subject",
]

DEFAULT_PRODUCT_PROPS = [
    "createdate",
    "description",
    "hs_lastmodifieddate",
    "hs_object_id",
    "name",
    "price",
]

DEFAULT_QUOTE_PROPS = [
    "hs_createdate",
    "hs_expiration_date",
    "hs_lastmodifieddate",
    "hs_object_id",
    "hs_public_url_key",
    "hs_status",
    "hs_title",
]

# NOTE(review): looks like a sentinel meaning "every property" - confirm where
# it is compared against in the hubspot source.
ALL = ("ALL",)
|
ingestr/src/sources.py
CHANGED
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import csv
|
|
3
3
|
import json
|
|
4
|
+
from datetime import date
|
|
4
5
|
from typing import Any, Callable, Optional
|
|
5
6
|
from urllib.parse import parse_qs, urlparse
|
|
6
7
|
|
|
7
8
|
import dlt
|
|
8
9
|
|
|
10
|
+
from ingestr.src.chess import source
|
|
9
11
|
from ingestr.src.google_sheets import google_spreadsheet
|
|
10
12
|
from ingestr.src.gorgias import gorgias_source
|
|
13
|
+
from ingestr.src.hubspot import hubspot
|
|
11
14
|
from ingestr.src.mongodb import mongodb_collection
|
|
12
15
|
from ingestr.src.notion import notion_databases
|
|
13
16
|
from ingestr.src.shopify import shopify_source
|
|
14
17
|
from ingestr.src.sql_database import sql_table
|
|
18
|
+
from ingestr.src.stripe_analytics import stripe_source
|
|
15
19
|
from ingestr.src.table_definition import table_string_to_dataclass
|
|
16
20
|
|
|
17
21
|
|
|
@@ -295,3 +299,135 @@ class GoogleSheetsSource:
|
|
|
295
299
|
range_names=[table_fields.dataset],
|
|
296
300
|
get_named_ranges=False,
|
|
297
301
|
)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class ChessSource:
    """Adapter that turns a `chess://` URI into a dlt Chess source."""

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading on its own."""
        return True

    # chess://?players=john,peter
    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Build the Chess source restricted to the requested resource.

        Args:
            uri: Source URI; an optional `players` query parameter holds a
                comma-separated list of player names.
            table: One of "profiles", "games" or "archives".
            **kwargs: `incremental_key` must not be set. `interval_start` and
                `interval_end` are only used when both are given and both are
                `date` instances; they are then passed on as "%Y/%m" strings.

        Returns:
            The source returned by `source(...)`, limited to the resource
            that corresponds to `table`.

        Raises:
            ValueError: If `incremental_key` is provided or `table` is not
                supported.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Chess takes care of incrementality on its own, you should not provide incremental_key"
            )

        query = parse_qs(urlparse(uri).query)
        if "players" in query:
            players = query["players"][0].split(",")
        else:
            # Fallback players used when the URI does not name any.
            players = [
                "MagnusCarlsen",
                "HikaruNakamura",
                "ArjunErigaisi",
                "IanNepomniachtchi",
            ]

        month_range = {}
        interval_start = kwargs.get("interval_start")
        interval_end = kwargs.get("interval_end")
        if (
            interval_start
            and interval_end
            and isinstance(interval_start, date)
            and isinstance(interval_end, date)
        ):
            month_range["start_month"] = interval_start.strftime("%Y/%m")
            month_range["end_month"] = interval_end.strftime("%Y/%m")

        # User-facing table name -> resource name inside the Chess source.
        resource_names = {
            "profiles": "players_profiles",
            "games": "players_games",
            "archives": "players_archives",
        }
        resource_name = resource_names.get(table)
        if resource_name is None:
            raise ValueError(
                f"Resource '{table}' is not supported for Chess source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        return source(players=players, **month_range).with_resources(resource_name)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class StripeAnalyticsSource:
    """Adapter that turns a `stripe://?api_key=...` URI into a dlt Stripe source."""

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading on its own."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Build the Stripe source restricted to a single endpoint.

        Args:
            uri: Source URI; the `api_key` query parameter is required.
            table: Name of a supported Stripe endpoint, matched
                case-insensitively (e.g. "customer", "BalanceTransaction",
                "balancetransaction").
            **kwargs: `incremental_key` must not be set. Truthy
                `interval_start` / `interval_end` values are forwarded as
                `start_date` / `end_date`.

        Returns:
            The source returned by `stripe_source(...)`, limited to the
            selected endpoint.

        Raises:
            ValueError: If `incremental_key` is provided, the API key is
                missing, or `table` is not a supported endpoint.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Stripe takes care of incrementality on its own, you should not provide incremental_key"
            )

        api_key = parse_qs(urlparse(uri).query).get("api_key")
        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Stripe")

        supported_endpoints = (
            "Subscription",
            "Account",
            "Coupon",
            "Customer",
            "Product",
            "Price",
            "BalanceTransaction",
            "Invoice",
            "Event",
        )
        # Match case-insensitively instead of using str.capitalize():
        # capitalize() lower-cases everything after the first character, which
        # turned "BalanceTransaction" into "Balancetransaction" and made that
        # endpoint impossible to select.
        canonical_names = {name.lower(): name for name in supported_endpoints}
        endpoint = canonical_names.get(table.lower())
        if endpoint is None:
            raise ValueError(
                f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        date_args = {}
        if kwargs.get("interval_start"):
            date_args["start_date"] = kwargs.get("interval_start")

        if kwargs.get("interval_end"):
            date_args["end_date"] = kwargs.get("interval_end")

        return stripe_source(
            endpoints=[
                endpoint,
            ],
            # parse_qs returns a list per parameter; use the first value.
            stripe_secret_key=api_key[0],
            **date_args,
        ).with_resources(endpoint)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
class HubspotSource:
    """Adapter that turns a `hubspot://?api_key=...` URI into a dlt HubSpot source."""

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading on its own."""
        return True

    # hubspot://?api_key=<api_key>
    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Build the HubSpot source restricted to the requested resource.

        Args:
            uri: Source URI; the `api_key` query parameter is required.
            table: One of "contacts", "companies", "deals", "tickets",
                "products" or "quotes".
            **kwargs: `incremental_key` must not be set.

        Returns:
            The source returned by `hubspot(...)`, limited to `table`.

        Raises:
            ValueError: If `incremental_key` is provided, the API key is
                missing, or `table` is not supported.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Hubspot takes care of incrementality on its own, you should not provide incremental_key"
            )

        query_params = parse_qs(urlparse(uri).query)
        api_key = query_params.get("api_key")
        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Hubspot")

        supported_tables = (
            "contacts",
            "companies",
            "deals",
            "tickets",
            "products",
            "quotes",
        )
        if table not in supported_tables:
            raise ValueError(
                f"Resource '{table}' is not supported for Hubspot source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        # parse_qs returns a list per parameter; use the first value.
        hubspot_source = hubspot(api_key=api_key[0])
        return hubspot_source.with_resources(table)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""This source uses Stripe API and dlt to load data such as Customer, Subscription, Event etc. to the database and to calculate the MRR and churn rate."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Generator, Iterable, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import stripe
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
from pendulum import DateTime
|
|
9
|
+
|
|
10
|
+
from .helpers import pagination, transform_date
|
|
11
|
+
from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dlt.source
def stripe_source(
    endpoints: Tuple[str, ...] = ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Yield one dlt resource per requested Stripe endpoint, loaded in 'replace' mode.

    Stripe API responses do not provide an "updated" key, so in most cases
    the data has to be reloaded in full. Any endpoint name can be passed
    here; endpoints whose objects cannot be edited can alternatively be
    loaded incrementally with `incremental_stripe_source`.

    Args:
        endpoints (Tuple[str, ...]): Endpoint names to retrieve data from. Defaults to `ENDPOINTS`.
        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.
        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.

    Returns:
        Iterable[DltResource]: One resource per endpoint, named after the endpoint. The dates are forwarded
        to `pagination`, which requests objects created at or after 'start_date' and before 'end_date'.
    """
    # These assignments configure the `stripe` module itself, so they apply to
    # every later call made through it, not just to this source.
    stripe.api_key = stripe_secret_key
    stripe.api_version = "2022-11-15"

    def stripe_resource(
        endpoint: str,
    ) -> Generator[Dict[Any, Any], Any, None]:
        # start_date / end_date are taken from the enclosing call.
        yield from pagination(endpoint, start_date, end_date)

    for endpoint in endpoints:
        # The same generator function backs every resource; the endpoint is
        # bound by calling the wrapped resource with it.
        yield dlt.resource(
            stripe_resource,
            name=endpoint,
            write_disposition="replace",
        )(endpoint)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dlt.source
def incremental_stripe_source(
    endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS,
    stripe_secret_key: str = dlt.secrets.value,
    initial_start_date: Optional[DateTime] = None,
    end_date: Optional[DateTime] = None,
) -> Iterable[DltResource]:
    """
    Yield one dlt resource per endpoint, loaded incrementally in 'append' mode.

    As the Stripe API does not include an "updated" key in its responses,
    incremental loading is only possible for endpoints whose objects cannot
    be edited. Each resource tracks the "created" field, so a run only
    requests objects created since the last stored value instead of
    downloading everything again.

    Args:
        endpoints (tuple): A tuple of endpoint names to retrieve data from. Defaults to `INCREMENTAL_ENDPOINTS`.
        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
        initial_start_date (Optional[DateTime]): Initial value for `dlt.sources.incremental`.
            If not None, only data created at or after initial_start_date is loaded on the first run.
            Defaults to None. Format: datetime(YYYY, MM, DD).
        end_date (Optional[DateTime]): An optional end date to limit the data retrieved.
            Defaults to None. Format: datetime(YYYY, MM, DD).
    Returns:
        Iterable[DltResource]: Resources containing only data that has not yet been loaded.
    """
    # These assignments configure the `stripe` module itself, so they apply to
    # every later call made through it, not just to this source.
    stripe.api_key = stripe_secret_key
    stripe.api_version = "2022-11-15"
    # -1 is used as the initial value when no start date is given.
    start_date_unix = (
        transform_date(initial_start_date) if initial_start_date is not None else -1
    )

    def incremental_resource(
        endpoint: str,
        created: Optional[Any] = dlt.sources.incremental(
            "created", initial_value=start_date_unix
        ),
    ) -> Generator[Dict[Any, Any], Any, None]:
        # Resume from the last stored "created" value.
        start_value = created.last_value
        yield from pagination(endpoint, start_date=start_value, end_date=end_date)

    for endpoint in endpoints:
        # The same generator function backs every resource; the endpoint is
        # bound by calling the wrapped resource with it.
        yield dlt.resource(
            incremental_resource,
            name=endpoint,
            write_disposition="append",
            primary_key="id",
        )(endpoint)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Stripe analytics source helpers"""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, Optional, Union

import stripe
from dlt.common import pendulum
from dlt.common.typing import TDataItem
from pendulum import DateTime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def pagination(
    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
) -> Iterable[TDataItem]:
    """
    Walk through every page of an endpoint and yield each page's items.

    Args:
        endpoint (str): The endpoint to retrieve data from.
        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to None.
        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to None.

    Returns:
        Iterable[TDataItem]: The "data" list of each response, one list per page.
    """
    cursor = None
    has_more = True
    while has_more:
        page = stripe_get_data(
            endpoint,
            start_date=start_date,
            end_date=end_date,
            starting_after=cursor,
        )
        items = page["data"]

        # The id of the last item becomes the cursor for the next request.
        if len(items) > 0:
            cursor = items[-1]["id"]
        yield items

        has_more = page["has_more"]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def transform_date(date: Union[str, datetime, int]) -> int:
    """
    Convert a date-like value to a Unix timestamp in whole seconds.

    Args:
        date: One of:
            * a string in the form "2024-01-31T12:00:00Z", interpreted as UTC;
            * a `datetime` (pendulum's `DateTime` is a `datetime` subclass and
              is therefore covered); naive datetimes are interpreted in local
              time, as `datetime.timestamp()` does;
            * any other value (typically an int that already is a Unix
              timestamp), which is returned unchanged.

    Returns:
        The Unix timestamp as an int.

    Raises:
        ValueError: If a string does not match "%Y-%m-%dT%H:%M:%SZ".
    """
    if isinstance(date, str):
        # The previous implementation handed this strftime-style pattern to
        # pendulum.from_format, which expects pendulum tokens (YYYY, MM, ...),
        # so string input could never be parsed. strptime understands the
        # pattern; the trailing literal "Z" means UTC.
        date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=timezone.utc
        )
    if isinstance(date, datetime):
        # convert to unix timestamp
        date = int(date.timestamp())
    return date
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def stripe_get_data(
    resource: str,
    start_date: Optional[Any] = None,
    end_date: Optional[Any] = None,
    **kwargs: Any,
) -> Dict[Any, Any]:
    """
    Fetch one page (up to 100 objects) of a Stripe resource.

    Args:
        resource: Attribute name on the `stripe` module whose `list` method
            is called (e.g. "Customer").
        start_date: Optional lower bound for the "created" filter ("gte");
            truthy values are converted with `transform_date`.
        end_date: Optional upper bound for the "created" filter ("lt");
            truthy values are converted with `transform_date`.
        **kwargs: Extra arguments forwarded to `list`, such as
            `starting_after`.

    Returns:
        The list response converted to a plain dict.
    """
    created_filter = {
        "gte": transform_date(start_date) if start_date else start_date,
        "lt": transform_date(end_date) if end_date else end_date,
    }

    # Subscriptions are always requested with status "all".
    if resource == "Subscription":
        kwargs["status"] = "all"

    stripe_api = getattr(stripe, resource)
    listing = stripe_api.list(created=created_filter, limit=100, **kwargs)
    return dict(listing)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Stripe analytics source settings and constants"""
|
|
2
|
+
|
|
3
|
+
# The most popular endpoints.
# The full list of Stripe API endpoints is available at https://stripe.com/docs/api.
# Each name is looked up as an attribute of the `stripe` module by the helpers.
ENDPOINTS = (
    "Subscription",
    "Account",
    "Coupon",
    "Customer",
    "Product",
    "Price",
)
# Endpoints that can be loaded incrementally.
INCREMENTAL_ENDPOINTS = ("Event", "Invoice", "BalanceTransaction")
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.7.4"
|
|
1
|
+
__version__ = "0.7.6"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.7.6
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -38,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
|
|
|
38
38
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
39
39
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
40
40
|
Requires-Dist: sqlalchemy==1.4.52
|
|
41
|
+
Requires-Dist: stripe==10.7.0
|
|
41
42
|
Requires-Dist: tqdm==4.66.2
|
|
42
43
|
Requires-Dist: typer==0.12.3
|
|
43
44
|
Description-Content-Type: text/markdown
|
|
@@ -172,25 +173,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
172
173
|
<tr>
|
|
173
174
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
174
175
|
</tr>
|
|
176
|
+
<tr>
|
|
177
|
+
<td>Chess.com</td>
|
|
178
|
+
<td>✅</td>
|
|
179
|
+
<td>-</td>
|
|
180
|
+
</tr>
|
|
175
181
|
<tr>
|
|
176
182
|
<td>Gorgias</td>
|
|
177
183
|
<td>✅</td>
|
|
178
|
-
<td
|
|
184
|
+
<td>-</td>
|
|
179
185
|
</tr>
|
|
180
186
|
<tr>
|
|
181
187
|
<td>Google Sheets</td>
|
|
182
188
|
<td>✅</td>
|
|
183
|
-
<td
|
|
189
|
+
<td>-</td>
|
|
190
|
+
</tr>
|
|
191
|
+
<tr>
|
|
192
|
+
<td>HubSpot</td>
|
|
193
|
+
<td>✅</td>
|
|
194
|
+
<td>-</td>
|
|
184
195
|
</tr>
|
|
185
196
|
<tr>
|
|
186
197
|
<td>Notion</td>
|
|
187
198
|
<td>✅</td>
|
|
188
|
-
<td
|
|
199
|
+
<td>-</td>
|
|
189
200
|
</tr>
|
|
190
201
|
<tr>
|
|
191
202
|
<td>Shopify</td>
|
|
192
203
|
<td>✅</td>
|
|
193
|
-
<td
|
|
204
|
+
<td>-</td>
|
|
205
|
+
</tr>
|
|
206
|
+
<tr>
|
|
207
|
+
<td>Stripe</td>
|
|
208
|
+
<td>✅</td>
|
|
209
|
+
<td>-</td>
|
|
194
210
|
</tr>
|
|
195
211
|
</table>
|
|
196
212
|
|
|
@@ -1,9 +1,12 @@
|
|
|
1
|
-
ingestr/main.py,sha256=
|
|
1
|
+
ingestr/main.py,sha256=0J_bMCWLZT0tdRW0df8iKw05In55LJ_vUoN3X2TgXlc,17183
|
|
2
2
|
ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
|
|
3
|
-
ingestr/src/factory.py,sha256=
|
|
4
|
-
ingestr/src/sources.py,sha256=
|
|
3
|
+
ingestr/src/factory.py,sha256=Fp_MaeiAhU7IHT6RMLTEhvXgmDyu6j1IHGnjC4qsPLI,3939
|
|
4
|
+
ingestr/src/sources.py,sha256=7dy_KvoGI9vZSJwpkW3iNh8M1tjh1pBSz8qLpo0GTIo,14589
|
|
5
5
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
6
|
-
ingestr/src/version.py,sha256=
|
|
6
|
+
ingestr/src/version.py,sha256=wu65dmVM9fKR1rBHH263ls8Ca2FZzb0ejYcrP_Ld0iY,22
|
|
7
|
+
ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
|
|
8
|
+
ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
|
|
9
|
+
ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
|
|
7
10
|
ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
|
|
8
11
|
ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
|
|
9
12
|
ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
|
|
@@ -11,6 +14,9 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
|
|
|
11
14
|
ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
|
|
12
15
|
ingestr/src/gorgias/__init__.py,sha256=BzX9X1Yc_1Mch6NP1pn26hjRIiaadErgHxkdJHw4P3o,21227
|
|
13
16
|
ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
|
|
17
|
+
ingestr/src/hubspot/__init__.py,sha256=eSD_lEIEd16YijAtUATFG8FGO8YGPm-MtAk94KKsx6o,9740
|
|
18
|
+
ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
|
|
19
|
+
ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
|
|
14
20
|
ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
|
|
15
21
|
ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
|
|
16
22
|
ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
|
|
@@ -27,6 +33,9 @@ ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5
|
|
|
27
33
|
ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
|
|
28
34
|
ingestr/src/sql_database/override.py,sha256=xbKGDztCzvrhJ5kJTXERal3LA56bEeVug4_rrTs8DgA,333
|
|
29
35
|
ingestr/src/sql_database/schema_types.py,sha256=qXTanvFPE8wMCSDzQWPDi5yqaO-llfrFXjiGJALI4NA,5013
|
|
36
|
+
ingestr/src/stripe_analytics/__init__.py,sha256=8yy6i4DAhUqY4ZForetQ0DWc_YQrY0FBH6yk0Z3m-Mw,4493
|
|
37
|
+
ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
|
|
38
|
+
ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
|
|
30
39
|
ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
|
|
31
40
|
ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
|
|
32
41
|
ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
|
|
@@ -37,8 +46,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
37
46
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
38
47
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
39
48
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
40
|
-
ingestr-0.7.
|
|
41
|
-
ingestr-0.7.
|
|
42
|
-
ingestr-0.7.
|
|
43
|
-
ingestr-0.7.
|
|
44
|
-
ingestr-0.7.
|
|
49
|
+
ingestr-0.7.6.dist-info/METADATA,sha256=5QLxKZE65TQH_z6dH0g52FZmseLc2hIJBipDwcuCg5g,6104
|
|
50
|
+
ingestr-0.7.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
51
|
+
ingestr-0.7.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
52
|
+
ingestr-0.7.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
53
|
+
ingestr-0.7.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|