ingestr 0.13.16__py3-none-any.whl → 0.13.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/hubspot/__init__.py +0 -8
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +65 -0
- ingestr/src/sources.py +37 -0
- {ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/METADATA +137 -6
- {ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/RECORD +11 -9
- {ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/WHEEL +0 -0
- {ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.13.16"
|
|
1
|
+
version = "v0.13.18"
|
ingestr/src/factory.py
CHANGED
|
@@ -36,6 +36,7 @@ from ingestr.src.sources import (
|
|
|
36
36
|
GorgiasSource,
|
|
37
37
|
HubspotSource,
|
|
38
38
|
KafkaSource,
|
|
39
|
+
KinesisSource,
|
|
39
40
|
KlaviyoSource,
|
|
40
41
|
LinkedInAdsSource,
|
|
41
42
|
LocalCsvSource,
|
|
@@ -141,6 +142,7 @@ class SourceDestinationFactory:
|
|
|
141
142
|
"applovinmax": ApplovinMaxSource,
|
|
142
143
|
"salesforce": SalesforceSource,
|
|
143
144
|
"personio": PersonioSource,
|
|
145
|
+
"kinesis": KinesisSource,
|
|
144
146
|
}
|
|
145
147
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
146
148
|
"bigquery": BigQueryDestination,
|
ingestr/src/hubspot/__init__.py
CHANGED
|
@@ -199,14 +199,6 @@ def crm_objects(
|
|
|
199
199
|
|
|
200
200
|
props = ",".join(sorted(list(set(props))))
|
|
201
201
|
|
|
202
|
-
if len(props) > 2000:
|
|
203
|
-
raise ValueError(
|
|
204
|
-
"Your request to Hubspot is too long to process. "
|
|
205
|
-
"Maximum allowed query length is 2000 symbols, while "
|
|
206
|
-
f"your list of properties `{props[:200]}`... is {len(props)} "
|
|
207
|
-
"symbols long. Use the `props` argument of the resource to "
|
|
208
|
-
"set the list of properties to extract from the endpoint."
|
|
209
|
-
)
|
|
210
202
|
|
|
211
203
|
params = {"properties": props, "limit": 100}
|
|
212
204
|
|
|
ingestr/src/kinesis/__init__.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Reads messages from Kinesis queue."""
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, List, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.common import json, pendulum
|
|
7
|
+
from dlt.common.configuration.specs import AwsCredentials
|
|
8
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
9
|
+
from dlt.common.typing import StrStr, TAnyDateTime, TDataItem
|
|
10
|
+
from dlt.common.utils import digest128
|
|
11
|
+
|
|
12
|
+
from .helpers import get_shard_iterator, max_sequence_by_shard
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.resource(
    name=lambda args: args["stream_name"],
    primary_key="kinesis_msg_id",
    standalone=True,
)
def kinesis_stream(
    stream_name: str,
    initial_at_timestamp: TAnyDateTime,
    credentials: AwsCredentials,
    last_msg: Optional[dlt.sources.incremental[StrStr]] = dlt.sources.incremental(
        "kinesis", last_value_func=max_sequence_by_shard
    ),
    max_number_of_messages: Optional[int] = None,
    milliseconds_behind_latest: int = 1000,
    parse_json: bool = True,
    chunk_size: int = 1000,
) -> Iterable[TDataItem]:
    """Reads a kinesis stream and yields messages. Supports incremental loading. Parses messages as json by default.

    Every shard of the stream is read in turn. Messages are yielded in batches
    (one list per `get_records` call), so an empty list may be yielded for a
    shard that currently has no new records.

    Args:
        stream_name (str): The name of the stream to read from. If not provided, the
            value must be present in config/secrets
        initial_at_timestamp (TAnyDateTime): An initial timestamp used to generate AT_TIMESTAMP or LATEST iterator
            when timestamp value is 0. `None` reads from the beginning of the shard. A value stored in the
            resource state by a previous run takes precedence over this argument.
        credentials (AwsCredentials): The credentials to use to connect to kinesis. If not provided,
            the value from secrets or credentials present on the device will be used.
        last_msg (Optional[dlt.sources.incremental]): An incremental over a mapping from shard_id to message sequence
            that will be used to create shard iterators of type AFTER_SEQUENCE_NUMBER when loading incrementally.
        max_number_of_messages (Optional[int]): Maximum number of messages to read in one run. Actual read may exceed
            that number by up to chunk_size. Defaults to None (no limit).
        milliseconds_behind_latest (int): The number of milliseconds behind the top of the shard to stop reading
            messages, defaults to 1000.
        parse_json (bool): If True, assumes that messages are json strings, parses them and merges the parsed
            object into the message; otherwise the raw payload is returned in `data`. Defaults to True.
        chunk_size (int): The number of records to fetch at once. Defaults to 1000.

    Yields:
        Iterable[TDataItem]: Messages. Contain Kinesis envelope in `kinesis` and bytes data in `data`
            (if `parse_json` disabled)
    """
    session = credentials._to_botocore_session()
    # the default timeouts are (60, 60) which is fine
    kinesis_client = session.create_client("kinesis")

    # normalize at_timestamp to pendulum; None means "no explicit start"
    initial_at_datetime = (
        None
        if initial_at_timestamp is None
        else ensure_pendulum_datetime(initial_at_timestamp)
    )
    # a start stored in state by a previous run wins over the argument
    resource_state = dlt.current.resource_state()
    initial_at_datetime = resource_state.get(
        "initial_at_timestamp", initial_at_datetime
    )
    # so next time we request shards at AT_TIMESTAMP that is now
    resource_state["initial_at_timestamp"] = pendulum.now("UTC").subtract(seconds=1)

    # collect all shards of the stream, following pagination tokens
    shards_list = kinesis_client.list_shards(StreamName=stream_name)
    shards: List[StrStr] = list(shards_list["Shards"])
    while next_token := shards_list.get("NextToken"):
        shards_list = kinesis_client.list_shards(NextToken=next_token)
        # extend with the page's shard descriptions, not with the response dict itself
        shards.extend(shards_list["Shards"])

    shard_ids = [shard["ShardId"] for shard in shards]
    # every shard id ever queued, so a child shard is never scheduled twice
    known_shard_ids = set(shard_ids)

    # get next shard to fetch messages from
    while shard_ids:
        shard_id = shard_ids.pop(0)

        shard_iterator, _ = get_shard_iterator(
            kinesis_client,
            stream_name,
            shard_id,
            last_msg,  # type: ignore
            initial_at_datetime,  # type: ignore
        )

        while shard_iterator:
            records = []
            records_response = kinesis_client.get_records(
                ShardIterator=shard_iterator,
                Limit=chunk_size,  # The size of data can be up to 1 MB, it must be controlled by the user
            )

            for record in records_response["Records"]:
                sequence_number = record["SequenceNumber"]
                content = record["Data"]

                arrival_time = record["ApproximateArrivalTimestamp"]
                arrival_timestamp = arrival_time.astimezone(pendulum.UTC)

                message = {
                    "kinesis": {
                        "shard_id": shard_id,
                        "seq_no": sequence_number,
                        "ts": ensure_pendulum_datetime(arrival_timestamp),
                        "partition": record["PartitionKey"],
                        "stream_name": stream_name,
                    },
                    # stable id derived from the shard and the sequence number
                    "kinesis_msg_id": digest128(shard_id + sequence_number),
                }

                if parse_json:
                    message.update(json.loadb(content))
                else:
                    message["data"] = content
                records.append(message)
            yield records

            # do not load more max_number_of_messages
            if max_number_of_messages is not None:
                max_number_of_messages -= len(records)
                if max_number_of_messages <= 0:
                    return

            # add child shards so we can request messages from them
            for child_shard in records_response.get("ChildShards") or []:
                child_shard_id = child_shard["ShardId"]
                if child_shard_id not in known_shard_ids:
                    known_shard_ids.add(child_shard_id)
                    shard_ids.append(child_shard_id)

            # gets 0 when no messages so we cutoff empty shards
            records_ms_behind_latest = records_response.get("MillisBehindLatest", 0)
            if records_ms_behind_latest < milliseconds_behind_latest:
                # stop taking messages from shard
                shard_iterator = None  # type: ignore
            else:
                # continue taking messages; a missing iterator ends the loop
                # instead of raising KeyError
                shard_iterator = records_response.get("NextShardIterator")
|
|
ingestr/src/kinesis/helpers.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Any, Sequence, Tuple
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.common import pendulum
|
|
5
|
+
from dlt.common.typing import DictStrAny, StrAny, StrStr
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_shard_iterator(
    kinesis_client: Any,
    stream_name: str,
    shard_id: str,
    last_msg: "dlt.sources.incremental[StrStr]",
    initial_at_timestamp: "pendulum.DateTime | None",
) -> "Tuple[str, StrAny]":
    """Gets shard `shard_id` of `stream_name` iterator. If `last_msg` incremental is present it may
    contain last message sequence for shard_id. in that case AFTER_SEQUENCE_NUMBER is created.
    If no message sequence is present, `initial_at_timestamp` is used for AT_TIMESTAMP or LATEST.
    The final fallback is TRIM_HORIZON

    Args:
        kinesis_client: Client object exposing `get_shard_iterator(**kwargs)`.
        stream_name: Name of the stream that owns the shard.
        shard_id: Id of the shard to open an iterator for.
        last_msg: Incremental whose `last_value` / `initial_value` maps shard ids to
            the last seen sequence number; may be None.
        initial_at_timestamp: Start position used when no sequence number is known.
            None selects TRIM_HORIZON, a timestamp of 0 selects LATEST, anything else
            selects AT_TIMESTAMP.

    Returns:
        A tuple of the shard iterator string and the iterator parameters that were
        passed to the client (without stream name and shard id).
    """
    # NOTE: the signature annotations are string literals so none of them is evaluated
    # when the module is imported; `pendulum.DateTime | None` raises TypeError on Python 3.9.
    sequence_state = {}
    if last_msg is not None:
        sequence_state = last_msg.last_value or last_msg.initial_value or {}

    msg_sequence = sequence_state.get(shard_id, None)
    if msg_sequence:
        # resume right after the last message seen on this shard
        iterator_params = dict(
            ShardIteratorType="AFTER_SEQUENCE_NUMBER",
            StartingSequenceNumber=msg_sequence,
        )
    elif initial_at_timestamp is None:
        # Fetch all records from the beginning
        iterator_params = dict(ShardIteratorType="TRIM_HORIZON")
    elif initial_at_timestamp.timestamp() == 0.0:
        # will sets to latest i.e only the messages at the tip of the stream are read
        iterator_params = dict(ShardIteratorType="LATEST")
    else:
        iterator_params = dict(
            ShardIteratorType="AT_TIMESTAMP", Timestamp=initial_at_timestamp.timestamp()
        )

    shard_iterator = kinesis_client.get_shard_iterator(
        StreamName=stream_name, ShardId=shard_id, **iterator_params
    )
    return shard_iterator["ShardIterator"], iterator_params
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def max_sequence_by_shard(values: "Sequence[StrStr]") -> "StrStr":
    """A last_value_function that operates on mapping of shard_id:msg_sequence defining the max

    Args:
        values: Either a one-element sequence holding the kinesis metadata of a
            message (`shard_id`, `seq_no`), or a two-element sequence of that
            metadata followed by the previous shard_id -> sequence mapping
            (which may be None).

    Returns:
        A new mapping of shard_id to the highest sequence number seen so far;
        the previous mapping is never mutated.
    """
    last_value = None
    # if tuple/list contains only one element then return it
    if len(values) == 1:
        item = values[0]
    else:
        # item is kinesis metadata, last_value is previous state of the shards
        item, last_value = values

    # always make a copy so the stored state is not modified in place
    last_value = {} if last_value is None else dict(last_value)

    shard_id = item["shard_id"]
    # we compare message sequence at shard_id. Sequence numbers are decimal strings,
    # so order them by length first and text second: a plain string comparison would
    # rank "9" above "10". The empty default sorts below any real sequence number.
    last_value[shard_id] = max(
        item["seq_no"],
        last_value.get(shard_id, ""),
        key=lambda seq: (len(seq), seq),
    )
    return last_value
|
ingestr/src/sources.py
CHANGED
|
@@ -75,6 +75,7 @@ from ingestr.src.gorgias import gorgias_source
|
|
|
75
75
|
from ingestr.src.hubspot import hubspot
|
|
76
76
|
from ingestr.src.kafka import kafka_consumer
|
|
77
77
|
from ingestr.src.kafka.helpers import KafkaCredentials
|
|
78
|
+
from ingestr.src.kinesis import kinesis_stream
|
|
78
79
|
from ingestr.src.klaviyo._init_ import klaviyo_source
|
|
79
80
|
from ingestr.src.linkedin_ads import linked_in_ads_source
|
|
80
81
|
from ingestr.src.linkedin_ads.dimension_time_enum import (
|
|
@@ -1969,3 +1970,39 @@ class PersonioSource:
|
|
|
1969
1970
|
start_date=interval_start_date,
|
|
1970
1971
|
end_date=interval_end_date,
|
|
1971
1972
|
).with_resources(table)
|
|
1973
|
+
|
|
1974
|
+
|
|
1975
|
+
class KinesisSource:
    """Source that builds a `kinesis_stream` resource from a `kinesis://` URI."""

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading itself."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Kinesis resource for stream `table`.

        The URI carries the AWS settings as query parameters:
        kinesis://?aws_access_key_id=<AccessKeyId>&aws_secret_access_key=<SecretAccessKey>&region_name=<Region>

        Raises:
            MissingValueError: when one of the three query parameters is absent.
        """
        query = parse_qs(urlparse(uri).query)

        # validate in a fixed order so the first missing parameter is the one reported
        settings = {}
        for key in ("aws_access_key_id", "aws_secret_access_key", "region_name"):
            found = query.get(key)
            if found is None:
                raise MissingValueError(key, "Kinesis")
            # parse_qs returns a list per key; the first value is used
            settings[key] = found[0]

        # the resource will read all messages after this timestamp.
        initial_at = kwargs.get("interval_start")
        if initial_at is not None:
            initial_at = ensure_pendulum_datetime(initial_at)

        # source table = stream name
        aws_credentials = AwsCredentials(**settings)
        return kinesis_stream(
            stream_name=table,
            credentials=aws_credentials,
            initial_at_timestamp=initial_at,
        )
|
|
{ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.16
|
|
3
|
+
Version: 0.13.18
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -14,52 +14,178 @@ Classifier: Operating System :: OS Independent
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Topic :: Database
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: aiobotocore==2.21.1
|
|
18
|
+
Requires-Dist: aiohappyeyeballs==2.4.8
|
|
19
|
+
Requires-Dist: aiohttp==3.11.13
|
|
20
|
+
Requires-Dist: aioitertools==0.12.0
|
|
21
|
+
Requires-Dist: aiosignal==1.3.2
|
|
22
|
+
Requires-Dist: alembic==1.15.1
|
|
23
|
+
Requires-Dist: annotated-types==0.7.0
|
|
17
24
|
Requires-Dist: asana==3.2.3
|
|
25
|
+
Requires-Dist: asn1crypto==1.5.1
|
|
26
|
+
Requires-Dist: asynch==0.2.4
|
|
27
|
+
Requires-Dist: attrs==25.1.0
|
|
28
|
+
Requires-Dist: backoff==2.2.1
|
|
29
|
+
Requires-Dist: beautifulsoup4==4.13.3
|
|
30
|
+
Requires-Dist: boto3==1.37.1
|
|
31
|
+
Requires-Dist: botocore==1.37.1
|
|
32
|
+
Requires-Dist: cachetools==5.5.2
|
|
33
|
+
Requires-Dist: certifi==2025.1.31
|
|
34
|
+
Requires-Dist: cffi==1.17.1
|
|
35
|
+
Requires-Dist: charset-normalizer==3.4.1
|
|
36
|
+
Requires-Dist: ciso8601==2.3.2
|
|
37
|
+
Requires-Dist: click==8.1.8
|
|
38
|
+
Requires-Dist: clickhouse-connect==0.8.14
|
|
39
|
+
Requires-Dist: clickhouse-driver==0.2.9
|
|
40
|
+
Requires-Dist: clickhouse-sqlalchemy==0.2.7
|
|
18
41
|
Requires-Dist: confluent-kafka==2.8.0
|
|
42
|
+
Requires-Dist: cryptography==44.0.2
|
|
43
|
+
Requires-Dist: curlify==2.2.1
|
|
19
44
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
20
45
|
Requires-Dist: databricks-sqlalchemy==1.0.2
|
|
21
46
|
Requires-Dist: dataclasses-json==0.6.7
|
|
47
|
+
Requires-Dist: decorator==5.2.1
|
|
48
|
+
Requires-Dist: deprecation==2.1.0
|
|
22
49
|
Requires-Dist: dlt==1.6.1
|
|
50
|
+
Requires-Dist: dnspython==2.7.0
|
|
23
51
|
Requires-Dist: duckdb-engine==0.15.0
|
|
24
52
|
Requires-Dist: duckdb==1.2.0
|
|
53
|
+
Requires-Dist: et-xmlfile==2.0.0
|
|
25
54
|
Requires-Dist: facebook-business==20.0.0
|
|
55
|
+
Requires-Dist: filelock==3.17.0
|
|
26
56
|
Requires-Dist: flatten-json==0.1.14
|
|
57
|
+
Requires-Dist: frozenlist==1.5.0
|
|
58
|
+
Requires-Dist: fsspec==2024.10.0
|
|
27
59
|
Requires-Dist: gcsfs==2024.10.0
|
|
60
|
+
Requires-Dist: gitdb==4.0.12
|
|
61
|
+
Requires-Dist: gitpython==3.1.44
|
|
62
|
+
Requires-Dist: giturlparse==0.12.0
|
|
28
63
|
Requires-Dist: google-ads==25.1.0
|
|
29
64
|
Requires-Dist: google-analytics-data==0.18.17
|
|
65
|
+
Requires-Dist: google-api-core==2.24.1
|
|
30
66
|
Requires-Dist: google-api-python-client==2.130.0
|
|
67
|
+
Requires-Dist: google-auth-httplib2==0.2.0
|
|
68
|
+
Requires-Dist: google-auth-oauthlib==1.2.1
|
|
69
|
+
Requires-Dist: google-auth==2.38.0
|
|
31
70
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
71
|
+
Requires-Dist: google-cloud-bigquery==3.30.0
|
|
72
|
+
Requires-Dist: google-cloud-core==2.4.2
|
|
73
|
+
Requires-Dist: google-cloud-storage==3.1.0
|
|
74
|
+
Requires-Dist: google-crc32c==1.6.0
|
|
75
|
+
Requires-Dist: google-resumable-media==2.7.2
|
|
76
|
+
Requires-Dist: googleapis-common-protos==1.69.0
|
|
77
|
+
Requires-Dist: greenlet==3.1.1
|
|
78
|
+
Requires-Dist: grpcio-status==1.62.3
|
|
79
|
+
Requires-Dist: grpcio==1.70.0
|
|
80
|
+
Requires-Dist: hdbcli==2.23.27
|
|
81
|
+
Requires-Dist: hexbytes==1.3.0
|
|
82
|
+
Requires-Dist: httplib2==0.22.0
|
|
83
|
+
Requires-Dist: humanize==4.12.1
|
|
84
|
+
Requires-Dist: idna==3.10
|
|
85
|
+
Requires-Dist: inflection==0.5.1
|
|
86
|
+
Requires-Dist: isodate==0.7.2
|
|
87
|
+
Requires-Dist: jmespath==1.0.1
|
|
88
|
+
Requires-Dist: jsonpath-ng==1.7.0
|
|
89
|
+
Requires-Dist: leb128==1.0.8
|
|
90
|
+
Requires-Dist: lxml==5.3.1
|
|
91
|
+
Requires-Dist: lz4==4.4.3
|
|
92
|
+
Requires-Dist: makefun==1.15.6
|
|
93
|
+
Requires-Dist: mako==1.3.9
|
|
94
|
+
Requires-Dist: markdown-it-py==3.0.0
|
|
95
|
+
Requires-Dist: markupsafe==3.0.2
|
|
96
|
+
Requires-Dist: marshmallow==3.26.1
|
|
97
|
+
Requires-Dist: mdurl==0.1.2
|
|
98
|
+
Requires-Dist: monotonic==1.6
|
|
99
|
+
Requires-Dist: more-itertools==10.6.0
|
|
100
|
+
Requires-Dist: multidict==6.1.0
|
|
101
|
+
Requires-Dist: mypy-extensions==1.0.0
|
|
32
102
|
Requires-Dist: mysql-connector-python==9.2.0
|
|
103
|
+
Requires-Dist: numpy==2.2.3
|
|
104
|
+
Requires-Dist: oauthlib==3.2.2
|
|
105
|
+
Requires-Dist: openpyxl==3.1.5
|
|
106
|
+
Requires-Dist: orjson==3.10.15
|
|
107
|
+
Requires-Dist: packaging==24.2
|
|
108
|
+
Requires-Dist: pandas==2.2.3
|
|
109
|
+
Requires-Dist: pathvalidate==3.2.3
|
|
33
110
|
Requires-Dist: pendulum==3.0.0
|
|
111
|
+
Requires-Dist: platformdirs==4.3.6
|
|
112
|
+
Requires-Dist: pluggy==1.5.0
|
|
113
|
+
Requires-Dist: ply==3.11
|
|
114
|
+
Requires-Dist: propcache==0.3.0
|
|
115
|
+
Requires-Dist: proto-plus==1.26.0
|
|
116
|
+
Requires-Dist: protobuf==4.25.6
|
|
34
117
|
Requires-Dist: psutil==6.1.1
|
|
35
118
|
Requires-Dist: psycopg2-binary==2.9.10
|
|
36
119
|
Requires-Dist: py-machineid==0.6.0
|
|
37
120
|
Requires-Dist: pyairtable==2.3.3
|
|
38
121
|
Requires-Dist: pyarrow==18.1.0
|
|
122
|
+
Requires-Dist: pyasn1-modules==0.4.1
|
|
123
|
+
Requires-Dist: pyasn1==0.6.1
|
|
39
124
|
Requires-Dist: pyathena==3.12.2
|
|
125
|
+
Requires-Dist: pycountry==24.6.1
|
|
126
|
+
Requires-Dist: pycparser==2.22
|
|
127
|
+
Requires-Dist: pydantic-core==2.27.2
|
|
128
|
+
Requires-Dist: pydantic==2.10.6
|
|
129
|
+
Requires-Dist: pygments==2.19.1
|
|
130
|
+
Requires-Dist: pyjwt==2.10.1
|
|
40
131
|
Requires-Dist: pymongo==4.11.1
|
|
41
132
|
Requires-Dist: pymysql==1.1.1
|
|
133
|
+
Requires-Dist: pyopenssl==25.0.0
|
|
134
|
+
Requires-Dist: pyparsing==3.2.1
|
|
42
135
|
Requires-Dist: pyrate-limiter==3.7.0
|
|
136
|
+
Requires-Dist: python-dateutil==2.9.0.post0
|
|
137
|
+
Requires-Dist: python-dotenv==1.0.1
|
|
138
|
+
Requires-Dist: pytz==2025.1
|
|
139
|
+
Requires-Dist: pyyaml==6.0.2
|
|
43
140
|
Requires-Dist: redshift-connector==2.1.5
|
|
141
|
+
Requires-Dist: requests-file==2.1.0
|
|
142
|
+
Requires-Dist: requests-oauthlib==1.3.1
|
|
143
|
+
Requires-Dist: requests-toolbelt==1.0.0
|
|
144
|
+
Requires-Dist: requests==2.32.3
|
|
145
|
+
Requires-Dist: requirements-parser==0.11.0
|
|
146
|
+
Requires-Dist: rich-argparse==1.7.0
|
|
44
147
|
Requires-Dist: rich==13.9.4
|
|
148
|
+
Requires-Dist: rsa==4.9
|
|
45
149
|
Requires-Dist: rudder-sdk-python==2.1.4
|
|
46
150
|
Requires-Dist: s3fs==2024.10.0
|
|
151
|
+
Requires-Dist: s3transfer==0.11.3
|
|
152
|
+
Requires-Dist: scramp==1.4.5
|
|
153
|
+
Requires-Dist: semver==3.0.4
|
|
154
|
+
Requires-Dist: setuptools==75.8.2
|
|
155
|
+
Requires-Dist: shellingham==1.5.4
|
|
47
156
|
Requires-Dist: simple-salesforce==1.12.6
|
|
157
|
+
Requires-Dist: simplejson==3.20.1
|
|
158
|
+
Requires-Dist: six==1.17.0
|
|
159
|
+
Requires-Dist: smmap==5.0.2
|
|
160
|
+
Requires-Dist: snowflake-connector-python==3.14.0
|
|
48
161
|
Requires-Dist: snowflake-sqlalchemy==1.6.1
|
|
162
|
+
Requires-Dist: sortedcontainers==2.4.0
|
|
163
|
+
Requires-Dist: soupsieve==2.6
|
|
49
164
|
Requires-Dist: sqlalchemy-bigquery==1.12.1
|
|
50
165
|
Requires-Dist: sqlalchemy-hana==2.0.0
|
|
51
166
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
52
167
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
53
168
|
Requires-Dist: sqlalchemy==1.4.52
|
|
54
169
|
Requires-Dist: stripe==10.7.0
|
|
170
|
+
Requires-Dist: tenacity==9.0.0
|
|
171
|
+
Requires-Dist: thrift==0.16.0
|
|
172
|
+
Requires-Dist: time-machine==2.16.0
|
|
173
|
+
Requires-Dist: tomlkit==0.13.2
|
|
55
174
|
Requires-Dist: tqdm==4.67.1
|
|
56
175
|
Requires-Dist: typer==0.13.1
|
|
57
176
|
Requires-Dist: types-requests==2.32.0.20240907
|
|
58
|
-
|
|
59
|
-
Requires-Dist:
|
|
60
|
-
Requires-Dist:
|
|
61
|
-
Requires-Dist:
|
|
62
|
-
Requires-Dist:
|
|
177
|
+
Requires-Dist: types-setuptools==75.8.2.20250305
|
|
178
|
+
Requires-Dist: typing-extensions==4.12.2
|
|
179
|
+
Requires-Dist: typing-inspect==0.9.0
|
|
180
|
+
Requires-Dist: tzdata==2025.1
|
|
181
|
+
Requires-Dist: tzlocal==5.3
|
|
182
|
+
Requires-Dist: uritemplate==4.1.1
|
|
183
|
+
Requires-Dist: urllib3==2.3.0
|
|
184
|
+
Requires-Dist: wrapt==1.17.2
|
|
185
|
+
Requires-Dist: yarl==1.18.3
|
|
186
|
+
Requires-Dist: zeep==4.3.1
|
|
187
|
+
Requires-Dist: zstandard==0.23.0
|
|
188
|
+
Requires-Dist: zstd==1.5.6.5
|
|
63
189
|
Provides-Extra: odbc
|
|
64
190
|
Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
|
|
65
191
|
Provides-Extra: oracle
|
|
@@ -231,6 +357,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
|
|
|
231
357
|
<td>✅</td>
|
|
232
358
|
<td>-</td>
|
|
233
359
|
</tr>
|
|
360
|
+
<tr>
|
|
361
|
+
<td>Amazon Kinesis</td>
|
|
362
|
+
<td>✅</td>
|
|
363
|
+
<td>-</td>
|
|
364
|
+
</tr>
|
|
234
365
|
<tr>
|
|
235
366
|
<td>Apache Kafka</td>
|
|
236
367
|
<td>✅</td>
|
|
{ingestr-0.13.16.dist-info → ingestr-0.13.18.dist-info}/RECORD
CHANGED
@@ -1,13 +1,13 @@
|
|
|
1
1
|
ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
3
|
ingestr/src/blob.py,sha256=LtEZWoUhm5i2aKerdgEpLtNCf3fdhGGMM4td-LRZVbY,1407
|
|
4
|
-
ingestr/src/buildinfo.py,sha256=
|
|
4
|
+
ingestr/src/buildinfo.py,sha256=Ph2-6uM0ocE2IFQ-YyOxltsHqhrIhOawp3Pr6vUGaWE,21
|
|
5
5
|
ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
|
|
6
6
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
7
|
-
ingestr/src/factory.py,sha256=
|
|
7
|
+
ingestr/src/factory.py,sha256=Si3xQuaqiwR_LMtxg2rA93MkDYpq_BnVWTfBsNVnFIA,5198
|
|
8
8
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
9
9
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
10
|
-
ingestr/src/sources.py,sha256=
|
|
10
|
+
ingestr/src/sources.py,sha256=y6zFGGbi5FvrdQ89e0t1ud3BWNN2kvrNu2iuXb9wu6g,70977
|
|
11
11
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
12
12
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
13
13
|
ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
|
|
@@ -56,11 +56,13 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
|
|
|
56
56
|
ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
|
|
57
57
|
ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
|
|
58
58
|
ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
|
|
59
|
-
ingestr/src/hubspot/__init__.py,sha256=
|
|
59
|
+
ingestr/src/hubspot/__init__.py,sha256=jDI9PIgz2cvRMbTtX2tF1FcATFpBOMI7M8Ua4G7qDCc,9465
|
|
60
60
|
ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
|
|
61
61
|
ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
|
|
62
62
|
ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
|
|
63
63
|
ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
|
|
64
|
+
ingestr/src/kinesis/__init__.py,sha256=Bm0S9BvWDHZUhOc8WKTeawORRgldmJsb0Y3XNHpuJ-c,6205
|
|
65
|
+
ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
|
|
64
66
|
ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
|
|
65
67
|
ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
|
|
66
68
|
ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
|
|
@@ -108,8 +110,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
108
110
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
109
111
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
110
112
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
111
|
-
ingestr-0.13.
|
|
112
|
-
ingestr-0.13.
|
|
113
|
-
ingestr-0.13.
|
|
114
|
-
ingestr-0.13.
|
|
115
|
-
ingestr-0.13.
|
|
113
|
+
ingestr-0.13.18.dist-info/METADATA,sha256=HYn4_5kTZd5RoQgkz06WF4-x12T8RVJtoQe6fyHQwYk,13569
|
|
114
|
+
ingestr-0.13.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
115
|
+
ingestr-0.13.18.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
116
|
+
ingestr-0.13.18.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
117
|
+
ingestr-0.13.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|