ingestr 0.13.55__py3-none-any.whl → 0.13.57__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

ingestr/src/blob.py CHANGED
@@ -6,6 +6,10 @@ BucketName: TypeAlias = str
 FileGlob: TypeAlias = str


+class UnsupportedEndpointError(Exception):
+    pass
+
+
 def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
     """
     parse the URI of a blob storage and
@@ -50,3 +54,23 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
         return "", parts[0]

     return parts[0], parts[1]
+
+
+def parse_endpoint(path: str) -> str:
+    """
+    Parse the endpoint kind from the URI.
+
+    kind is a file format. one of [csv, jsonl, parquet]
+    """
+    file_extension = path.split(".")[-1]
+    if file_extension == "gz":
+        file_extension = path.split(".")[-2]
+    if file_extension == "csv":
+        endpoint = "read_csv"
+    elif file_extension == "jsonl":
+        endpoint = "read_jsonl"
+    elif file_extension == "parquet":
+        endpoint = "read_parquet"
+    else:
+        raise UnsupportedEndpointError(f"Unsupported file format: {file_extension}")
+    return endpoint
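For context, the new parse_endpoint helper maps a blob path's extension (including .gz-compressed variants) to a filesystem reader name. A minimal sketch of how it behaves, assuming the module layout above (paths are placeholders):

    from ingestr.src.blob import UnsupportedEndpointError, parse_endpoint

    assert parse_endpoint("exports/events.csv") == "read_csv"
    assert parse_endpoint("exports/events.jsonl.gz") == "read_jsonl"
    try:
        parse_endpoint("exports/events.xlsx")
    except UnsupportedEndpointError:
        pass  # unsupported extensions raise instead of silently defaulting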
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
-version = "v0.13.55"
+version = "v0.13.57"
ingestr/src/factory.py CHANGED
@@ -64,6 +64,7 @@ from ingestr.src.sources import (
     SqlSource,
     StripeAnalyticsSource,
     TikTokSource,
+    TrustpilotSource,
     ZendeskSource,
 )

@@ -165,6 +166,7 @@ class SourceDestinationFactory:
         "pipedrive": PipedriveSource,
         "frankfurter": FrankfurterSource,
         "freshdesk": FreshdeskSource,
+        "trustpilot": TrustpilotSource,
         "phantombuster": PhantombusterSource,
         "elasticsearch": ElasticsearchSource,
         "attio": AttioSource,
ingestr/src/kinesis/__init__.py CHANGED
@@ -9,7 +9,7 @@ from dlt.common.time import ensure_pendulum_datetime
 from dlt.common.typing import StrStr, TAnyDateTime, TDataItem
 from dlt.common.utils import digest128

-from .helpers import get_shard_iterator, max_sequence_by_shard
+from .helpers import get_shard_iterator, get_stream_address, max_sequence_by_shard


 @dlt.resource(
@@ -42,7 +42,7 @@ def kinesis_stream(
         initial_at_timestamp (TAnyDateTime): An initial timestamp used to generate AT_TIMESTAMP or LATEST iterator when timestamp value is 0
         max_number_of_messages (int): Maximum number of messages to read in one run. Actual read may exceed that number by up to chunk_size. Defaults to None (no limit).
         milliseconds_behind_latest (int): The number of milliseconds behind the top of the shard to stop reading messages, defaults to 1000.
-        parse_json (bool): If True, assumes that messages are json strings, parses them and returns instead of `data` (otherwise). Defaults to False.
+        parse_json (bool): If True, assumes that messages are json strings, parses them and returns instead of `data` (otherwise). Defaults to True.
         chunk_size (int): The number of records to fetch at once. Defaults to 1000.
     Yields:
         Iterable[TDataItem]: Messages. Contain Kinesis envelope in `kinesis` and bytes data in `data` (if `parse_json` disabled)
@@ -65,7 +65,7 @@ def kinesis_stream(
         # so next time we request shards at AT_TIMESTAMP that is now
         resource_state["initial_at_timestamp"] = pendulum.now("UTC").subtract(seconds=1)

-    shards_list = kinesis_client.list_shards(StreamName=stream_name)
+    shards_list = kinesis_client.list_shards(**get_stream_address(stream_name))
    shards: List[StrStr] = shards_list["Shards"]
    while next_token := shards_list.get("NextToken"):
        shards_list = kinesis_client.list_shards(NextToken=next_token)
ingestr/src/kinesis/helpers.py CHANGED
@@ -2,7 +2,7 @@ from typing import Any, Sequence, Tuple

 import dlt
 from dlt.common import pendulum
-from dlt.common.typing import DictStrAny, StrAny, StrStr
+from dlt.common.typing import DictStrAny, DictStrStr, StrAny, StrStr


 def get_shard_iterator(
@@ -40,7 +40,7 @@ def get_shard_iterator(
     )

     shard_iterator: StrStr = kinesis_client.get_shard_iterator(
-        StreamName=stream_name, ShardId=shard_id, **iterator_params
+        **get_stream_address(stream_name), ShardId=shard_id, **iterator_params
     )
     return shard_iterator["ShardIterator"], iterator_params

@@ -63,3 +63,20 @@ def max_sequence_by_shard(values: Sequence[StrStr]) -> StrStr:
         # we compare message sequence at shard_id
         last_value[shard_id] = max(item["seq_no"], last_value.get(shard_id, ""))
     return last_value
+
+
+def get_stream_address(stream_name: str) -> DictStrStr:
+    """
+    Return address of stream, either as StreamName or StreamARN, when applicable.
+
+    Examples:
+    - customer_events
+    - arn:aws:kinesis:eu-central-1:842404475894:stream/customer_events
+
+    https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamName
+    https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamARN
+    """
+    if stream_name.startswith("arn:"):
+        return {"StreamARN": stream_name}
+    else:
+        return {"StreamName": stream_name}
ingestr/src/sources.py CHANGED
@@ -677,24 +677,33 @@ class StripeAnalyticsSource:

         table = table.lower()

-        from ingestr.src.stripe_analytics.settings import (
-            ENDPOINTS,
-            INCREMENTAL_ENDPOINTS,
-        )
+        from ingestr.src.stripe_analytics.settings import ENDPOINTS

-        if table in ENDPOINTS:
-            endpoint = ENDPOINTS[table]
-            from ingestr.src.stripe_analytics import stripe_source
+        endpoint = None
+        incremental = False
+        sync = False

-            return stripe_source(
-                endpoints=[
-                    endpoint,
-                ],
-                stripe_secret_key=api_key[0],
-            ).with_resources(endpoint)
+        table_fields = table.split(":")
+        if len(table_fields) == 1:
+            endpoint = table_fields[0]
+        elif len(table_fields) == 2:
+            endpoint = table_fields[0]
+            sync = table_fields[1] == "sync"
+        elif len(table_fields) == 3:
+            endpoint = table_fields[0]
+            sync = table_fields[1] == "sync"
+            incremental = table_fields[2] == "incremental"
+        else:
+            raise ValueError(
+                "Invalid Stripe table format. Expected: stripe:<endpoint> or stripe:<endpoint>:<sync> or stripe:<endpoint>:<sync>:<incremental>"
+            )
+
+        if incremental and not sync:
+            raise ValueError(
+                "incremental loads must be used with sync loading"
+            )

-        elif table in INCREMENTAL_ENDPOINTS:
-            endpoint = INCREMENTAL_ENDPOINTS[table]
+        if incremental:
             from ingestr.src.stripe_analytics import incremental_stripe_source

             def nullable_date(date_str: Optional[str]):
@@ -702,6 +711,7 @@ class StripeAnalyticsSource:
                     return ensure_pendulum_datetime(date_str)
                 return None

+            endpoint = ENDPOINTS[endpoint]
             return incremental_stripe_source(
                 endpoints=[
                     endpoint,
@@ -710,6 +720,26 @@ class StripeAnalyticsSource:
                 initial_start_date=nullable_date(kwargs.get("interval_start", None)),
                 end_date=nullable_date(kwargs.get("interval_end", None)),
             ).with_resources(endpoint)
+        else:
+            endpoint = ENDPOINTS[endpoint]
+            if sync:
+                from ingestr.src.stripe_analytics import stripe_source
+
+                return stripe_source(
+                    endpoints=[
+                        endpoint,
+                    ],
+                    stripe_secret_key=api_key[0],
+                ).with_resources(endpoint)
+            else:
+                from ingestr.src.stripe_analytics import async_stripe_source
+
+                return async_stripe_source(
+                    endpoints=[
+                        endpoint,
+                    ],
+                    stripe_secret_key=api_key[0],
+                ).with_resources(endpoint)

        raise ValueError(
            f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
@@ -1362,17 +1392,25 @@ class S3Source:
             secret=secret_access_key[0],
         )

-        file_extension = path_to_file.split(".")[-1]
-        if file_extension == "csv":
-            endpoint = "read_csv"
-        elif file_extension == "jsonl":
-            endpoint = "read_jsonl"
-        elif file_extension == "parquet":
-            endpoint = "read_parquet"
+        endpoint: Optional[str] = None
+        if "#" in table:
+            _, endpoint = table.split("#")
+            if endpoint not in ["csv", "jsonl", "parquet"]:
+                raise ValueError(
+                    "S3 Source only supports specific formats files: csv, jsonl, parquet"
+                )
+            endpoint = f"read_{endpoint}"
         else:
-            raise ValueError(
-                "S3 Source only supports specific formats files: csv, jsonl, parquet"
-            )
+            try:
+                endpoint = blob.parse_endpoint(path_to_file)
+            except blob.UnsupportedEndpointError:
+                raise ValueError(
+                    "S3 Source only supports specific formats files: csv, jsonl, parquet"
+                )
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to parse endpoint from path: {path_to_file}"
+                ) from e

         from ingestr.src.filesystem import readers

@@ -1844,17 +1882,16 @@ class GCSSource:
             token=credentials,
         )

-        file_extension = path_to_file.split(".")[-1]
-        if file_extension == "csv":
-            endpoint = "read_csv"
-        elif file_extension == "jsonl":
-            endpoint = "read_jsonl"
-        elif file_extension == "parquet":
-            endpoint = "read_parquet"
-        else:
+        try:
+            endpoint = blob.parse_endpoint(path_to_file)
+        except blob.UnsupportedEndpointError:
             raise ValueError(
-                "GCS Source only supports specific formats files: csv, jsonl, parquet"
+                "S3 Source only supports specific formats files: csv, jsonl, parquet"
             )
+        except Exception as e:
+            raise ValueError(
+                f"Failed to parse endpoint from path: {path_to_file}"
+            ) from e

         from ingestr.src.filesystem import readers

@@ -2392,6 +2429,47 @@ class FreshdeskSource:
         ).with_resources(table)


+class TrustpilotSource:
+    # trustpilot://<business_unit_id>?api_key=<api_key>
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        business_unit_id = parsed_uri.netloc
+        params = parse_qs(parsed_uri.query)
+
+        if not business_unit_id:
+            raise MissingValueError("business_unit_id", "Trustpilot")
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Trustpilot")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is None:
+            start_date = ensure_pendulum_datetime("2000-01-01").in_tz("UTC").isoformat()
+        else:
+            start_date = ensure_pendulum_datetime(start_date).in_tz("UTC").isoformat()
+
+        end_date = kwargs.get("interval_end")
+
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC").isoformat()
+
+        if table not in ["reviews"]:
+            raise UnsupportedResourceError(table, "Trustpilot")
+
+        from ingestr.src.trustpilot import trustpilot_source
+
+        return trustpilot_source(
+            business_unit_id=business_unit_id,
+            api_key=api_key[0],
+            start_date=start_date,
+            end_date=end_date,
+        ).with_resources(table)
+
+
 class PhantombusterSource:
     def handles_incrementality(self) -> bool:
         return True
@@ -2622,18 +2700,15 @@ class SFTPSource:
         else:
             file_glob = f"/{table}"

-        file_extension = table.split(".")[-1].lower()
-        endpoint: str
-        if file_extension == "csv":
-            endpoint = "read_csv"
-        elif file_extension == "jsonl":
-            endpoint = "read_jsonl"
-        elif file_extension == "parquet":
-            endpoint = "read_parquet"
-        else:
+        try:
+            endpoint = blob.parse_endpoint(table)
+        except blob.UnsupportedEndpointError:
             raise ValueError(
-                "FTPServer Source only supports specific file formats: csv, jsonl, parquet."
+                "SFTP Source only supports specific formats files: csv, jsonl, parquet"
             )
+        except Exception as e:
+            raise ValueError(f"Failed to parse endpoint from path: {table}") from e
+

        from ingestr.src.filesystem import readers

        dlt_source_resource = readers(bucket_url, fs, file_glob)
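Two usage notes on the sources.py changes above, for orientation. First, the Stripe table name now encodes the loading mode directly; a hedged sketch of the accepted values ("charge" is just an example key from ENDPOINTS):

    # "charge"                  -> async_stripe_source: concurrent full load, unordered output
    # "charge:sync"             -> stripe_source: sequential, ordered full load
    # "charge:sync:incremental" -> incremental_stripe_source, bounded by interval_start/interval_end
    # "charge:async:incremental" raises ValueError: incremental loads must be used with sync loading

Second, the S3 source accepts an explicit format hint after a "#" in the table value, while extension-based detection (shared with GCS and SFTP) goes through blob.parse_endpoint. A minimal sketch with placeholder bucket and path names:

    # "my-bucket/exports/*.csv"          -> endpoint inferred from the extension ("read_csv")
    # "my-bucket/exports/part-*#parquet" -> endpoint forced to "read_parquet" via the "#" suffix
    # anything other than csv, jsonl or parquet raises ValueError in both paths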
ingestr/src/stripe_analytics/__init__.py CHANGED
@@ -7,7 +7,12 @@ import stripe
 from dlt.sources import DltResource
 from pendulum import DateTime

-from .helpers import pagination, transform_date
+from .helpers import (
+    async_parallel_pagination,
+    pagination,
+    parallel_pagination,
+    transform_date,
+)


 @dlt.source(max_table_nesting=0)
@@ -50,6 +55,86 @@ def stripe_source(
         )(endpoint)


+@dlt.source(max_table_nesting=0)
+def parallel_stripe_source(
+    endpoints: Tuple[str, ...],
+    stripe_secret_key: str = dlt.secrets.value,
+    start_date: Optional[DateTime] = None,
+    end_date: Optional[DateTime] = None,
+    max_workers: int = 12,
+) -> Iterable[DltResource]:
+    """
+    Retrieves data from the Stripe API for the specified endpoints using parallel pagination.
+
+    This source divides the date range across multiple workers to fetch data in parallel,
+    which can significantly speed up data retrieval for large date ranges.
+
+    Args:
+        endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from.
+        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
+        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
+        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
+        max_workers (int): Maximum number of worker threads for parallel fetching. Defaults to 4.
+
+    Returns:
+        Iterable[DltResource]: Resources with data that was created during the period greater than or equal to 'start_date' and less than 'end_date'.
+    """
+    stripe.api_key = stripe_secret_key
+    stripe.api_version = "2022-11-15"
+
+    def parallel_stripe_resource(
+        endpoint: str,
+    ) -> Generator[Dict[Any, Any], Any, None]:
+        yield from parallel_pagination(endpoint, start_date, end_date, max_workers)
+
+    for endpoint in endpoints:
+        yield dlt.resource(
+            parallel_stripe_resource,
+            name=endpoint,
+            write_disposition="replace",
+        )(endpoint)
+
+
+@dlt.source(max_table_nesting=0)
+def async_stripe_source(
+    endpoints: Tuple[str, ...],
+    stripe_secret_key: str = dlt.secrets.value,
+    start_date: Optional[DateTime] = None,
+    end_date: Optional[DateTime] = None,
+    max_workers: int = 40,
+    rate_limit_delay: float = 0.03,
+) -> Iterable[DltResource]:
+    """
+    ULTRA-FAST async Stripe source optimized for maximum speed and throughput.
+
+    WARNING: Returns data in RANDOM ORDER for maximum performance.
+    Uses aggressive concurrency and minimal delays to maximize API throughput.
+
+    Args:
+        endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from.
+        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
+        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to 2010-01-01.
+        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to today.
+        max_workers (int): Maximum number of concurrent async tasks. Defaults to 40 for maximum speed.
+        rate_limit_delay (float): Minimal delay between requests. Defaults to 0.03 seconds.
+
+    Returns:
+        Iterable[DltResource]: Resources with data in RANDOM ORDER (optimized for speed).
+    """
+    stripe.api_key = stripe_secret_key
+    stripe.api_version = "2022-11-15"
+
+    async def async_stripe_resource(endpoint: str):
+        yield async_parallel_pagination(endpoint, max_workers, rate_limit_delay)
+
+    for endpoint in endpoints:
+        yield dlt.resource(
+            async_stripe_resource,
+            name=endpoint,
+            write_disposition="replace",
+        )(endpoint)
+
+
 @dlt.source
 def incremental_stripe_source(
     endpoints: Tuple[str, ...],
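For orientation, a minimal sketch of how the new sources could be wired into a dlt pipeline; the pipeline name, destination, and secret key are placeholders, and only stripe_source and async_stripe_source are actually referenced by sources.py in this release:

    import dlt
    from ingestr.src.stripe_analytics import async_stripe_source, stripe_source

    pipeline = dlt.pipeline(pipeline_name="stripe_demo", destination="duckdb")

    # Ordered, sequential load (the ":sync" path in sources.py)
    pipeline.run(
        stripe_source(endpoints=("Charge",), stripe_secret_key="sk_test_...").with_resources("Charge")
    )

    # Unordered, concurrent load (the default path in sources.py)
    pipeline.run(
        async_stripe_source(endpoints=("Charge",), stripe_secret_key="sk_test_...").with_resources("Charge")
    )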
ingestr/src/stripe_analytics/helpers.py CHANGED
@@ -1,6 +1,10 @@
 """Stripe analytics source helpers"""

-from typing import Any, Dict, Iterable, Optional, Union
+import asyncio
+import math
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timedelta
+from typing import Any, Dict, Iterable, List, Optional, Union

 import stripe
 from dlt.common import pendulum
@@ -39,6 +43,300 @@ def pagination(
             break


+def parallel_pagination(
+    endpoint: str,
+    start_date: Optional[Any] = None,
+    end_date: Optional[Any] = None,
+    max_workers: int = 4,
+) -> Iterable[TDataItem]:
+    """
+    Retrieves data from an endpoint with parallel pagination by dividing date ranges across workers.
+
+    Args:
+        endpoint (str): The endpoint to retrieve data from.
+        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to 2010-01-01 if None.
+        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to today if None.
+        max_workers (int): Maximum number of worker threads to use for parallel fetching. Defaults to 4.
+
+    Returns:
+        Iterable[TDataItem]: Data items retrieved from the endpoint.
+    """
+    # Set default date range if not provided: 2010 to today
+    if not start_date:
+        start_date = pendulum.datetime(2010, 1, 1)
+    if not end_date:
+        end_date = pendulum.now()
+
+    # Convert dates to timestamps for processing
+    start_ts = transform_date(start_date)
+    end_ts = transform_date(end_date)
+
+    # If date range is very small, use sequential pagination
+    date_range_days = (end_ts - start_ts) / (24 * 60 * 60)
+    if date_range_days < 30:  # Less than 30 days
+        yield from pagination(endpoint, start_date, end_date)
+        return
+
+    # Create time chunks with larger chunks for 2010s (less data expected)
+    time_chunks = _create_adaptive_time_chunks(start_ts, end_ts, max_workers)
+
+    # Use ThreadPoolExecutor to fetch data in parallel and yield as soon as ready
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit all tasks
+        future_to_chunk = {
+            executor.submit(
+                _fetch_chunk_data_streaming, endpoint, chunk_start, chunk_end
+            ): (chunk_start, chunk_end)
+            for chunk_start, chunk_end in time_chunks
+        }
+
+        # MAXIMUM SPEED - Yield results immediately as they complete
+        for future in as_completed(future_to_chunk):
+            chunk_start, chunk_end = future_to_chunk[future]
+            try:
+                chunk_data = future.result()
+                # Yield all batches from this chunk immediately - NO ORDERING
+                for batch in chunk_data:
+                    yield batch
+
+            except Exception as exc:
+                print(f"Chunk {chunk_start}-{chunk_end} generated an exception: {exc}")
+                raise exc
+
+
+def _create_time_chunks(start_ts: int, end_ts: int, num_chunks: int) -> List[tuple]:
+    """
+    Divide a time range into equal chunks for parallel processing.
+
+    Args:
+        start_ts (int): Start timestamp
+        end_ts (int): End timestamp
+        num_chunks (int): Number of chunks to create
+
+    Returns:
+        List[tuple]: List of (chunk_start, chunk_end) timestamp pairs
+    """
+    total_duration = end_ts - start_ts
+    chunk_duration = math.ceil(total_duration / num_chunks)
+
+    chunks = []
+    current_start = start_ts
+
+    for i in range(num_chunks):
+        current_end = min(current_start + chunk_duration, end_ts)
+        if current_start < end_ts:
+            chunks.append((current_start, current_end))
+            current_start = current_end
+
+        if current_start >= end_ts:
+            break
+
+    return chunks
+
+
+def _create_adaptive_time_chunks(
+    start_ts: int, end_ts: int, max_workers: int
+) -> List[tuple]:
+    """
+    Create time chunks with adaptive sizing - larger chunks for 2010s (less data expected).
+
+    Args:
+        start_ts (int): Start timestamp
+        end_ts (int): End timestamp
+        max_workers (int): Maximum number of workers
+
+    Returns:
+        List[tuple]: List of (chunk_start, chunk_end) timestamp pairs
+    """
+    chunks = []
+
+    # Key timestamps
+    year_2020_ts = int(pendulum.datetime(2020, 1, 1).timestamp())
+    year_2015_ts = int(pendulum.datetime(2015, 1, 1).timestamp())
+
+    current_start = start_ts
+
+    # Handle 2010-2015: Large chunks (2-3 year periods)
+    if current_start < year_2015_ts:
+        chunk_end = min(year_2015_ts, end_ts)
+        if current_start < chunk_end:
+            # Split 2010-2015 into 2-3 chunks max
+            pre_2015_chunks = _create_time_chunks(
+                current_start, chunk_end, min(3, max_workers)
+            )
+            chunks.extend(pre_2015_chunks)
+            current_start = chunk_end
+
+    # Handle 2015-2020: Medium chunks (6 month to 1 year periods)
+    if current_start < year_2020_ts and current_start < end_ts:
+        chunk_end = min(year_2020_ts, end_ts)
+        if current_start < chunk_end:
+            # Split 2015-2020 into smaller chunks
+            duration_2015_2020 = chunk_end - current_start
+            years_2015_2020 = duration_2015_2020 / (365 * 24 * 60 * 60)
+            num_chunks_2015_2020 = min(
+                max_workers, max(2, int(years_2015_2020 * 2))
+            )  # ~6 months per chunk
+
+            pre_2020_chunks = _create_time_chunks(
+                current_start, chunk_end, num_chunks_2015_2020
+            )
+            chunks.extend(pre_2020_chunks)
+            current_start = chunk_end
+
+    if current_start < end_ts:
+        # Split post-2020 data into daily chunks for maximum granularity
+        current_chunk_start = current_start
+        while current_chunk_start < end_ts:
+            # Calculate end of current day
+            current_date = datetime.fromtimestamp(current_chunk_start)
+            next_day = current_date + timedelta(days=1)
+            chunk_end = min(int(next_day.timestamp()), end_ts)
+
+            chunks.append((current_chunk_start, chunk_end))
+            current_chunk_start = chunk_end
+
+    return chunks
+
+
+def _fetch_chunk_data_streaming(
+    endpoint: str, start_ts: int, end_ts: int
+) -> List[List[TDataItem]]:
+    """
+    Fetch data for a specific time chunk using sequential pagination with memory-efficient approach.
+
+    Args:
+        endpoint (str): The Stripe endpoint to fetch from
+        start_ts (int): Start timestamp for this chunk
+        end_ts (int): End timestamp for this chunk
+
+    Returns:
+        List[List[TDataItem]]: List of batches of data items
+    """
+    # For streaming, we still need to collect the chunk data to maintain order
+    # but we can optimize by not holding all data in memory at once
+    print(
+        f"Fetching chunk {datetime.fromtimestamp(start_ts).strftime('%Y-%m-%d')}-{datetime.fromtimestamp(end_ts).strftime('%Y-%m-%d')}"
+    )
+    chunk_data = []
+    batch_count = 0
+
+    for batch in pagination(endpoint, start_ts, end_ts):
+        chunk_data.append(batch)
+        print(
+            f"Processed {batch_count} batches for chunk {datetime.fromtimestamp(start_ts).strftime('%Y-%m-%d')}-{datetime.fromtimestamp(end_ts).strftime('%Y-%m-%d')}"
+        )
+        batch_count += 1
+
+    return chunk_data
+
+
+async def async_pagination(
+    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
+) -> Iterable[TDataItem]:
+    """
+    Async version of pagination that retrieves data from an endpoint with pagination.
+
+    Args:
+        endpoint (str): The endpoint to retrieve data from.
+        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to None.
+        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to None.
+
+    Returns:
+        Iterable[TDataItem]: Data items retrieved from the endpoint.
+    """
+    starting_after = None
+    while True:
+        response = await stripe_get_data_async(
+            endpoint,
+            start_date=start_date,
+            end_date=end_date,
+            starting_after=starting_after,
+        )
+
+        if len(response["data"]) > 0:
+            starting_after = response["data"][-1]["id"]
+        yield response["data"]
+
+        if not response["has_more"]:
+            break
+
+
+async def async_parallel_pagination(
+    endpoint: str,
+    max_workers: int = 8,
+    rate_limit_delay: float = 5,
+) -> Iterable[TDataItem]:
+    """
+    ULTRA-FAST async parallel pagination - yields data in random order for maximum speed.
+    No ordering constraints - pure performance optimization.
+
+    Args:
+        endpoint (str): The endpoint to retrieve data from.
+        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to 2010-01-01 if None.
+        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to today if None.
+        max_workers (int): Maximum number of concurrent async tasks. Defaults to 8 for balanced speed/rate limit respect.
+        rate_limit_delay (float): Minimal delay between requests. Defaults to 5 seconds.
+
+    Returns:
+        Iterable[TDataItem]: Data items retrieved from the endpoint (RANDOM ORDER FOR SPEED).
+    """
+
+    start_date = pendulum.datetime(2010, 1, 1)
+    end_date = pendulum.now()
+    start_ts = transform_date(start_date)
+    end_ts = transform_date(end_date)
+
+    # Create time chunks with larger chunks for 2010s (less data expected)
+    time_chunks = _create_adaptive_time_chunks(start_ts, end_ts, max_workers)
+
+    # Use asyncio semaphore to control concurrency and respect rate limits
+    semaphore = asyncio.Semaphore(max_workers)
+
+    async def fetch_chunk_with_semaphore(chunk_start: int, chunk_end: int):
+        async with semaphore:
+            await asyncio.sleep(rate_limit_delay)
+            return await _fetch_chunk_data_async_fast(endpoint, chunk_start, chunk_end)
+
+    # Create all tasks
+    tasks = [
+        fetch_chunk_with_semaphore(chunk_start, chunk_end)
+        for chunk_start, chunk_end in time_chunks
+    ]
+
+    for coro in asyncio.as_completed(tasks):
+        try:
+            chunk_data = await coro
+
+            for batch in chunk_data:
+                yield batch
+
+        except Exception as exc:
+            print(f"Async chunk processing generated an exception: {exc}")
+            raise exc
+
+
+async def _fetch_chunk_data_async_fast(
+    endpoint: str, start_ts: int, end_ts: int
+) -> List[List[TDataItem]]:
+    """
+    ULTRA-FAST async chunk fetcher - no metadata overhead, direct data return.
+
+    Args:
+        endpoint (str): The Stripe endpoint to fetch from
+        start_ts (int): Start timestamp for this chunk
+        end_ts (int): End timestamp for this chunk
+
+    Returns:
+        List[List[TDataItem]]: Raw batches with zero overhead
+    """
+    chunk_data = []
+    async for batch in async_pagination(endpoint, start_ts, end_ts):
+        chunk_data.append(batch)
+
+    return chunk_data
+
+
 def transform_date(date: Union[str, DateTime, int]) -> int:
     if isinstance(date, str):
         date = pendulum.from_format(date, "%Y-%m-%dT%H:%M:%SZ")
@@ -66,3 +364,45 @@ def stripe_get_data(
         created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
     )
     return dict(resource_dict)
+
+
+async def stripe_get_data_async(
+    resource: str,
+    start_date: Optional[Any] = None,
+    end_date: Optional[Any] = None,
+    **kwargs: Any,
+) -> Dict[Any, Any]:
+    """Async version of stripe_get_data"""
+    if start_date:
+        start_date = transform_date(start_date)
+    if end_date:
+        end_date = transform_date(end_date)
+
+    if resource == "Subscription":
+        kwargs.update({"status": "all"})
+
+    import asyncio
+
+    from stripe import RateLimitError
+
+    max_retries = 50
+    retry_count = 0
+    max_wait_time_ms = 10000
+
+    while retry_count < max_retries:
+        try:
+            resource_dict = await getattr(stripe, resource).list_async(
+                created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
+            )
+            return dict(resource_dict)
+        except RateLimitError:
+            retry_count += 1
+            if retry_count < max_retries:
+                wait_time = min(2**retry_count * 0.001, max_wait_time_ms)
+                await asyncio.sleep(wait_time)
+            else:
+                # Re-raise the last exception if we've exhausted retries
+                print(f"✗ Failed to fetch {resource} after {max_retries} retries")
+                raise
+
+    return dict(resource_dict)
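To illustrate the chunking strategy added above: for a range starting in 2010, _create_adaptive_time_chunks emits at most three coarse chunks before 2015, roughly half-year chunks from 2015 to 2020, and one chunk per day after 2020. A rough sketch of the resulting shape (dates are approximate and for illustration only, not taken from a real run):

    # [(2010-01-01, ~2011-09), (~2011-09, ~2013-05), (~2013-05, 2015-01-01),  # <= 3 coarse chunks
    #  (2015-01-01, ~2015-07), ..., (~2019-07, 2020-01-01),                   # ~6-month chunks
    #  (2020-01-01, 2020-01-02), (2020-01-02, 2020-01-03), ...]               # daily chunks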
ingestr/src/stripe_analytics/settings.py CHANGED
@@ -11,6 +11,7 @@ ENDPOINTS = {
     "checkoutsession": "CheckoutSession",
     "checkout_session": "CheckoutSession",
     "coupon": "Coupon",
+    "charge": "Charge",
     "customer": "Customer",
     "dispute": "Dispute",
     "paymentintent": "PaymentIntent",
@@ -52,22 +53,14 @@ ENDPOINTS = {
     "top_up": "Topup",
     "webhookendpoint": "WebhookEndpoint",
     "webhook_endpoint": "WebhookEndpoint",
-}
-# possible incremental endpoints
-INCREMENTAL_ENDPOINTS = {
-    "applicationfee": "ApplicationFee",
-    "application_fee": "ApplicationFee",
-    "balancetransaction": "BalanceTransaction",
-    "balance_transaction": "BalanceTransaction",
-    "charge": "Charge",
-    "creditnote": "CreditNote",
-    "credit_note": "CreditNote",
-    "event": "Event",
     "invoice": "Invoice",
     "invoiceitem": "InvoiceItem",
     "invoice_item": "InvoiceItem",
     "invoicelineitem": "InvoiceLineItem",
     "invoice_line_item": "InvoiceLineItem",
-    "setupattempt": "SetupAttempt",
-    "setup_attempt": "SetupAttempt",
+    "balancetransaction": "BalanceTransaction",
+    "balance_transaction": "BalanceTransaction",
+    "creditnote": "CreditNote",
+    "credit_note": "CreditNote",
+    "event": "Event",
 }
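For reference, endpoint lookup is now a single dictionary; a minimal sketch of the effect of this consolidation, using only keys visible in the hunk above:

    from ingestr.src.stripe_analytics.settings import ENDPOINTS

    ENDPOINTS["charge"]               # -> "Charge" (previously only in INCREMENTAL_ENDPOINTS)
    ENDPOINTS["balance_transaction"]  # -> "BalanceTransaction"
    "setup_attempt" in ENDPOINTS      # -> False; per this hunk, SetupAttempt is no longer listed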
ingestr/src/trustpilot/__init__.py ADDED
@@ -0,0 +1,48 @@
+"""Trustpilot source for ingesting reviews."""
+
+from typing import Any, Dict, Generator, Iterable
+
+import dlt
+import pendulum
+from dlt.sources import DltResource
+
+from .client import TrustpilotClient
+
+
+@dlt.source()
+def trustpilot_source(
+    business_unit_id: str,
+    start_date: str,
+    end_date: str | None,
+    api_key: str,
+    per_page: int = 1000,
+) -> Iterable[DltResource]:
+    """Return resources for Trustpilot."""
+
+    client = TrustpilotClient(api_key=api_key)
+
+    @dlt.resource(name="reviews", write_disposition="merge", primary_key="id")
+    def reviews(
+        dateTime=(
+            dlt.sources.incremental(
+                "updated_at",
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Generator[Dict[str, Any], None, None]:
+        if end_date is None:
+            end_dt = pendulum.now(tz="UTC").isoformat()
+        else:
+            end_dt = dateTime.end_value
+        start_dt = dateTime.last_value
+        yield from client.paginated_reviews(
+            business_unit_id=business_unit_id,
+            per_page=per_page,
+            updated_since=start_dt,
+            end_date=end_dt,
+        )
+
+    yield reviews
ingestr/src/trustpilot/client.py ADDED
@@ -0,0 +1,48 @@
+"""Simple Trustpilot API client."""
+
+from typing import Any, Dict, Iterable
+
+import pendulum
+from dlt.sources.helpers import requests
+
+
+class TrustpilotClient:
+    """Client for the Trustpilot public API."""
+
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+        self.base_url = "https://api.trustpilot.com/v1"
+
+    def _get(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        params = dict(params)
+        params["apikey"] = self.api_key
+        response = requests.get(f"{self.base_url}{endpoint}", params=params)
+        response.raise_for_status()
+        return response.json()
+
+    def paginated_reviews(
+        self,
+        business_unit_id: str,
+        updated_since: str,
+        end_date: str,
+        per_page: int = 1000,
+    ) -> Iterable[Dict[str, Any]]:
+        page = 1
+        while True:
+            params: Dict[str, Any] = {"perPage": per_page, "page": page}
+            if updated_since:
+                params["updatedSince"] = updated_since
+            data = self._get(f"/business-units/{business_unit_id}/reviews", params)
+            reviews = data.get("reviews", data)
+            if not reviews:
+                break
+            for review in reviews:
+                end_date_dt = pendulum.parse(end_date)
+                review["updated_at"] = review["updatedAt"]
+                review_dt = pendulum.parse(review["updated_at"])
+                if review_dt > end_date_dt:  # type: ignore
+                    continue
+                yield review
+            if len(reviews) < per_page:
+                break
+            page += 1
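For orientation, a minimal sketch of exercising the new client directly; the business unit id and API key are placeholders, and the date strings follow the ISO format produced by sources.py:

    from ingestr.src.trustpilot.client import TrustpilotClient

    client = TrustpilotClient(api_key="<api_key>")
    for review in client.paginated_reviews(
        business_unit_id="<business_unit_id>",
        updated_since="2024-01-01T00:00:00+00:00",
        end_date="2024-06-30T00:00:00+00:00",
        per_page=100,
    ):
        print(review["id"], review["updated_at"])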
ingestr-0.13.57.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.55
+Version: 0.13.57
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -27,7 +27,6 @@ Requires-Dist: asynch==0.2.4
 Requires-Dist: attrs==25.1.0
 Requires-Dist: backoff==2.2.1
 Requires-Dist: bcrypt==4.3.0
-Requires-Dist: beautifulsoup4==4.13.3
 Requires-Dist: boto3==1.37.1
 Requires-Dist: botocore==1.37.1
 Requires-Dist: cachetools==5.5.2
@@ -156,7 +155,6 @@ Requires-Dist: python-quickbooks==0.9.2
 Requires-Dist: pytz==2025.1
 Requires-Dist: pyyaml==6.0.2
 Requires-Dist: rauth==0.7.3
-Requires-Dist: redshift-connector==2.1.5
 Requires-Dist: requests-file==2.1.0
 Requires-Dist: requests-oauthlib==1.3.1
 Requires-Dist: requests-toolbelt==1.0.0
@@ -168,7 +166,6 @@ Requires-Dist: rsa==4.9
 Requires-Dist: rudder-sdk-python==2.1.4
 Requires-Dist: s3fs==2025.3.2
 Requires-Dist: s3transfer==0.11.3
-Requires-Dist: scramp==1.4.5
 Requires-Dist: semver==3.0.4
 Requires-Dist: setuptools==75.8.2
 Requires-Dist: shellingham==1.5.4
@@ -180,7 +177,6 @@ Requires-Dist: smmap==5.0.2
 Requires-Dist: snowflake-connector-python==3.14.0
 Requires-Dist: snowflake-sqlalchemy==1.6.1
 Requires-Dist: sortedcontainers==2.4.0
-Requires-Dist: soupsieve==2.6
 Requires-Dist: sqlalchemy-bigquery==1.12.1
 Requires-Dist: sqlalchemy-cratedb==0.42.0.dev2
 Requires-Dist: sqlalchemy-hana==2.0.0
ingestr-0.13.57.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=GkC1hdq8AVGrvolc95zMfjmibI95p2pmFkbgCOVf-Og,26311
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
-ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=bdi0-mZhnHheYgs6WuEb8p-RIk_RFAXRCF9HalRfV0k,21
+ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
+ingestr/src/buildinfo.py,sha256=mMPkVD8bGtndOM5rdoqMRQZCk_zPuHM2EG9vRNh5cIk,21
 ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=mcjgbmrZr6TvP9fCMQxo-aMGcrb2PqToRcSLp5nldww,6138
+ingestr/src/factory.py,sha256=R7KzGRQ9tYZ_N-daD9OtnEp0K-DrsP8bUyXWdv4LV4A,6200
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=3ozLt9lhhNANspfjA2vb8u6qjgBJezH8QBV1XKqT1fg,94124
+ingestr/src/sources.py,sha256=dtr5sXF2ecO54eJR1Mu-dH9YgSxAVx93gjfSwJISI00,96867
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -75,8 +75,8 @@ ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
-ingestr/src/kinesis/__init__.py,sha256=u5ThH1y8uObZKXgIo71em1UnX6MsVHWOjcf1jKqKbE8,6205
-ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
+ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
+ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0s,3152
 ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
@@ -117,13 +117,15 @@ ingestr/src/solidgate/__init__.py,sha256=JdaXvAu5QGuf9-FY294vwCQCEmfrqIld9oqbzqC
 ingestr/src/solidgate/helpers.py,sha256=oePEc9nnvmN3IaKrfJCvyKL79xdGM0-gRTN3-8tY4Fc,4952
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
-ingestr/src/stripe_analytics/__init__.py,sha256=j3Vmvo8G75fJJIF4rUnpGliGTpYQZt372wo-AjGImYs,4581
-ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
-ingestr/src/stripe_analytics/settings.py,sha256=ZahhZg3Sq2KnvnDcfSaXO494Csy3tElBDEHnvA1AVmA,2461
+ingestr/src/stripe_analytics/__init__.py,sha256=g2miuPexUcPEEMzmPQZqxEaQ0Q8YjUAkOvKaLn3KC-c,8219
+ingestr/src/stripe_analytics/helpers.py,sha256=8in6k1ndTon7xNh8QPDqThBWvKY9XQrmrJXveAOA6R4,13858
+ingestr/src/stripe_analytics/settings.py,sha256=xt1-ljwP4nLTNUa8l3KwFbtK8FtQHgHpzGF5uPKfRsw,2246
 ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
 ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
 ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=jmWHvZzN1Vt_PWrJkgq5a2wIwon-OBEzXoZx0jEy-74,3905
+ingestr/src/trustpilot/__init__.py,sha256=ofhjep4qRPIi8q41qc97QVex8UbWF-Fd7gUsqeQlQX8,1279
+ingestr/src/trustpilot/client.py,sha256=zKYt5C7nrR83Id0KN49EPmtml8MEtlSPlAosEFU3VXY,1616
 ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
@@ -139,8 +141,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.55.dist-info/METADATA,sha256=WNMM4qLCTDJg4xUnYNefHffB6vidRl4xopoBaaux-FM,15131
-ingestr-0.13.55.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.13.55.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.13.55.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.13.55.dist-info/RECORD,,
+ingestr-0.13.57.dist-info/METADATA,sha256=k5LnyrqN3QlhkaGqKGtQBD370oNkTTkoEWDAhlCq-3M,14993
+ingestr-0.13.57.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.57.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.57.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.57.dist-info/RECORD,,