ingestr 0.13.37__py3-none-any.whl → 0.13.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -485,6 +485,9 @@ def ingest(
485
485
  print(
486
486
  f"[bold yellow] Primary Key:[/bold yellow] {primary_key if primary_key else 'None'}"
487
487
  )
488
+ print(
489
+ f"[bold yellow] Pipeline ID:[/bold yellow] {m.hexdigest()}"
490
+ )
488
491
  print()
489
492
 
490
493
  if not yes:
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.37"
1
+ version = "v0.13.38"
@@ -1,4 +1,4 @@
1
- from typing import Any, Iterator
1
+ from typing import Any, Iterator, Optional
2
2
 
3
3
  import dlt
4
4
  from dlt.common.pendulum import pendulum
@@ -15,13 +15,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
15
15
  def frankfurter_source(
16
16
  start_date: TAnyDateTime,
17
17
  end_date: TAnyDateTime,
18
+ base_currency: str,
18
19
  ) -> Any:
19
20
  """
20
21
  A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
21
22
  various types of data: currencies, latest rates, historical rates.
22
23
  """
23
24
  date_time = dlt.sources.incremental(
24
-
25
25
  "date",
26
26
  initial_value=start_date,
27
27
  end_value=end_date,
@@ -31,9 +31,10 @@ def frankfurter_source(
31
31
 
32
32
  return (
33
33
  currencies(),
34
- latest(),
35
- exchange_rates(start_date=date_time, end_date=end_date),
36
-
34
+ latest(base_currency=base_currency),
35
+ exchange_rates(
36
+ start_date=date_time, end_date=end_date, base_currency=base_currency
37
+ ),
37
38
  )
38
39
 
39
40
 
@@ -61,29 +62,33 @@ def currencies() -> Iterator[dict]:
61
62
  "date": {"data_type": "text"},
62
63
  "currency_code": {"data_type": "text"},
63
64
  "rate": {"data_type": "double"},
65
+ "base_currency": {"data_type": "text"},
64
66
  },
65
- primary_key=["date", "currency_code"], # Composite primary key
67
+ primary_key=["date", "currency_code", "base_currency"],
66
68
  )
67
- def latest() -> Iterator[dict]:
69
+ def latest(base_currency: Optional[str] = "") -> Iterator[dict]:
68
70
  """
69
71
  Fetches the latest exchange rates and yields them as rows.
70
72
  """
71
73
  # Base URL
72
74
  url = "latest?"
73
75
 
76
+ if base_currency:
77
+ url += f"base={base_currency}"
78
+
74
79
  # Fetch data
75
80
  data = get_path_with_retry(url)
76
81
 
77
82
  # Extract rates and base currency
78
83
  rates = data["rates"]
79
-
80
84
  date = pendulum.parse(data["date"])
81
85
 
82
- # Add the base currency (EUR) with a rate of 1.0
86
+ # Add the base currency with a rate of 1.0
83
87
  yield {
84
88
  "date": date,
85
- "currency_code": "EUR",
89
+ "currency_code": base_currency,
86
90
  "rate": 1.0,
91
+ "base_currency": base_currency,
87
92
  }
88
93
 
89
94
  # Add all currencies and their rates
@@ -92,6 +97,7 @@ def latest() -> Iterator[dict]:
92
97
  "date": date,
93
98
  "currency_code": currency_code,
94
99
  "rate": rate,
100
+ "base_currency": base_currency,
95
101
  }
96
102
 
97
103
 
@@ -101,12 +107,14 @@ def latest() -> Iterator[dict]:
101
107
  "date": {"data_type": "text"},
102
108
  "currency_code": {"data_type": "text"},
103
109
  "rate": {"data_type": "double"},
110
+ "base_currency": {"data_type": "text"},
104
111
  },
105
- primary_key=("date", "currency_code"), # Composite primary key
112
+ primary_key=("date", "currency_code", "base_currency"),
106
113
  )
107
114
  def exchange_rates(
108
115
  end_date: TAnyDateTime,
109
116
  start_date: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental("date"),
117
+ base_currency: Optional[str] = "",
110
118
  ) -> Iterator[dict]:
111
119
  """
112
120
  Fetches exchange rates for a specified date range.
@@ -124,6 +132,9 @@ def exchange_rates(
124
132
  # Compose the URL
125
133
  url = f"{start_date_str}..{end_date_str}?"
126
134
 
135
+ if base_currency:
136
+ url += f"base={base_currency}"
137
+
127
138
  # Fetch data from the API
128
139
  data = get_path_with_retry(url)
129
140
 
@@ -137,8 +148,9 @@ def exchange_rates(
137
148
  # Add the base currency with a rate of 1.0
138
149
  yield {
139
150
  "date": formatted_date,
140
- "currency_code": "EUR",
151
+ "currency_code": base_currency,
141
152
  "rate": 1.0,
153
+ "base_currency": base_currency,
142
154
  }
143
155
 
144
156
  # Add all other currencies and their rates
@@ -147,4 +159,5 @@ def exchange_rates(
147
159
  "date": formatted_date,
148
160
  "currency_code": currency_code,
149
161
  "rate": rate,
162
+ "base_currency": base_currency,
150
163
  }
@@ -30,3 +30,19 @@ def validate_dates(start_date: datetime, end_date: datetime) -> None:
30
30
  # Check if start_date is before end_date
31
31
  if start_date > end_date:
32
32
  raise ValueError("Interval-end cannot be before interval-start.")
33
+
34
+
35
+ def validate_currency(currency_code: str) -> bool:
36
+ url = "https://api.frankfurter.dev/v1/currencies"
37
+
38
+ response = requests.get(url, timeout=5)
39
+ currencies = response.json()
40
+
41
+ if currency_code.upper() in currencies:
42
+ return True
43
+ else:
44
+ supported_currencies = list(currencies.keys())
45
+ print(
46
+ f"Invalid base currency '{currency_code}'. Supported currencies are: {supported_currencies}"
47
+ )
48
+ return False
@@ -3,10 +3,11 @@ from typing import Iterable, Optional
3
3
  import dlt
4
4
  import pendulum
5
5
  import requests
6
- from dlt.common.typing import TDataItem
6
+ from dlt.common.typing import TDataItem, TAnyDateTime
7
7
  from dlt.sources import DltResource
8
8
  from dlt.sources.helpers.requests import Client
9
9
 
10
+
10
11
  from ingestr.src.phantombuster.client import PhantombusterClient
11
12
 
12
13
 
@@ -26,13 +27,34 @@ def create_client() -> requests.Session:
26
27
  request_backoff_factor=2,
27
28
  ).session
28
29
 
29
-
30
30
  @dlt.source(max_table_nesting=0)
31
- def phantombuster_source(api_key: str, agent_id: str, start_date: pendulum.DateTime, end_date: pendulum.DateTime) -> Iterable[DltResource]:
31
+ def phantombuster_source(api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None) -> Iterable[DltResource]:
32
32
  client = PhantombusterClient(api_key)
33
-
34
- @dlt.resource()
35
- def completed_phantoms() -> Iterable[TDataItem]:
36
- yield client.fetch_containers_result(create_client(), agent_id, start_date, end_date)
33
+ @dlt.resource(
34
+ write_disposition="merge",
35
+ primary_key="container_id",
36
+ columns={
37
+ "partition_dt": {"data_type": "date", "partition": True},
38
+ },
39
+ )
40
+ def completed_phantoms(
41
+ dateTime=(
42
+ dlt.sources.incremental(
43
+ "ended_at",
44
+ initial_value=start_date,
45
+ end_value=end_date,
46
+ range_start="closed",
47
+ range_end="closed",
48
+ )
49
+ ),
50
+ ) -> Iterable[TDataItem]:
51
+ if dateTime.end_value is None:
52
+ end_dt = pendulum.now(tz="UTC")
53
+ else:
54
+ end_dt = dateTime.end_value
55
+
56
+ start_dt = dateTime.last_value
57
+
58
+ yield client.fetch_containers_result(create_client(), agent_id, start_date=start_dt, end_date=end_dt)
37
59
 
38
60
  return completed_phantoms
@@ -18,8 +18,10 @@ class PhantombusterClient:
18
18
  url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
19
19
  before_ended_at = None
20
20
  limit = 100
21
+
21
22
  started_at = start_date.int_timestamp * 1000 + int(start_date.microsecond / 1000)
22
23
  ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
24
+
23
25
  while True:
24
26
  params: dict[str, Union[str, int, float, bytes, None]] = {
25
27
  "agentId": agent_id,
@@ -36,15 +38,18 @@ class PhantombusterClient:
36
38
 
37
39
  for container in containers:
38
40
  container_ended_at = container.get("endedAt")
39
- if before_ended_at is None or before_ended_at > container["endedAt"]:
40
- before_ended_at = container["endedAt"]
41
-
42
- if not (started_at <= container_ended_at <= ended_at):
41
+
42
+ if before_ended_at is None or before_ended_at > container_ended_at:
43
+ before_ended_at = container_ended_at
44
+
45
+ if container_ended_at < started_at or container_ended_at > ended_at:
43
46
  continue
47
+
44
48
  try:
45
49
  result = self.fetch_result_object(session, container["id"])
46
- partition_dt = pendulum.from_timestamp(container_ended_at / 1000, tz="UTC").to_date_string()
47
- row = {"container": container, "result": result, "partition_dt": partition_dt}
50
+ partition_dt = pendulum.from_timestamp(container_ended_at / 1000, tz="UTC").date()
51
+ container_ended_at_datetime = pendulum.from_timestamp(container_ended_at / 1000, tz="UTC")
52
+ row = {"container_id": container["id"],"container": container, "result": result, "partition_dt": partition_dt, "ended_at": container_ended_at_datetime}
48
53
  yield row
49
54
 
50
55
  except requests.RequestException as e:
ingestr/src/sources.py CHANGED
@@ -2180,6 +2180,18 @@ class FrankfurterSource:
2180
2180
  "Frankfurter takes care of incrementality on its own, you should not provide incremental_key"
2181
2181
  )
2182
2182
 
2183
+ from ingestr.src.frankfurter import frankfurter_source
2184
+ from ingestr.src.frankfurter.helpers import validate_currency, validate_dates
2185
+
2186
+ parsed_uri = urlparse(uri)
2187
+ source_params = parse_qs(parsed_uri.query)
2188
+ base_currency = source_params.get("base", [None])[0]
2189
+
2190
+ if not base_currency:
2191
+ base_currency = "USD"
2192
+
2193
+ validate_currency(base_currency)
2194
+
2183
2195
  if kwargs.get("interval_start"):
2184
2196
  start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
2185
2197
  if kwargs.get("interval_end"):
@@ -2190,21 +2202,20 @@ class FrankfurterSource:
2190
2202
  start_date = pendulum.now()
2191
2203
  end_date = pendulum.now()
2192
2204
 
2193
- from ingestr.src.frankfurter import frankfurter_source
2194
- from ingestr.src.frankfurter.helpers import validate_dates
2195
-
2196
2205
  validate_dates(start_date=start_date, end_date=end_date)
2197
2206
 
2198
2207
  src = frankfurter_source(
2199
2208
  start_date=start_date,
2200
2209
  end_date=end_date,
2210
+ base_currency=base_currency,
2201
2211
  )
2202
2212
 
2203
2213
  if table not in src.resources:
2204
2214
  raise UnsupportedResourceError(table, "Frankfurter")
2205
2215
 
2206
2216
  return src.with_resources(table)
2207
-
2217
+
2218
+
2208
2219
  class FreshdeskSource:
2209
2220
  # freshdesk://domain?api_key=<api_key>
2210
2221
  def handles_incrementality(self) -> bool:
@@ -2257,18 +2268,14 @@ class PhantombusterSource:
2257
2268
  raise MissingValueError("agent_id", "Phantombuster")
2258
2269
 
2259
2270
  start_date = kwargs.get("interval_start")
2260
- if start_date is not None:
2261
- start_date = ensure_pendulum_datetime(start_date)
2271
+ if start_date is None:
2272
+ start_date = ensure_pendulum_datetime("2018-01-01").in_tz("UTC")
2262
2273
  else:
2263
- start_date = pendulum.parse("2018-01-01")
2274
+ start_date = ensure_pendulum_datetime(start_date).in_tz("UTC")
2264
2275
 
2265
2276
  end_date = kwargs.get("interval_end")
2266
-
2267
- #doesnot support incremental loading
2268
2277
  if end_date is not None:
2269
- end_date = ensure_pendulum_datetime(end_date)
2270
- else:
2271
- end_date = pendulum.now()
2278
+ end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
2272
2279
 
2273
2280
  from ingestr.src.phantombuster import phantombuster_source
2274
2281
  return phantombuster_source(api_key=api_key[0], agent_id=agent_id, start_date=start_date, end_date=end_date).with_resources(table_name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.37
3
+ Version: 0.13.38
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -74,7 +74,7 @@ Requires-Dist: google-cloud-storage==3.1.0
74
74
  Requires-Dist: google-crc32c==1.6.0
75
75
  Requires-Dist: google-resumable-media==2.7.2
76
76
  Requires-Dist: googleapis-common-protos==1.69.0
77
- Requires-Dist: greenlet==3.2.1
77
+ Requires-Dist: greenlet==3.2.2
78
78
  Requires-Dist: grpcio-status==1.62.3
79
79
  Requires-Dist: grpcio==1.70.0
80
80
  Requires-Dist: hdbcli==2.23.27
@@ -1,8 +1,8 @@
1
1
  ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
- ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
2
+ ingestr/main.py,sha256=QHLjpCItCgL5XHcTezBOp6Vdy_VHAPciRjqy63ZIK2s,25285
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
5
- ingestr/src/buildinfo.py,sha256=zGfudKvUvWbTMFprtyFws2zsqeHGQj08eCKTrwTnVj8,21
5
+ ingestr/src/buildinfo.py,sha256=iW7GVGJjTCNtuINLzbFuur_d0ALxf_JmZSmuwD9yZ2Y,21
6
6
  ingestr/src/destinations.py,sha256=Z79f01BSmEaXnQno2IQVt4Th4dmD-BiOQXlibZJ5sTw,13180
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
8
  ingestr/src/factory.py,sha256=Mm_Be60PFO4mUIeJLBMDVU_uyH0IeCiZ1dtNDFiDFSo,5463
@@ -10,7 +10,7 @@ ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
10
10
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
11
11
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
12
12
  ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
13
- ingestr/src/sources.py,sha256=9ESEgdlaSQQszpRfp-etKvfFDvvmYfCc9sBlEPJxh3Q,78809
13
+ ingestr/src/sources.py,sha256=q9FJ3vRaeTjbL5sgau_GHPxbn5LfSI4t5TBwycVdwSc,79060
14
14
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
15
15
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
16
16
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -42,8 +42,8 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
42
42
  ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
43
43
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
44
44
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
45
- ingestr/src/frankfurter/__init__.py,sha256=sjxfq377-lryuFC3JswcbHBRoBjLnGLKNRTwBpDZyLw,4403
46
- ingestr/src/frankfurter/helpers.py,sha256=wqm087QVPcyTuMl6yj_Pl1wcuqElwcBMPz3P4773wcM,979
45
+ ingestr/src/frankfurter/__init__.py,sha256=oVi4BiOxPRyckEVrBNunyMAHulPyMgyGRwBbhn-Xz6M,4987
46
+ ingestr/src/frankfurter/helpers.py,sha256=SyrkRTDqvKdQxRHTV5kcSeVG3FEnaK5zxHyNyqtumZ0,1445
47
47
  ingestr/src/freshdesk/__init__.py,sha256=uFQW_cJyymxtHQiYb_xjzZAklc487L0n9GkgHgC7yAI,2618
48
48
  ingestr/src/freshdesk/freshdesk_client.py,sha256=3z5Yc008ADzRcJWtNc00PwjkLzG-RMI8jVIOOyYA-Rw,4088
49
49
  ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -87,8 +87,8 @@ ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falP
87
87
  ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
88
88
  ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV4zA,11638
89
89
  ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
90
- ingestr/src/phantombuster/__init__.py,sha256=FJJiVP0ciR48FTmXYLAasZ4JQAB1Ow4M_Hh39J6hWks,1112
91
- ingestr/src/phantombuster/client.py,sha256=HFJ46f_IU1NMMCA94ttoY1LBc0L7qfqeQEawczlbBvQ,2584
90
+ ingestr/src/phantombuster/__init__.py,sha256=5XGwbtNYmRxL4lmcC0TmyXgljY_21DvDIdw9zaWmgvI,1757
91
+ ingestr/src/phantombuster/client.py,sha256=WO87AGU3Fphd9DgWTkrSAfldLH1XxcPxvNVmNxQ_5eU,2785
92
92
  ingestr/src/pipedrive/__init__.py,sha256=iRrxeMwo8_83ptgGnTFTNHV1nYvIsFfg0a3XzugPYeI,6982
93
93
  ingestr/src/pipedrive/settings.py,sha256=q119Fy4C5Ip1rMoCILX2BkHV3bwiXC_dW58KIiDUzsY,708
94
94
  ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvzoE,69
@@ -127,8 +127,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
127
127
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
128
128
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
129
129
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
130
- ingestr-0.13.37.dist-info/METADATA,sha256=Mmc9hAE_zCJ_b5U9hCLpJXpU0858FirZdoO-FyPuOI4,13575
131
- ingestr-0.13.37.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
132
- ingestr-0.13.37.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
133
- ingestr-0.13.37.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
134
- ingestr-0.13.37.dist-info/RECORD,,
130
+ ingestr-0.13.38.dist-info/METADATA,sha256=ZknSXuae-_lb60AWDACr731hTr0Q_bX_9sUds6rHM28,13575
131
+ ingestr-0.13.38.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
132
+ ingestr-0.13.38.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
133
+ ingestr-0.13.38.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
134
+ ingestr-0.13.38.dist-info/RECORD,,