ingestr 0.14.3__py3-none-any.whl → 0.14.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.14.3"
1
+ version = "v0.14.5"
ingestr/src/factory.py CHANGED
@@ -70,6 +70,7 @@ from ingestr.src.sources import (
70
70
  PhantombusterSource,
71
71
  PinterestSource,
72
72
  PipedriveSource,
73
+ PlusVibeAISource,
73
74
  QuickBooksSource,
74
75
  RevenueCatSource,
75
76
  S3Source,
@@ -212,6 +213,7 @@ class SourceDestinationFactory:
212
213
  "clickup": ClickupSource,
213
214
  "influxdb": InfluxDBSource,
214
215
  "wise": WiseSource,
216
+ "plusvibeai": PlusVibeAISource,
215
217
  }
216
218
  destinations: Dict[str, Type[DestinationProtocol]] = {
217
219
  "bigquery": BigQueryDestination,
@@ -20,6 +20,7 @@ def freshdesk_source(
20
20
  end_date: Optional[pendulum.DateTime] = None,
21
21
  per_page: int = 100,
22
22
  endpoints: Optional[List[str]] = None,
23
+ query: Optional[str] = None,
23
24
  ) -> Iterable[DltResource]:
24
25
  """
25
26
  Retrieves data from specified Freshdesk API endpoints.
@@ -72,6 +73,7 @@ def freshdesk_source(
72
73
  per_page=per_page,
73
74
  start_date=start_date,
74
75
  end_date=end_date,
76
+ query=query,
75
77
  )
76
78
 
77
79
  # Set default endpoints if not provided
@@ -2,7 +2,7 @@
2
2
 
3
3
  import logging
4
4
  import time
5
- from typing import Any, Dict, Iterable
5
+ from typing import Any, Dict, Iterable, Optional
6
6
 
7
7
  import pendulum
8
8
  from dlt.common.typing import TDataItem
@@ -70,6 +70,7 @@ class FreshdeskClient:
70
70
  per_page: int,
71
71
  start_date: pendulum.DateTime,
72
72
  end_date: pendulum.DateTime,
73
+ query: Optional[str] = None,
73
74
  ) -> Iterable[TDataItem]:
74
75
  """
75
76
  Fetches a paginated response from a specified endpoint.
@@ -79,6 +80,9 @@ class FreshdeskClient:
79
80
  updated at the specified timestamp.
80
81
  """
81
82
  page = 1
83
+ if query is not None:
84
+ query = query.replace('"', "").strip()
85
+
82
86
  while True:
83
87
  # Construct the URL for the specific endpoint
84
88
  url = f"{self.base_url}/{endpoint}"
@@ -93,11 +97,21 @@ class FreshdeskClient:
93
97
 
94
98
  params[param_key] = start_date.to_iso8601_string()
95
99
 
100
+ if query and endpoint == "tickets":
101
+ url = f"{self.base_url}/search/tickets"
102
+ params = {
103
+ "query": f'"{query}"',
104
+ "page": page,
105
+ }
106
+
96
107
  # Handle requests with rate-limiting
97
108
  # A maximum of 300 pages (30000 tickets) will be returned.
98
109
  response = self._request_with_rate_limit(url, params=params)
99
110
  data = response.json()
100
111
 
112
+ if query and endpoint == "tickets":
113
+ data = data["results"]
114
+
101
115
  if not data:
102
116
  break # Stop if no data or max page limit reached
103
117
 
@@ -37,6 +37,7 @@ def jira_source() -> Any:
37
37
  resolutions,
38
38
  project_versions,
39
39
  project_components,
40
+ events,
40
41
  ]
41
42
 
42
43
 
@@ -65,7 +66,11 @@ def projects(
65
66
  yield from client.get_projects(expand=expand, recent=recent)
66
67
 
67
68
 
68
- @dlt.resource(write_disposition="merge", primary_key="id")
69
+ @dlt.resource(
70
+ write_disposition="merge",
71
+ primary_key="id",
72
+ max_table_nesting=2,
73
+ )
69
74
  def issues(
70
75
  base_url: str = dlt.secrets.value,
71
76
  email: str = dlt.secrets.value,
@@ -312,3 +317,24 @@ def project_components(
312
317
  return []
313
318
 
314
319
  return list(client.get_project_components(project_key))
320
+
321
+
322
@dlt.resource(write_disposition="replace")
def events(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Yield the event types exposed by Jira (e.g., Issue Created, Issue Updated).

    The resource is loaded with the "replace" write disposition.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Yields:
        dict: One event record per item produced by the client's get_events().
    """
    jira = get_client(base_url, email, api_token)
    for event in jira.get_events():
        yield event
@@ -98,8 +98,6 @@ class JiraClient:
98
98
 
99
99
  for attempt in range(max_retries + 1):
100
100
  try:
101
- logger.debug(f"Making request to {url} (attempt {attempt + 1})")
102
-
103
101
  response = requests.request(
104
102
  method=method,
105
103
  url=url,
@@ -214,10 +212,6 @@ class JiraClient:
214
212
  consecutive_empty_pages = 0
215
213
  max_empty_pages = 3
216
214
 
217
- logger.info(
218
- f"Starting paginated request to {endpoint} with page_size={page_size}"
219
- )
220
-
221
215
  while True:
222
216
  try:
223
217
  response = self._make_request(endpoint, params)
@@ -238,7 +232,6 @@ class JiraClient:
238
232
  is_last = True
239
233
  else:
240
234
  # Single item response
241
- logger.debug(f"Received single item response from {endpoint}")
242
235
  yield response
243
236
  break
244
237
 
@@ -253,27 +246,18 @@ class JiraClient:
253
246
  else:
254
247
  consecutive_empty_pages = 0
255
248
 
256
- logger.debug(
257
- f"Retrieved {len(items)} items from {endpoint} (page {params['startAt'] // page_size + 1})"
258
- )
259
-
260
249
  for item in items:
261
250
  if max_results and total_returned >= max_results:
262
- logger.info(f"Reached max_results limit of {max_results}")
263
251
  return
264
252
  yield item
265
253
  total_returned += 1
266
254
 
267
255
  # Check if we've reached the end
268
256
  if is_last or len(items) < page_size:
269
- logger.debug(f"Reached end of pagination for {endpoint}")
270
257
  break
271
258
 
272
259
  # Check if we've got all available items
273
260
  if total and total_returned >= total:
274
- logger.debug(
275
- f"Retrieved all {total} available items from {endpoint}"
276
- )
277
261
  break
278
262
 
279
263
  # Move to next page
@@ -295,10 +279,6 @@ class JiraClient:
295
279
  )
296
280
  raise JiraAPIError(f"Pagination failed: {str(e)}")
297
281
 
298
- logger.info(
299
- f"Completed pagination for {endpoint}, returned {total_returned} items"
300
- )
301
-
302
282
  def search_issues(
303
283
  self,
304
284
  jql: str,
@@ -327,7 +307,7 @@ class JiraClient:
327
307
  params["expand"] = expand
328
308
 
329
309
  yield from self.get_paginated(
330
- "search", params=params, page_size=page_size, max_results=max_results
310
+ "search/jql", params=params, page_size=page_size, max_results=max_results
331
311
  )
332
312
 
333
313
  def get_projects(
@@ -433,6 +413,13 @@ class JiraClient:
433
413
  """
434
414
  yield from self.get_paginated(f"project/{project_key}/component")
435
415
 
416
def get_events(self) -> Iterator[Dict[str, Any]]:
    """
    Yield each entry of the list returned by the "events" endpoint.

    A response that is not a list produces no items.
    """
    payload = self._make_request("events")
    if not isinstance(payload, list):
        return
    yield from payload
422
+
436
423
 
437
424
  def get_client(
438
425
  base_url: str, email: str, api_token: str, timeout: int = REQUEST_TIMEOUT
@@ -0,0 +1,335 @@
1
+ """
2
+ This source provides data extraction from PlusVibeAI via the REST API.
3
+
4
+ It defines functions to fetch data from different parts of PlusVibeAI including
5
+ campaigns and other marketing analytics data.
6
+ """
7
+
8
+ from typing import Any, Iterable, Optional
9
+
10
+ import dlt
11
+ from dlt.common.typing import TDataItem
12
+
13
+ from .helpers import get_client
14
+ from .settings import DEFAULT_PAGE_SIZE, DEFAULT_START_DATE
15
+
16
+
17
@dlt.source
def plusvibeai_source() -> Any:
    """
    Bundle every PlusVibeAI resource defined in this module into one dlt source.

    Returns:
        Sequence[DltResource]: The campaigns, leads, email_accounts, emails,
        blocklist, webhooks and tags resources, in that order.
    """
    resources = [
        campaigns,
        leads,
        email_accounts,
        emails,
        blocklist,
        webhooks,
        tags,
    ]
    return resources
34
+
35
+
36
@dlt.resource(
    write_disposition="merge",
    primary_key="id",
    max_table_nesting=0,  # Keep nested objects (schedule, sequences) as JSON columns
)
def campaigns(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "modified_at",  # PlusVibeAI uses modified_at for updates
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI campaigns, merged into the destination on "id".

    Args:
        api_key (str): API key for authentication (get from https://app.plusvibe.ai/v2/settings/api-access/)
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on "modified_at"

    Yields:
        dict: The campaign data with nested objects (schedule, sequences, etc.) as JSON.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_campaigns(
        page_size=DEFAULT_PAGE_SIZE, max_results=max_results
    )

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("modified_at") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
78
+
79
+
80
@dlt.resource(
    write_disposition="merge",
    primary_key="_id",
    max_table_nesting=0,
)
def leads(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "modified_at",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI leads, merged into the destination on "_id".

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on "modified_at"

    Yields:
        dict: The lead data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_leads(page_size=DEFAULT_PAGE_SIZE, max_results=max_results)

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("modified_at") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
120
+
121
+
122
@dlt.resource(
    write_disposition="merge",
    primary_key="_id",
    max_table_nesting=0,
)
def email_accounts(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "timestamp_updated",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI email accounts, merged into the destination on "_id".

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on
            "timestamp_updated"

    Yields:
        dict: The email account data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_email_accounts(
        page_size=DEFAULT_PAGE_SIZE, max_results=max_results
    )

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("timestamp_updated") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
164
+
165
+
166
@dlt.resource(
    write_disposition="merge",
    primary_key="id",
    max_table_nesting=0,
)
def emails(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "timestamp_created",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI emails, merged into the destination on "id".

    Unlike the other resources in this module, no page size is passed to the
    client call here; only max_results is forwarded.

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on
            "timestamp_created"

    Yields:
        dict: The email data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_emails(max_results=max_results)

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("timestamp_created") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
206
+
207
+
208
@dlt.resource(
    write_disposition="merge",
    primary_key="_id",
    max_table_nesting=0,
)
def blocklist(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "created_at",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI blocklist entries, merged into the destination on "_id".

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on "created_at"

    Yields:
        dict: The blocklist entry data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_blocklist(
        page_size=DEFAULT_PAGE_SIZE, max_results=max_results
    )

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("created_at") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
250
+
251
+
252
@dlt.resource(
    write_disposition="merge",
    primary_key="_id",
    max_table_nesting=0,
)
def webhooks(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "modified_at",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI webhooks, merged into the destination on "_id".

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on "modified_at"

    Yields:
        dict: The webhook data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_webhooks(
        page_size=DEFAULT_PAGE_SIZE, max_results=max_results
    )

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("modified_at") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record
294
+
295
+
296
@dlt.resource(
    write_disposition="merge",
    primary_key="_id",
    max_table_nesting=0,
)
def tags(
    api_key: str = dlt.secrets.value,
    workspace_id: str = dlt.secrets.value,
    base_url: str = "https://api.plusvibe.ai",
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "modified_at",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Yield PlusVibeAI tags, merged into the destination on "_id".

    Args:
        api_key (str): API key for authentication
        workspace_id (str): Workspace ID to access
        base_url (str): PlusVibeAI API base URL
        max_results (int): Result cap, forwarded unchanged to the client
        updated (dlt.sources.incremental[str]): Incremental cursor on "modified_at"

    Yields:
        dict: The tag data.
    """
    api = get_client(api_key, workspace_id, base_url)
    fetched = api.get_tags(page_size=DEFAULT_PAGE_SIZE, max_results=max_results)

    for record in fetched:
        cutoff = updated.start_value
        # The timestamp is only consulted when an incremental start value exists.
        stamp = record.get("modified_at") if cutoff else None
        # Records with no timestamp are passed through; only older ones are dropped.
        if not (stamp and stamp < cutoff):
            yield record