ingestr 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.14.2"
1
+ version = "v0.14.4"
@@ -813,30 +813,7 @@ class ElasticsearchDestination:
813
813
 
814
814
  class MongoDBDestination:
815
815
  def dlt_dest(self, uri: str, **kwargs):
816
- from urllib.parse import urlparse
817
-
818
- parsed_uri = urlparse(uri)
819
-
820
- # Extract connection details from URI
821
- host = parsed_uri.hostname or "localhost"
822
- port = parsed_uri.port or 27017
823
- username = parsed_uri.username
824
- password = parsed_uri.password
825
- database = (
826
- parsed_uri.path.lstrip("/") if parsed_uri.path.lstrip("/") else "ingestr_db"
827
- )
828
-
829
- # Build connection string
830
- if username and password:
831
- connection_string = f"mongodb://{username}:{password}@{host}:{port}"
832
- else:
833
- connection_string = f"mongodb://{host}:{port}"
834
-
835
- # Add query parameters if any
836
- if parsed_uri.query:
837
- connection_string += f"?{parsed_uri.query}"
838
-
839
- return mongodb_insert(connection_string, database)
816
+ return mongodb_insert(uri)
840
817
 
841
818
  def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
842
819
  return {
@@ -1,6 +1,7 @@
1
1
  """Elasticsearch destination helpers"""
2
2
 
3
3
  import json
4
+ import logging
4
5
  from typing import Any, Dict, Iterator
5
6
  from urllib.parse import urlparse
6
7
 
@@ -9,6 +10,10 @@ import dlt
9
10
  from elasticsearch import Elasticsearch
10
11
  from elasticsearch.helpers import bulk
11
12
 
13
+ # Suppress Elasticsearch transport logging
14
+ logging.getLogger("elasticsearch.transport").setLevel(logging.WARNING)
15
+ logging.getLogger("elastic_transport.transport").setLevel(logging.WARNING)
16
+
12
17
 
13
18
  def process_file_items(file_path: str) -> Iterator[Dict[str, Any]]:
14
19
  """Process items from a file path (JSONL format)."""
@@ -52,15 +57,36 @@ def elasticsearch_insert(
52
57
  parsed = urlparse(connection_string)
53
58
 
54
59
  # Build Elasticsearch client configuration
55
- hosts = [
56
- {
57
- "host": parsed.hostname or "localhost",
58
- "port": parsed.port or 9200,
59
- "scheme": parsed.scheme or "http",
60
- }
61
- ]
62
-
63
- es_config: Dict[str, Any] = {"hosts": hosts}
60
+ actual_url = connection_string
61
+ secure = True # Default to HTTPS (secure by default)
62
+
63
+ if connection_string.startswith("elasticsearch://"):
64
+ actual_url = connection_string.replace("elasticsearch://", "")
65
+
66
+ # Parse to check for query parameters
67
+ temp_parsed = urlparse("http://" + actual_url)
68
+ from urllib.parse import parse_qs
69
+
70
+ query_params = parse_qs(temp_parsed.query)
71
+
72
+ # Check ?secure parameter (defaults to true)
73
+ if "secure" in query_params:
74
+ secure = query_params["secure"][0].lower() in ["true", "1", "yes"]
75
+
76
+ # Remove query params from URL for ES client
77
+ actual_url = actual_url.split("?")[0]
78
+
79
+ # Add scheme
80
+ scheme = "https" if secure else "http"
81
+ actual_url = f"{scheme}://{actual_url}"
82
+
83
+ parsed = urlparse(actual_url)
84
+
85
+ es_config: Dict[str, Any] = {
86
+ "hosts": [actual_url],
87
+ "verify_certs": secure,
88
+ "ssl_show_warn": False,
89
+ }
64
90
 
65
91
  # Add authentication if present
66
92
  if parsed.username and parsed.password:
ingestr/src/factory.py CHANGED
@@ -56,6 +56,7 @@ from ingestr.src.sources import (
56
56
  InfluxDBSource,
57
57
  IntercomSource,
58
58
  IsocPulseSource,
59
+ JiraSource,
59
60
  KafkaSource,
60
61
  KinesisSource,
61
62
  KlaviyoSource,
@@ -168,6 +169,7 @@ class SourceDestinationFactory:
168
169
  "slack": SlackSource,
169
170
  "hubspot": HubspotSource,
170
171
  "intercom": IntercomSource,
172
+ "jira": JiraSource,
171
173
  "airtable": AirtableSource,
172
174
  "klaviyo": KlaviyoSource,
173
175
  "mixpanel": MixpanelSource,
@@ -96,6 +96,15 @@ FLUXX_RESOURCES = {
96
96
  "workflow_events": {"data_type": "json", "field_type": "relation"},
97
97
  },
98
98
  },
99
+ "alert_email": {
100
+ "endpoint": "alert_email",
101
+ "fields": {
102
+ "alert_id": {"data_type": "bigint", "field_type": "column"},
103
+ "created_at": {"data_type": "timestamp", "field_type": "column"},
104
+ "id": {"data_type": "bigint", "field_type": "column"},
105
+ "updated_at": {"data_type": "timestamp", "field_type": "column"},
106
+ },
107
+ },
99
108
  "affiliate": {
100
109
  "endpoint": "affiliate",
101
110
  "fields": {
@@ -20,6 +20,7 @@ def freshdesk_source(
20
20
  end_date: Optional[pendulum.DateTime] = None,
21
21
  per_page: int = 100,
22
22
  endpoints: Optional[List[str]] = None,
23
+ query: Optional[str] = None,
23
24
  ) -> Iterable[DltResource]:
24
25
  """
25
26
  Retrieves data from specified Freshdesk API endpoints.
@@ -72,6 +73,7 @@ def freshdesk_source(
72
73
  per_page=per_page,
73
74
  start_date=start_date,
74
75
  end_date=end_date,
76
+ query=query,
75
77
  )
76
78
 
77
79
  # Set default endpoints if not provided
@@ -2,7 +2,7 @@
2
2
 
3
3
  import logging
4
4
  import time
5
- from typing import Any, Dict, Iterable
5
+ from typing import Any, Dict, Iterable, Optional
6
6
 
7
7
  import pendulum
8
8
  from dlt.common.typing import TDataItem
@@ -70,6 +70,7 @@ class FreshdeskClient:
70
70
  per_page: int,
71
71
  start_date: pendulum.DateTime,
72
72
  end_date: pendulum.DateTime,
73
+ query: Optional[str] = None,
73
74
  ) -> Iterable[TDataItem]:
74
75
  """
75
76
  Fetches a paginated response from a specified endpoint.
@@ -79,6 +80,9 @@ class FreshdeskClient:
79
80
  updated at the specified timestamp.
80
81
  """
81
82
  page = 1
83
+ if query is not None:
84
+ query = query.replace('"', "").strip()
85
+
82
86
  while True:
83
87
  # Construct the URL for the specific endpoint
84
88
  url = f"{self.base_url}/{endpoint}"
@@ -93,11 +97,21 @@ class FreshdeskClient:
93
97
 
94
98
  params[param_key] = start_date.to_iso8601_string()
95
99
 
100
+ if query and endpoint == "tickets":
101
+ url = f"{self.base_url}/search/tickets"
102
+ params = {
103
+ "query": f'"{query}"',
104
+ "page": page,
105
+ }
106
+
96
107
  # Handle requests with rate-limiting
97
108
  # A maximum of 300 pages (30000 tickets) will be returned.
98
109
  response = self._request_with_rate_limit(url, params=params)
99
110
  data = response.json()
100
111
 
112
+ if query and endpoint == "tickets":
113
+ data = data["results"]
114
+
101
115
  if not data:
102
116
  break # Stop if no data or max page limit reached
103
117
 
@@ -93,7 +93,6 @@ def hubspot(
93
93
  def companies(
94
94
  api_key: str = api_key,
95
95
  include_history: bool = include_history,
96
- props: Sequence[str] = DEFAULT_COMPANY_PROPS,
97
96
  include_custom_props: bool = include_custom_props,
98
97
  ) -> Iterator[TDataItems]:
99
98
  """Hubspot companies resource"""
@@ -101,7 +100,7 @@ def hubspot(
101
100
  "company",
102
101
  api_key,
103
102
  include_history=include_history,
104
- props=props,
103
+ props=DEFAULT_COMPANY_PROPS,
105
104
  include_custom_props=include_custom_props,
106
105
  )
107
106
 
@@ -109,7 +108,6 @@ def hubspot(
109
108
  def contacts(
110
109
  api_key: str = api_key,
111
110
  include_history: bool = include_history,
112
- props: Sequence[str] = DEFAULT_CONTACT_PROPS,
113
111
  include_custom_props: bool = include_custom_props,
114
112
  ) -> Iterator[TDataItems]:
115
113
  """Hubspot contacts resource"""
@@ -117,7 +115,7 @@ def hubspot(
117
115
  "contact",
118
116
  api_key,
119
117
  include_history,
120
- props,
118
+ DEFAULT_CONTACT_PROPS,
121
119
  include_custom_props,
122
120
  )
123
121
 
@@ -125,7 +123,6 @@ def hubspot(
125
123
  def deals(
126
124
  api_key: str = api_key,
127
125
  include_history: bool = include_history,
128
- props: Sequence[str] = DEFAULT_DEAL_PROPS,
129
126
  include_custom_props: bool = include_custom_props,
130
127
  ) -> Iterator[TDataItems]:
131
128
  """Hubspot deals resource"""
@@ -133,7 +130,7 @@ def hubspot(
133
130
  "deal",
134
131
  api_key,
135
132
  include_history,
136
- props,
133
+ DEFAULT_DEAL_PROPS,
137
134
  include_custom_props,
138
135
  )
139
136
 
@@ -141,7 +138,6 @@ def hubspot(
141
138
  def tickets(
142
139
  api_key: str = api_key,
143
140
  include_history: bool = include_history,
144
- props: Sequence[str] = DEFAULT_TICKET_PROPS,
145
141
  include_custom_props: bool = include_custom_props,
146
142
  ) -> Iterator[TDataItems]:
147
143
  """Hubspot tickets resource"""
@@ -149,7 +145,7 @@ def hubspot(
149
145
  "ticket",
150
146
  api_key,
151
147
  include_history,
152
- props,
148
+ DEFAULT_TICKET_PROPS,
153
149
  include_custom_props,
154
150
  )
155
151
 
@@ -157,7 +153,6 @@ def hubspot(
157
153
  def products(
158
154
  api_key: str = api_key,
159
155
  include_history: bool = include_history,
160
- props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
161
156
  include_custom_props: bool = include_custom_props,
162
157
  ) -> Iterator[TDataItems]:
163
158
  """Hubspot products resource"""
@@ -165,7 +160,7 @@ def hubspot(
165
160
  "product",
166
161
  api_key,
167
162
  include_history,
168
- props,
163
+ DEFAULT_PRODUCT_PROPS,
169
164
  include_custom_props,
170
165
  )
171
166
 
@@ -180,7 +175,6 @@ def hubspot(
180
175
  def quotes(
181
176
  api_key: str = api_key,
182
177
  include_history: bool = include_history,
183
- props: Sequence[str] = DEFAULT_QUOTE_PROPS,
184
178
  include_custom_props: bool = include_custom_props,
185
179
  ) -> Iterator[TDataItems]:
186
180
  """Hubspot quotes resource"""
@@ -188,7 +182,7 @@ def hubspot(
188
182
  "quote",
189
183
  api_key,
190
184
  include_history,
191
- props,
185
+ DEFAULT_QUOTE_PROPS,
192
186
  include_custom_props,
193
187
  )
194
188
 
@@ -126,7 +126,9 @@ RESOURCE_CONFIGS = {
126
126
  "pagination_type": "cursor",
127
127
  "incremental": False,
128
128
  "transform_func": None,
129
- "columns": {},
129
+ "columns": {
130
+ "id": {"data_type": "bigint", "nullable": True},
131
+ },
130
132
  },
131
133
  }
132
134
 
@@ -0,0 +1,314 @@
1
+ """
2
+ This source provides data extraction from Jira Cloud via the REST API v3.
3
+
4
+ It defines several functions to fetch data from different parts of Jira including
5
+ projects, issues, users, project versions and components, and configuration objects
6
+ such as issue types, statuses, priorities, and resolutions.
7
+ """
8
+
9
+ from typing import Any, Iterable, Optional
10
+
11
+ import dlt
12
+ from dlt.common.typing import TDataItem
13
+
14
+ from .helpers import get_client
15
+ from .settings import (
16
+ DEFAULT_PAGE_SIZE,
17
+ DEFAULT_START_DATE,
18
+ ISSUE_FIELDS,
19
+ )
20
+
21
+
22
@dlt.source
def jira_source() -> Any:
    """
    Bundle every Jira resource defined in this module into one dlt source.

    Returns:
        list: The resources in a fixed order -- first the standalone
        resources, then the transformers that are fed from ``projects``.
    """
    standalone_resources = [
        projects,
        issues,
        users,
        issue_types,
        statuses,
        priorities,
        resolutions,
    ]
    # These two are declared with ``data_from=projects`` and run per project.
    per_project_transformers = [project_versions, project_components]
    return standalone_resources + per_project_transformers
41
+
42
+
43
@dlt.resource(write_disposition="replace")
def projects(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
    expand: Optional[str] = None,
    recent: Optional[int] = None,
) -> Iterable[TDataItem]:
    """
    Yield the projects returned by the Jira client.

    Args:
        base_url (str): Jira instance URL (e.g., https://your-domain.atlassian.net)
        email (str): User email for authentication
        api_token (str): API token for authentication
        expand (str): Comma-separated list of fields to expand; forwarded as-is
        recent (int): Number of recent projects to return; forwarded as-is

    Yields:
        dict: One project record at a time.
    """
    jira = get_client(base_url, email, api_token)
    project_records = jira.get_projects(expand=expand, recent=recent)
    yield from project_records
66
+
67
+
68
def _add_updated_filter(jql: str, start_value: str) -> str:
    """
    Return ``jql`` narrowed to issues updated on or after ``start_value``.

    The user's condition (everything before a trailing ORDER BY clause) is
    wrapped in parentheses so that its own AND/OR operators cannot change the
    meaning of the appended ``updated >=`` filter. An empty condition yields
    just the date filter, optionally followed by the ORDER BY clause.

    Args:
        jql (str): The caller-supplied JQL; may be empty or ORDER BY only.
        start_value (str): Lower bound interpolated into the date filter.

    Returns:
        str: The combined JQL string.
    """
    import re  # local import keeps this block independent of the file header

    date_filter = f"updated >= '{start_value}'"

    # Quoted literals are matched first so that an "order by" appearing inside
    # a string value is consumed and never mistaken for the real clause.
    token = re.compile(
        r'"(?:[^"\\]|\\.)*"'
        r"|'(?:[^'\\]|\\.)*'"
        r"|\border\s+by\b",
        re.IGNORECASE,
    )

    # Search the original string (not an upper-cased copy) so the split index
    # is always valid for slicing ``jql``.
    split_at = len(jql)
    for match in token.finditer(jql):
        if match.group(0)[0] not in "\"'":
            split_at = match.start()
            break

    condition = jql[:split_at].strip()
    order_clause = jql[split_at:].strip()

    parts = [f"({condition}) AND {date_filter}" if condition else date_filter]
    if order_clause:
        parts.append(order_clause)
    return " ".join(parts)


@dlt.resource(write_disposition="merge", primary_key="id")
def issues(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
    jql: str = "order by updated DESC",
    fields: Optional[str] = None,
    expand: Optional[str] = None,
    max_results: Optional[int] = None,
    updated: dlt.sources.incremental[str] = dlt.sources.incremental(
        "fields.updated",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
) -> Iterable[TDataItem]:
    """
    Fetches issues from Jira using JQL search.

    When the incremental cursor has a start value, the JQL is extended with an
    ``updated >= '<start value>'`` filter placed before any ORDER BY clause.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication
        jql (str): JQL query string
        fields (str): Comma-separated list of fields to return; defaults to
            the joined ``ISSUE_FIELDS`` setting when omitted
        expand (str): Comma-separated list of fields to expand
        max_results (int): Maximum number of results to return
        updated: Incremental cursor tracked on ``fields.updated``

    Yields:
        dict: The issue data.
    """
    client = get_client(base_url, email, api_token)

    # Without a cursor value the caller's JQL is sent untouched.
    incremental_jql = jql
    if updated.start_value:
        incremental_jql = _add_updated_filter(jql, updated.start_value)

    # Use default fields if not specified
    if fields is None:
        fields = ",".join(ISSUE_FIELDS)

    yield from client.search_issues(
        jql=incremental_jql, fields=fields, expand=expand, max_results=max_results
    )
140
+
141
+
142
@dlt.resource(write_disposition="replace")
def users(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
    username: Optional[str] = None,
    account_id: Optional[str] = None,
    max_results: int = DEFAULT_PAGE_SIZE,
) -> Iterable[TDataItem]:
    """
    Yield the users returned by the Jira client.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication
        username (str): Username to search for; forwarded as-is
        account_id (str): Account ID to search for; forwarded as-is
        max_results (int): Maximum results per page

    Yields:
        dict: One user record at a time.
    """
    jira = get_client(base_url, email, api_token)
    user_records = jira.get_users(
        username=username,
        account_id=account_id,
        max_results=max_results,
    )
    yield from user_records
169
+
170
+
171
@dlt.resource(write_disposition="replace")
def issue_types(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Yield the issue types returned by the Jira client.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Yields:
        dict: One issue type record at a time.
    """
    jira = get_client(base_url, email, api_token)
    issue_type_records = jira.get_issue_types()
    yield from issue_type_records
190
+
191
+
192
@dlt.resource(write_disposition="replace")
def statuses(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Yield the statuses returned by the Jira client.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Yields:
        dict: One status record at a time.
    """
    jira = get_client(base_url, email, api_token)
    status_records = jira.get_statuses()
    yield from status_records
211
+
212
+
213
@dlt.resource(write_disposition="replace")
def priorities(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Yield the priorities returned by the Jira client.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Yields:
        dict: One priority record at a time.
    """
    jira = get_client(base_url, email, api_token)
    priority_records = jira.get_priorities()
    yield from priority_records
232
+
233
+
234
@dlt.resource(write_disposition="replace")
def resolutions(
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Yield the resolutions returned by the Jira client.

    Args:
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Yields:
        dict: One resolution record at a time.
    """
    jira = get_client(base_url, email, api_token)
    resolution_records = jira.get_resolutions()
    yield from resolution_records
253
+
254
+
255
@dlt.transformer(
    data_from=projects,
    write_disposition="replace",
)
@dlt.defer
def project_versions(
    project: TDataItem,
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Collect the versions of a single project received from ``projects``.

    Args:
        project (dict): The project data; only its ``"key"`` entry is read.
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Returns:
        list[dict]: The versions of the project, or an empty list when the
        project has no (or an empty) key.
    """
    # The client is created before the key check, as in the original order.
    jira = get_client(base_url, email, api_token)
    key = project.get("key")
    return list(jira.get_project_versions(key)) if key else []
284
+
285
+
286
@dlt.transformer(
    data_from=projects,
    write_disposition="replace",
)
@dlt.defer
def project_components(
    project: TDataItem,
    base_url: str = dlt.secrets.value,
    email: str = dlt.secrets.value,
    api_token: str = dlt.secrets.value,
) -> Iterable[TDataItem]:
    """
    Collect the components of a single project received from ``projects``.

    Args:
        project (dict): The project data; only its ``"key"`` entry is read.
        base_url (str): Jira instance URL
        email (str): User email for authentication
        api_token (str): API token for authentication

    Returns:
        list[dict]: The components of the project, or an empty list when the
        project has no (or an empty) key.
    """
    # The client is created before the key check, as in the original order.
    jira = get_client(base_url, email, api_token)
    key = project.get("key")
    return list(jira.get_project_components(key)) if key else []