omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,15 @@
1
class NoReportsFoundError(Exception):
    """Error carrying a fixed message: no Report instances for the date range."""

    def __init__(self):
        message = "No Report instances found for the given date range"
        super().__init__(message)
4
+
5
+
6
class NoOngoingReportRequestsFoundError(Exception):
    """Error carrying a fixed message: no usable ONGOING report request exists."""

    def __init__(self):
        message = (
            "No ONGOING report requests found (or they're stopped due to inactivity)"
        )
        super().__init__(message)
11
+
12
+
13
class NoSuchReportError(Exception):
    """Error whose message names the report that could not be found."""

    def __init__(self, report_name):
        message = f"No such report found: {report_name}"
        super().__init__(message)
@@ -0,0 +1,117 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Optional
3
+
4
+ from dataclasses_json import dataclass_json
5
+
6
+
7
@dataclass_json
@dataclass
class Links:
    """Type of the ``links`` member of the Analytics*Response models in this module."""

    # Required link (presumably the URL of the current page; confirm against the API).
    self: str
    # Optional link to a following page; defaults to None when not supplied.
    next: Optional[str] = None
12
+
13
+
14
@dataclass_json
@dataclass
class ReportRequestAttributes:
    """Type of ``ReportRequest.attributes``."""

    # Field names are camelCase, presumably mirroring the API's JSON keys.
    accessType: str
    stoppedDueToInactivity: bool
19
+
20
+
21
@dataclass_json
@dataclass
class ReportAttributes:
    """Type of ``Report.attributes``: the report's name and category."""

    name: str
    category: str
26
+
27
+
28
@dataclass_json
@dataclass
class ReportInstanceAttributes:
    """Type of ``ReportInstance.attributes``."""

    granularity: str
    # Kept as a string here; no date parsing happens in this model.
    processingDate: str
33
+
34
+
35
@dataclass_json
@dataclass
class ReportSegmentAttributes:
    """Type of ``ReportSegment.attributes``: checksum, URL and size of a segment."""

    checksum: str
    url: str
    sizeInBytes: int
41
+
42
+
43
@dataclass_json
@dataclass
class ReportRequest:
    """One element of ``AnalyticsReportRequestsResponse.data``."""

    type: str
    id: str
    attributes: ReportRequestAttributes
49
+
50
+
51
@dataclass_json
@dataclass
class Report:
    """One element of ``AnalyticsReportResponse.data``."""

    type: str
    id: str
    attributes: ReportAttributes
57
+
58
+
59
@dataclass_json
@dataclass
class ReportInstance:
    """One element of ``AnalyticsReportInstancesResponse.data``."""

    type: str
    id: str
    attributes: ReportInstanceAttributes
65
+
66
+
67
@dataclass_json
@dataclass
class ReportSegment:
    """One element of ``AnalyticsReportSegmentsResponse.data``."""

    type: str
    id: str
    attributes: ReportSegmentAttributes
73
+
74
+
75
@dataclass_json
@dataclass
class PagingMeta:
    """Type of ``Meta.paging``: the ``total`` and ``limit`` integers of a response."""

    total: int
    limit: int
80
+
81
+
82
@dataclass_json
@dataclass
class Meta:
    """Type of the ``meta`` member of the Analytics*Response models; wraps paging info."""

    paging: PagingMeta
86
+
87
+
88
@dataclass_json
@dataclass
class AnalyticsReportRequestsResponse:
    """Response envelope holding a list of ReportRequest items plus ``meta`` and ``links``."""

    data: List[ReportRequest]
    meta: Meta
    links: Links
94
+
95
+
96
@dataclass_json
@dataclass
class AnalyticsReportResponse:
    """Response envelope holding a list of Report items plus ``meta`` and ``links``."""

    data: List[Report]
    meta: Meta
    links: Links
102
+
103
+
104
@dataclass_json
@dataclass
class AnalyticsReportInstancesResponse:
    """Response envelope holding a list of ReportInstance items plus ``meta`` and ``links``."""

    data: List[ReportInstance]
    meta: Meta
    links: Links
110
+
111
+
112
@dataclass_json
@dataclass
class AnalyticsReportSegmentsResponse:
    """Response envelope holding a list of ReportSegment items plus ``meta`` and ``links``."""

    data: List[ReportSegment]
    meta: Meta
    links: Links
@@ -0,0 +1,179 @@
1
+ from dataclasses import dataclass
2
+ from typing import List
3
+
4
+
5
@dataclass
class ResourceConfig:
    """Static description of one entry of the module-level ``RESOURCES`` list."""

    # Resource name, e.g. "app-downloads-detailed".
    name: str
    # Column names that together form the primary key.
    primary_key: List[str]
    # Column name -> hint dict, e.g. {"Date": {"data_type": "date"}}.
    columns: dict
    # Report name, e.g. "App Downloads Detailed".
    report_name: str
11
+
12
+
13
# One ResourceConfig per supported report. Each entry pairs a resource name with
# the report name, the primary-key columns, and data-type hints for the columns
# that are not plain text (every entry also hints "processing_date" as a date).
RESOURCES: List[ResourceConfig] = [
    ResourceConfig(
        name="app-downloads-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Name",
            "App Version",
            "Campaign",
            "Date",
            "Device",
            "Download Type",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Pre-Order",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Downloads Detailed",
    ),
    ResourceConfig(
        name="app-store-discovery-and-engagement-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Name",
            "Campaign",
            "Date",
            "Device",
            "Engagement Type",
            "Event",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "Unique Counts": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Discovery and Engagement Detailed",
    ),
    ResourceConfig(
        name="app-sessions-detailed",
        primary_key=[
            "Date",
            "App Name",
            "App Apple Identifier",
            "App Version",
            "Device",
            "Platform Version",
            "Source Type",
            "Source Info",
            "Campaign",
            "Page Type",
            "Page Title",
            "App Download Date",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Sessions": {"data_type": "bigint"},
            "Total Session Duration": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Sessions Detailed",
    ),
    ResourceConfig(
        name="app-store-installation-and-deletion-detailed",
        # NOTE(review): unlike the other entries, this key also lists the
        # bigint-hinted "Counts" and "Unique Devices" columns - confirm that
        # including them in the primary key is intentional.
        primary_key=[
            "App Apple Identifier",
            "App Download Date",
            "App Name",
            "App Version",
            "Campaign",
            "Counts",
            "Date",
            "Device",
            "Download Type",
            "Event",
            "Page Title",
            "Page Type",
            "Platform Version",
            "Source Info",
            "Source Type",
            "Territory",
            "Unique Devices",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "Counts": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
            "App Download Date": {"data_type": "date"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Installation and Deletion Detailed",
    ),
    ResourceConfig(
        name="app-store-purchases-detailed",
        primary_key=[
            "App Apple Identifier",
            "App Download Date",
            "App Name",
            "Campaign",
            "Content Apple Identifier",
            "Content Name",
            "Date",
            "Device",
            "Page Title",
            "Page Type",
            "Payment Method",
            "Platform Version",
            "Pre-Order",
            "Purchase Type",
            "Source Info",
            "Source Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            "App Download Date": {"data_type": "date"},
            "Content Apple Identifier": {"data_type": "bigint"},
            "Purchases": {"data_type": "bigint"},
            "Proceeds In USD": {"data_type": "double"},
            "Sales In USD": {"data_type": "double"},
            "Paying Users": {"data_type": "bigint"},
            "processing_date": {"data_type": "date"},
        },
        report_name="App Store Purchases Detailed",
    ),
    ResourceConfig(
        name="app-crashes-expanded",
        primary_key=[
            "App Name",
            "App Version",
            "Build",
            "Date",
            "Device",
            "Platform",
            "Release Type",
            "Territory",
        ],
        columns={
            "Date": {"data_type": "date"},
            "processing_date": {"data_type": "date"},
            "App Apple Identifier": {"data_type": "bigint"},
            # NOTE(review): singular "Count" here, while the other reports hint
            # "Counts" - confirm against this report's actual column header.
            "Count": {"data_type": "bigint"},
            "Unique Devices": {"data_type": "bigint"},
        },
        report_name="App Crashes Expanded",
    ),
]
@@ -0,0 +1,81 @@
1
+ from typing import Any, Optional
2
+
3
+ import dlt
4
+ import pyarrow as pa # type: ignore
5
+ from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
6
+ from dlt.extract.items import TTableHintTemplate
7
+
8
+
9
def _as_cursor_bound(value: Any, cursor_type: Any) -> Any:
    """
    Prepares an incremental bound for comparison against the cursor column.
    Args:
        value (Any): The incremental ``last_value`` or ``end_value`` (never None here).
        cursor_type (Any): Arrow data type of the cursor column.
    Returns:
        Any: ``value`` unchanged unless the cursor column is a timestamp column and
        ``value`` is not already a ``pa.TimestampScalar``; in that case a nanosecond
        timestamp scalar that carries the column's timezone (None for naive columns).
    """
    if not pa.types.is_timestamp(cursor_type):
        return value
    if isinstance(value, pa.TimestampScalar):
        return value
    # Reuse the column's timezone so the scalar and the column agree on
    # timezone-awareness when they are compared.
    return pa.scalar(value, type=pa.timestamp("ns", tz=cursor_type.tz))


def memory_mapped_arrow(
    path: str,
    columns: Optional[TTableSchemaColumns] = None,
    primary_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    merge_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    incremental: Optional[dlt.sources.incremental[Any]] = None,
):
    """
    Builds a dlt resource named "arrow_mmap" that yields one Arrow table read from ``path``.
    Args:
        path (str): File opened with ``pa.memory_map`` and read with ``pyarrow.ipc.open_file``.
        columns: Forwarded to ``dlt.resource`` as ``columns``.
        primary_key: Forwarded to ``dlt.resource`` as ``primary_key``.
        merge_key: Forwarded to ``dlt.resource`` as ``merge_key``.
        incremental: Default for the resource's ``incremental`` argument. When truthy, rows
            are kept only if the cursor column is >= ``last_value`` and <= ``end_value``
            (each bound is applied only when it is not None).
    Returns:
        The decorated ``arrow_mmap`` resource.
    Raises:
        KeyError: Raised when the resource is iterated if ``incremental.cursor_path``
            is not a column of the table.
    """

    @dlt.resource(
        name="arrow_mmap",
        columns=columns,  # type: ignore
        primary_key=primary_key,  # type: ignore
        merge_key=merge_key,  # type: ignore
    )
    def arrow_mmap(
        incremental: Optional[dlt.sources.incremental[Any]] = incremental,
    ):
        # pyarrow.compute is a submodule: import it explicitly instead of relying on
        # `pa.compute` having been loaded by some other module.
        import pyarrow.compute as pc  # type: ignore
        import pyarrow.ipc as ipc  # type: ignore

        with pa.memory_map(path, "rb") as mmap:
            reader: ipc.RecordBatchFileReader = ipc.open_file(mmap)
            table = reader.read_all()

        if incremental:
            cursor_path = incremental.cursor_path
            if cursor_path not in table.column_names:
                raise KeyError(
                    f"Cursor column '{cursor_path}' does not exist in table"
                )

            # Filtering never changes the schema, so the type is looked up once
            # and both bounds are coerced by the same rule.
            cursor_type = table.schema.field(cursor_path).type
            last_value = incremental.last_value
            end_value = incremental.end_value

            if last_value is not None:
                lower_bound = _as_cursor_bound(last_value, cursor_type)
                table = table.filter(pc.field(cursor_path) >= lower_bound)  # type: ignore

            if end_value is not None:
                upper_bound = _as_cursor_bound(end_value, cursor_type)
                table = table.filter(pc.field(cursor_path) <= upper_bound)  # type: ignore

        yield table

    return arrow_mmap
74
+
75
+
76
# Maximum number of rows per record batch requested by as_list().
BATCH_SIZE = 1000


def as_list(table: pa.Table):
    """Yield the rows of ``table`` one by one, converting one record batch at a time.

    The table is split with ``to_batches`` using BATCH_SIZE as the maximum chunk
    size, and each batch is turned into Python objects with ``to_pylist``.
    """
    for record_batch in table.to_batches(max_chunksize=BATCH_SIZE):
        for row in record_batch.to_pylist():
            yield row
@@ -0,0 +1,281 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This source provides data extraction from the Asana platform via their API.
17
+
18
+ It defines several functions to fetch data from different parts of Asana including
19
+ workspaces, projects, sections, tags, tasks, stories, teams, and users. These
20
+ functions are meant to be used as part of a data loading pipeline.
21
+ """
22
+
23
+ import typing as t
24
+ from typing import Any, Iterable
25
+
26
+ import dlt
27
+ from dlt.common.typing import TDataItem
28
+
29
+ from .helpers import get_client
30
+ from .settings import (
31
+ DEFAULT_START_DATE,
32
+ PROJECT_FIELDS,
33
+ REQUEST_TIMEOUT,
34
+ SECTION_FIELDS,
35
+ STORY_FIELDS,
36
+ TAG_FIELDS,
37
+ TASK_FIELDS,
38
+ TEAMS_FIELD,
39
+ USER_FIELDS,
40
+ WORKSPACE_FIELDS,
41
+ )
42
+
43
+
44
@dlt.source
def asana_source() -> Any:  # should be Sequence[DltResource]:
    """
    Bundles every resource and transformer defined in this module into one source.
    Returns:
        Sequence[DltResource]: workspaces, projects, sections, tags, tasks, stories,
        teams and users, in that order.
    """
    resources = [workspaces, projects, sections, tags, tasks, stories, teams, users]
    return resources
61
+
62
+
63
@dlt.resource(write_disposition="replace")
def workspaces(
    access_token: str = dlt.secrets.value, fields: Iterable[str] = WORKSPACE_FIELDS
) -> Iterable[TDataItem]:
    """
    Fetches and returns a list of workspaces from Asana.
    Args:
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of workspace fields to be retrieved from Asana API.
    Yields:
        dict: The workspace data.
    """
    # Pass the same request timeout as every other Asana call in this module, so
    # the root resource does not wait longer than the resources that depend on it.
    yield from get_client(access_token).workspaces.find_all(
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
76
+
77
+
78
@dlt.transformer(
    data_from=workspaces,
    write_disposition="replace",
)
@dlt.defer
def projects(
    workspace: TDataItem,
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = PROJECT_FIELDS,
) -> Iterable[TDataItem]:
    """
    Lists the projects of one Asana workspace.
    Args:
        workspace (dict): The workspace data; its "gid" selects the workspace.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of project fields to be retrieved from Asana API.
    Returns:
        list[dict]: The project data for the given workspace.
    """
    client = get_client(access_token)
    workspace_projects = client.projects.find_all(
        workspace=workspace["gid"],
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
    # Materialize the result so the whole page set is returned as one list.
    return [project for project in workspace_projects]
104
+
105
+
106
@dlt.transformer(
    data_from=projects,
    write_disposition="replace",
)
@dlt.defer
def sections(
    project_array: t.List[TDataItem],
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = SECTION_FIELDS,
) -> Iterable[TDataItem]:
    """
    Fetches all sections for each project in a list of projects from Asana.
    Args:
        project_array (list): The project data; each item's "gid" selects a project.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of section fields to be retrieved from Asana API.
    Returns:
        list[dict]: The sections data of all given projects, in project order.
    """
    # Build the client and the field list once: both are the same for every
    # project, and joining `fields` only once also keeps a one-shot iterable
    # from being exhausted after the first project.
    client = get_client(access_token)
    opt_fields = ",".join(fields)
    return [
        section
        for project in project_array
        for section in client.sections.get_sections_for_project(
            project_gid=project["gid"],
            timeout=REQUEST_TIMEOUT,
            opt_fields=opt_fields,
        )
    ]
134
+
135
+
136
@dlt.transformer(data_from=workspaces, write_disposition="replace")
@dlt.defer
def tags(
    workspace: TDataItem,
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = TAG_FIELDS,
) -> Iterable[TDataItem]:
    """
    Lists the tags of one Asana workspace.
    Args:
        workspace (dict): The workspace data; its "gid" selects the workspace.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of tag fields to be retrieved from Asana API.
    Returns:
        list[dict]: The tags data for the given workspace.
    """
    client = get_client(access_token)
    workspace_tags = client.tags.find_all(
        workspace=workspace["gid"],
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
    return list(workspace_tags)
160
+
161
+
162
@dlt.transformer(data_from=projects, write_disposition="merge", primary_key="gid")
def tasks(
    project_array: t.List[TDataItem],
    access_token: str = dlt.secrets.value,
    modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
        "modified_at",
        initial_value=DEFAULT_START_DATE,
        range_end="closed",
        range_start="closed",
    ),
    fields: Iterable[str] = TASK_FIELDS,
) -> Iterable[TDataItem]:
    """
    Fetches all tasks for each project in a list of projects from Asana.
    Args:
        project_array (list): The project data; each item's "gid" selects a project.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        modified_at (dlt.sources.incremental[str]): Incremental cursor on "modified_at"; its
            start_value is sent to the API as ``modified_since``.
        fields (Iterable[str]): The list of task fields to be retrieved from Asana API.
    Yields:
        dict: The task data of the given projects, in project order.
    """
    # Build the client and the field list once: both are the same for every
    # project, and joining `fields` only once also keeps a one-shot iterable
    # from being exhausted after the first project.
    client = get_client(access_token)
    opt_fields = ",".join(fields)
    for project in project_array:
        yield from client.tasks.find_all(
            project=project["gid"],
            timeout=REQUEST_TIMEOUT,
            modified_since=modified_at.start_value,
            opt_fields=opt_fields,
        )
195
+
196
+
197
@dlt.transformer(
    data_from=tasks,
    write_disposition="replace",
)
@dlt.defer
def stories(
    task: TDataItem,
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = STORY_FIELDS,
) -> Iterable[TDataItem]:
    """
    Lists the stories attached to one Asana task.
    Args:
        task (dict): The task data; its "gid" selects the task.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of story fields to be retrieved from Asana API.
    Returns:
        list[dict]: The stories data for the given task.
    """
    client = get_client(access_token)
    task_stories = client.stories.get_stories_for_task(
        task_gid=task["gid"],
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
    return list(task_stories)
224
+
225
+
226
@dlt.transformer(
    data_from=workspaces,
    write_disposition="replace",
)
@dlt.defer
def teams(
    workspace: TDataItem,
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = TEAMS_FIELD,
) -> Iterable[TDataItem]:
    """
    Lists the teams of one Asana workspace.
    Args:
        workspace (dict): The workspace data; its "gid" is passed as the organization.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of team fields to be retrieved from Asana API.
    Returns:
        list[dict]: The teams data for the given workspace.
    """
    client = get_client(access_token)
    workspace_teams = client.teams.find_by_organization(
        organization=workspace["gid"],
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
    return list(workspace_teams)
253
+
254
+
255
@dlt.transformer(
    data_from=workspaces,
    write_disposition="replace",
)
@dlt.defer
def users(
    workspace: TDataItem,
    access_token: str = dlt.secrets.value,
    fields: Iterable[str] = USER_FIELDS,
) -> Iterable[TDataItem]:
    """
    Lists the users of one Asana workspace.
    Args:
        workspace (dict): The workspace data; its "gid" selects the workspace.
        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file
        fields (Iterable[str]): The list of user fields to be retrieved from Asana API.
    Returns:
        list[dict]: The user data for the given workspace.
    """
    client = get_client(access_token)
    workspace_users = client.users.find_all(
        workspace=workspace["gid"],
        timeout=REQUEST_TIMEOUT,
        opt_fields=",".join(fields),
    )
    return list(workspace_users)
@@ -0,0 +1,30 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Asana source helpers"""
16
+
17
+ from asana import Client as AsanaClient
18
+
19
+
20
def get_client(
    access_token: str,
) -> AsanaClient:
    """
    Builds an Asana API client authenticated with an access token.
    Args:
        access_token (str): The access token handed to ``AsanaClient.access_token``.
    Returns:
        AsanaClient: The client created for that token.
    """
    client = AsanaClient.access_token(access_token)
    return client