multiwoven-integrations 0.36.1 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/multiwoven/integrations/core/constants.rb +8 -0
- data/lib/multiwoven/integrations/rollout.rb +2 -1
- data/lib/multiwoven/integrations/source/one_drive/client.rb +555 -0
- data/lib/multiwoven/integrations/source/one_drive/config/meta.json +15 -0
- data/lib/multiwoven/integrations/source/one_drive/config/spec.json +59 -0
- data/lib/multiwoven/integrations/source/one_drive/icon.svg +31 -0
- data/lib/multiwoven/integrations.rb +1 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9efeb07bcefbfd857742648e757324546f8eef5558f1d87094926c1034528cac
|
|
4
|
+
data.tar.gz: 1d62f9569a7c13fffedbc89f4c715fe8c3dac5f4cadf046a9ed845165bf009c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 49a2c0c6c317df5dfff3fab9f8cdcaea28a11c06e7a5082e248ef19bfd0297fa81ec64131abfbc680754121e26f5daa00e7984019c14311144e1706c592e2824
|
|
7
|
+
data.tar.gz: '087db530947bd9cb26238c8b7a283e72f31bba4767510cfe422736c4a8b6985e453001879cf094eb17270067cbcbc73c37084e9072c1895c5a4f661cd24c813a'
|
|
@@ -117,6 +117,14 @@ module Multiwoven
|
|
|
117
117
|
|
|
118
118
|
# Aisquared Bolt
|
|
119
119
|
AISQUARED_BOLT_URL = ENV["LIGHTNING_URL"]
|
|
120
|
+
|
|
121
|
+
# Microsoft Graph
|
|
122
|
+
MICROSOFT_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
|
123
|
+
MICROSOFT_GRAPH_SCOPE = "https://graph.microsoft.com/.default"
|
|
124
|
+
MICROSOFT_GRAPH_TOKEN_URL = "https://login.microsoftonline.com/%<tenant_id>s/oauth2/v2.0/token"
|
|
125
|
+
MICROSOFT_GRAPH_USER_DRIVE_URL = "#{MICROSOFT_GRAPH_BASE}/users/%<user_name>s/drive"
|
|
126
|
+
MICROSOFT_GRAPH_SHARE_ITEM_URL = "#{MICROSOFT_GRAPH_BASE}/shares/%<share_id>s/driveItem"
|
|
127
|
+
MICROSOFT_GRAPH_DRIVE_ITEM_URL = "#{MICROSOFT_GRAPH_BASE}/drives/%<drive_id>s/items/%<item_id>s"
|
|
120
128
|
end
|
|
121
129
|
end
|
|
122
130
|
end
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Multiwoven
|
|
4
4
|
module Integrations
|
|
5
|
-
VERSION = "0.
|
|
5
|
+
VERSION = "0.37.0"
|
|
6
6
|
|
|
7
7
|
ENABLED_SOURCES = %w[
|
|
8
8
|
Snowflake
|
|
@@ -35,6 +35,7 @@ module Multiwoven
|
|
|
35
35
|
GoogleDrive
|
|
36
36
|
Http
|
|
37
37
|
Aisquared
|
|
38
|
+
OneDrive
|
|
38
39
|
].freeze
|
|
39
40
|
|
|
40
41
|
ENABLED_DESTINATIONS = %w[
|
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Multiwoven::Integrations::Source
|
|
4
|
+
module OneDrive
|
|
5
|
+
include Multiwoven::Integrations::Core
|
|
6
|
+
class Client < UnstructuredSourceConnector
|
|
7
|
+
SPREADSHEET_EXTENSIONS = %w[.csv .xlsx .xls .xlsm].freeze
|
|
8
|
+
EXPIRED_ACCESS_TOKEN_ERROR_CODE = "InvalidAuthenticationToken"
|
|
9
|
+
|
|
10
|
+
# Verifies that the configured OneDrive location can be reached.
#
# Unstructured sources only open the connection and list the drive items.
# Structured sources additionally require at least one spreadsheet file and
# run a DESCRIBE over each of them.
#
# @param connection_config [Hash] connector settings (string or symbol keys)
# @return the value of +success_status+, or +failure_status(e)+ after the
#   error has been reported through +handle_exception+
def check_connection(connection_config)
  settings = connection_config.with_indifferent_access

  if unstructured_data?(settings)
    create_connection(settings)
    fetch_list_items
  else
    duckdb = create_connection(settings)
    @sync_id = "check_connection"
    candidates = spreadsheet_files(fetch_list_items)
    raise StandardError, "No spreadsheet files found" if candidates.empty?

    candidates.each do |candidate|
      describe_spreadsheet_file(duckdb, candidate)
    end
  end

  success_status
rescue StandardError, NotImplementedError => e
  error_details = { context: "ONE_DRIVE:CHECK_CONNECTION:EXCEPTION", type: "error" }
  handle_exception(e, error_details)
  failure_status(e)
end
|
|
32
|
+
|
|
33
|
+
# Builds the catalog of streams exposed by this source.
#
# Unstructured sources expose the single stream returned by
# +create_unstructured_stream+; structured sources expose one stream per
# spreadsheet file found in the drive listing.
#
# @param connection_config [Hash] connector settings (string or symbol keys)
# @return the catalog as a multiwoven message, or the result of
#   +handle_exception+ when anything raises
def discover(connection_config)
  settings = connection_config.with_indifferent_access

  streams =
    if unstructured_data?(settings)
      [create_unstructured_stream]
    else
      duckdb = create_connection(settings)
      @sync_id = "discover"
      candidates = spreadsheet_files(fetch_list_items)
      raise StandardError, "No spreadsheet files found" if candidates.empty?

      candidates.map { |candidate| discover_stream_for_file(duckdb, candidate) }
    end

  Catalog.new(streams: streams).to_multiwoven_message
rescue StandardError => e
  handle_exception(e, { context: "ONE_DRIVE:DISCOVER:EXCEPTION", type: "error" })
end
|
|
51
|
+
|
|
52
|
+
# Reads records for a sync.
#
# Unstructured sources are delegated to +handle_unstructured_data+. For
# structured sources the model query is optionally wrapped by +batched_query+
# (when a limit or an offset is present) and executed through +query+.
#
# @param sync_config the sync configuration (source, model, limit, offset,
#   sync_id, sync_run_id are read from it)
# @return the messages produced by +query+ / +handle_unstructured_data+, or
#   the result of +handle_exception+ when anything raises
def read(sync_config)
  source_settings = sync_config.source.connection_specification.with_indifferent_access
  @connector_instance = sync_config&.source&.connector_instance

  return handle_unstructured_data(sync_config) if unstructured_data?(source_settings)

  duckdb = create_connection(source_settings)

  @connection_config = source_settings
  @sync_id = sync_config.sync_id

  sql = sync_config.model.query
  paginated = !(sync_config.limit.nil? && sync_config.offset.nil?)
  sql = batched_query(sql, sync_config.limit, sync_config.offset) if paginated
  query(duckdb, sql)
rescue StandardError => e
  error_details = {
    context: "ONE_DRIVE:READ:EXCEPTION",
    type: "error",
    sync_id: sync_config.sync_id,
    sync_run_id: sync_config.sync_run_id
  }
  handle_exception(e, error_details)
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Copies the connector settings into instance variables and resolves the
# access token: a non-blank token stored on the connector instance is reused,
# otherwise a new one is requested via +refresh_access_token+.
#
# @param connection_config [Hash] settings readable with symbol keys
def load_connection_config(connection_config)
  %i[user_name tenant_id client_id client_secret data_type file_name share_url].each do |setting|
    instance_variable_set(:"@#{setting}", connection_config[setting])
  end

  cached_token = @connector_instance&.configuration&.dig("access_token")
  @access_token = cached_token.presence || refresh_access_token
end
|
|
88
|
+
|
|
89
|
+
# Loads the settings, resolves the drive id and, for structured data, opens a
# DuckDB connection.
#
# The drive id comes from the shared item when a share URL is configured,
# otherwise from the configured user's drive.
#
# @param connection_config [Hash] connector settings
# @return a DuckDB connection, or nil when the data type is "unstructured"
# @raise [StandardError] when the user-drive lookup is not successful
def create_connection(connection_config)
  load_connection_config(connection_config)

  @drive_id =
    if @share_url.present?
      shared_folder_reference[:drive_id]
    else
      drive_response = microsoft_graph_request(user_drive_url)
      raise graph_api_error(drive_response.body) unless success?(drive_response)

      JSON.parse(drive_response.body)["id"]
    end

  @data_type.to_s == "unstructured" ? nil : duckdb_connection
end
|
|
105
|
+
|
|
106
|
+
# Requests a new access token, remembers it on this client and hands it to
# +persist_access_token+.
#
# @return the token now held in @access_token
def refresh_access_token
  fresh_token = fetch_access_token
  @access_token = fresh_token
  persist_access_token(fresh_token)
  @access_token
end
|
|
111
|
+
|
|
112
|
+
# Stores the token under the "access_token" key of the connector instance's
# configuration. Does nothing when there is no connector instance or it has
# no configuration; a non-Hash configuration is replaced by a fresh Hash.
#
# @param token the access token to store
def persist_access_token(token)
  stored = @connector_instance&.configuration
  return unless stored

  base = stored.is_a?(Hash) ? stored : {}
  @connector_instance.update!(configuration: base.merge("access_token" => token))
end
|
|
119
|
+
|
|
120
|
+
# Performs a GET against Microsoft Graph. When the response body reports an
# expired access token, the token is refreshed and the request is issued one
# more time.
#
# @param url [String] the URL to fetch
# @return the HTTP response of the first attempt, or of the retry
def microsoft_graph_request(url)
  first_attempt = graph_http_get(url)

  if expired_access_token_error?(first_attempt.body)
    refresh_access_token
    graph_http_get(url)
  else
    first_attempt
  end
end
|
|
127
|
+
|
|
128
|
+
# Issues a single GET through the shared HttpClient using the headers built
# by +auth_headers+ from the current access token.
#
# @param url [String] the URL to fetch
# @return the HTTP response
def graph_http_get(url)
  request_headers = auth_headers(@access_token)
  Multiwoven::Integrations::Core::HttpClient.request(url, HTTP_GET, headers: request_headers)
end
|
|
135
|
+
|
|
136
|
+
# Requests an access token from the tenant's token endpoint using the
# client-credentials grant.
#
# @return the "access_token" value of the JSON response
# @raise [StandardError] when the token request is not successful
def fetch_access_token
  token_url = format(MICROSOFT_GRAPH_TOKEN_URL, tenant_id: @tenant_id)
  credentials = form_urlencoded_payload(
    client_id: @client_id,
    client_secret: @client_secret,
    scope: MICROSOFT_GRAPH_SCOPE,
    grant_type: "client_credentials"
  )
  form_headers = { "Content-Type" => "application/x-www-form-urlencoded" }

  token_response = Multiwoven::Integrations::Core::HttpClient.request(
    token_url, HTTP_POST, payload: credentials, headers: form_headers
  )
  raise graph_api_error(token_response.body) unless success?(token_response)

  JSON.parse(token_response.body)["access_token"]
end
|
|
154
|
+
|
|
155
|
+
# Executes an unstructured-data command taken from the model query: either
# the list-files command or the download-file command followed by a file
# path (surrounding quotes are removed).
#
# @param sync_config the sync configuration
# @return [Array] the messages produced by the selected command
# @raise [ArgumentError] when the query is neither of the supported commands
def handle_unstructured_data(sync_config)
  settings = sync_config.source.connection_specification.with_indifferent_access
  command = sync_config.model.query.strip
  create_connection(settings)

  case command
  when LIST_FILES_CMD
    list_files_in_folder(settings)
  when /^#{DOWNLOAD_FILE_CMD}\s+(.+)$/
    requested = ::Regexp.last_match(1).strip.gsub(/^["']|["']$/, "")
    download_unstructured_file(settings, requested, sync_config.sync_id)
  else
    raise ArgumentError, "Invalid command. Supported commands: #{LIST_FILES_CMD}, #{DOWNLOAD_FILE_CMD} <file_path>"
  end
end
|
|
171
|
+
|
|
172
|
+
# Emits one record message per file returned by +files_in_folder+, carrying
# the file's id, name, size, extension and timestamps (text is left empty).
#
# @param _connection_config unused
# @return [Array] multiwoven record messages
def list_files_in_folder(_connection_config)
  files_in_folder.map do |entry|
    name = entry["name"]
    payload = {
      element_id: entry["id"],
      file_name: name,
      file_path: name,
      size: entry["size"],
      file_type: File.extname(name).sub(".", ""),
      created_date: entry["createdDateTime"],
      modified_date: entry["lastModifiedDateTime"],
      text: ""
    }

    RecordMessage.new(data: payload, emitted_at: Time.now.to_i).to_multiwoven_message
  end
end
|
|
189
|
+
|
|
190
|
+
# Downloads one file from the listing to local storage and returns a single
# record message describing it (including the local path).
#
# @param _connection_config unused
# @param file_path [String] the requested file path or URL
# @param sync_id the sync identifier used for the download location
# @return [Array] a one-element array with the record message
# @raise [StandardError] "File not found." when no listed file has that name
def download_unstructured_file(_connection_config, file_path, sync_id)
  wanted_name = resolve_download_file_name(file_path)
  entry = files_in_folder.find { |candidate| candidate["name"] == wanted_name }
  raise StandardError, "File not found." if entry.nil?

  stored_at = download_file_to_local(
    wanted_name,
    sync_id,
    item_id: entry["id"],
    drive_id: entry.dig("parentReference", "driveId")
  )

  payload = {
    element_id: entry["id"],
    local_path: stored_at,
    file_name: wanted_name,
    file_path: wanted_name,
    size: entry["size"],
    file_type: File.extname(wanted_name).sub(".", ""),
    created_date: entry["createdDateTime"],
    modified_date: entry["lastModifiedDateTime"],
    text: ""
  }

  [RecordMessage.new(data: payload, emitted_at: Time.now.to_i).to_multiwoven_message]
end
|
|
217
|
+
|
|
218
|
+
# Returns the listed drive items that are not folders and whose name passes
# +matching_file_name?+.
#
# @return [Array<Hash>] the matching items
def files_in_folder
  fetch_list_items["value"].reject do |entry|
    entry["folder"].present? || !matching_file_name?(entry["name"])
  end
end
|
|
224
|
+
|
|
225
|
+
# Determines which file name to download. For a path starting with "http"
# the configured file name wins when it is non-blank; in every other case
# the basename of the given path is used.
#
# @param file_path [String] requested path or URL
# @return [String] the file name to look up
def resolve_download_file_name(file_path)
  if file_path.to_s.start_with?("http")
    configured = @file_name.to_s.strip.presence
    return configured if configured
  end

  File.basename(file_path)
end
|
|
230
|
+
|
|
231
|
+
# Tells whether an item name passes the optional file-name filter: every name
# matches when no file name is configured, otherwise only an exact match.
#
# @param name [String] the item name
# @return [Boolean]
def matching_file_name?(name)
  wanted = @file_name.to_s.strip
  return true if wanted.blank?

  wanted == name
end
|
|
235
|
+
|
|
236
|
+
# Builds the catalog stream for one spreadsheet file from the result of
# describing it with DuckDB.
#
# @param conn the DuckDB connection
# @param file [Hash] the drive item
# @return [Multiwoven::Integrations::Protocol::Stream]
def discover_stream_for_file(conn, file)
  columns = build_discover_columns(describe_spreadsheet_file(conn, file))

  Multiwoven::Integrations::Protocol::Stream.new(
    name: stream_name_for(file["name"]),
    action: StreamAction["fetch"],
    json_schema: convert_to_json_schema(columns)
  )
end
|
|
246
|
+
|
|
247
|
+
# Downloads a spreadsheet, runs "DESCRIBE SELECT * FROM ..." over it and
# returns the rows. The downloaded file is passed to
# +cleanup_ephemeral_download+ afterwards, whether or not an error occurred.
#
# @param conn the DuckDB connection
# @param file [Hash] the drive item ("name", "id", "parentReference" are read)
# @return [Array<Hash>] the DESCRIBE rows
def describe_spreadsheet_file(conn, file)
  downloaded = nil
  name = file["name"]
  downloaded = download_file_to_local(
    name,
    @sync_id,
    item_id: file["id"],
    drive_id: file.dig("parentReference", "driveId")
  )

  relation = read_local_file(conn, name, downloaded)
  get_results(conn, "DESCRIBE SELECT * FROM #{relation};")
ensure
  cleanup_ephemeral_download(downloaded)
end
|
|
261
|
+
|
|
262
|
+
# Converts DESCRIBE rows into column definitions, mapping each row's
# "column_type" through +column_schema_helper+.
#
# @param describe_results [Array<Hash>] rows with "column_name"/"column_type"
# @return [Array<Hash>] hashes with :column_name and :type
def build_discover_columns(describe_results)
  describe_results.map do |row|
    name, duckdb_type = row.values_at("column_name", "column_type")
    { column_name: name, type: column_schema_helper(duckdb_type) }
  end
end
|
|
270
|
+
|
|
271
|
+
# Maps DuckDB column types before convert_to_json_schema. Note that map_type_to_json_schema
# only recognizes "NUMBER" and "vector", so integer/number/boolean here still become "string"
# in the emitted json_schema (inherited from amazon_s3; not a typed-schema connector).
#
# The type name is compared case-insensitively and a trailing precision such
# as "(18,3)" is ignored, so "DECIMAL(18,3)" is treated as "DECIMAL". Any type
# that is not listed (including nil) maps to "string" instead of nil.
#
# @param column_type [String, nil] the "column_type" value of a DESCRIBE row
# @return [String] "number", "integer", "boolean" or "string"
def column_schema_helper(column_type)
  normalized = column_type.to_s.strip.upcase
  # Drop an optional "(precision[, scale])" suffix; anything more complex
  # (lists, "WITH TIME ZONE", ...) is left intact and falls through to "string".
  base_type = normalized[/\A([A-Z]+)(?:\(\d+\s*(?:,\s*\d+\s*)?\))?\z/, 1] || normalized

  case base_type
  when "DOUBLE", "FLOAT", "REAL", "DECIMAL", "NUMERIC"
    "number"
  when "BIGINT", "HUGEINT", "INTEGER", "SMALLINT", "TINYINT",
       "UBIGINT", "UHUGEINT", "UINTEGER", "USMALLINT", "UTINYINT"
    "integer"
  when "BOOLEAN"
    "boolean"
  else
    "string"
  end
end
|
|
286
|
+
|
|
287
|
+
# Runs a SQL query against the file named in its FROM clause: the file is
# downloaded, the FROM target is replaced by a DuckDB reader expression, and
# every result row becomes a record message. The downloaded file is passed to
# +cleanup_ephemeral_download+ afterwards, whether or not an error occurred.
#
# @param connection the DuckDB connection
# @param query [String] the SQL text
# @return [Array] multiwoven record messages
def query(connection, query)
  downloaded = nil
  source_name = extract_file_name_from_query(query)
  downloaded = download_file_to_local(source_name, @sync_id)

  relation = read_local_file(connection, source_name, downloaded)
  rewritten_sql = apply_local_file_to_query(query, relation)
  rows = get_results(connection, rewritten_sql)
  rows.map { |row| RecordMessage.new(data: row, emitted_at: Time.now.to_i).to_multiwoven_message }
ensure
  cleanup_ephemeral_download(downloaded)
end
|
|
300
|
+
|
|
301
|
+
# Pulls the target of the first FROM clause out of a SQL string. The target
# may be wrapped in backticks, double quotes or single quotes, or be a bare
# token ending at whitespace or a semicolon.
#
# @param sql_query [String] the SQL text
# @return [String] the FROM target without its quotes
# @raise [ArgumentError] when no FROM target is found
def extract_file_name_from_query(sql_query)
  from_target = sql_query.match(
    /\bFROM\s+(?:[`"]([^`"]+)[`"]|'([^']+)'|([^\s;]+))/i
  )
  file_name = from_target&.captures&.compact&.first
  return file_name if file_name

  raise ArgumentError, "Could not extract file name from query"
end
|
|
309
|
+
|
|
310
|
+
# Opens a DuckDB database, connects to it and executes INSTALL_HTTPFS_QUERY
# on the new connection.
#
# @return the DuckDB connection
def duckdb_connection
  DuckDB::Database.open.connect.tap do |connection|
    connection.execute(INSTALL_HTTPFS_QUERY)
  end
end
|
|
315
|
+
|
|
316
|
+
# Builds the DuckDB table-function expression that reads a downloaded file.
# Single quotes in the local path are doubled so the path is a valid SQL
# string literal. For Excel files the excel extension is installed and loaded
# on the connection first.
#
# NOTE(review): ".xls" is routed to read_xlsx as well — confirm the DuckDB
# excel extension accepts legacy .xls workbooks.
#
# @param conn the DuckDB connection (only used for Excel files)
# @param file_name [String] name whose extension selects the reader
# @param local_file [String] path of the downloaded file
# @return [String] "read_csv_auto('...')" or "read_xlsx('...')"
# @raise [ArgumentError] for any other extension
def read_local_file(conn, file_name, local_file)
  quoted_path = "'#{local_file.gsub("'", "''")}'"
  extension = File.extname(file_name).downcase

  if extension == ".csv"
    "read_csv_auto(#{quoted_path})"
  elsif %w[.xlsx .xls .xlsm].include?(extension)
    conn.execute("INSTALL excel; LOAD excel;")
    "read_xlsx(#{quoted_path})"
  else
    raise ArgumentError, "Unsupported file type: #{file_name}"
  end
end
|
|
329
|
+
|
|
330
|
+
# Replaces the target of the first FROM clause with the given DuckDB reader
# expression.
#
# The block form of String#sub is used on purpose: with a replacement
# *string*, backslash sequences inside +file+ (for example "\0" or "\1" in a
# path) would be interpreted as back-references and corrupt the SQL.
#
# @param sql_query [String] the original SQL text
# @param file [String] the expression to read from, e.g. "read_csv_auto('...')"
# @return [String] the SQL with its first FROM target replaced
def apply_local_file_to_query(sql_query, file)
  sql_query.sub(/\bFROM\s+(?:`[^`]+`|"[^"]+"|'[^']+'|[^\s;]+)/i) { "FROM #{file}" }
end
|
|
333
|
+
|
|
334
|
+
# Runs the SQL on the connection and converts the result set into an array
# of hashes via +hash_array_values+.
#
# @param conn the DuckDB connection
# @param sql_query [String] the SQL to run
# @return [Array<Hash>] one hash per row
def get_results(conn, sql_query)
  result_set = conn.query(sql_query)
  hash_array_values(result_set)
end
|
|
337
|
+
|
|
338
|
+
# Turns a result set into an array of hashes keyed by column name.
#
# @param results an object exposing +columns+ (each responding to +name+)
#   and +map+ over its rows
# @return [Array<Hash>] one hash per row
def hash_array_values(results)
  column_names = results.columns.map(&:name)
  results.map { |values| column_names.zip(values).to_h }
end
|
|
344
|
+
|
|
345
|
+
# Downloads a file's content and writes it (binary) to the path chosen by
# +local_download_path+, creating the parent directory if needed.
#
# @param file_name [String] the file to download
# @param sync_id the sync identifier used for the download location
# @param item_id optional drive item id
# @param drive_id optional drive id
# @return [String] the local path written
# @raise [StandardError] "Failed to download file ..." wrapping any failure
def download_file_to_local(file_name, sync_id, item_id: nil, drive_id: nil)
  target = local_download_path(file_name, sync_id)
  FileUtils.mkdir_p(File.dirname(target))

  content_url = file_content_url(file_name, item_id: item_id, drive_id: drive_id)
  content_response = fetch_file_content(content_url)
  raise graph_api_error(content_response.body) unless success?(content_response)

  File.binwrite(target, content_response.body)
  target
rescue StandardError => e
  raise StandardError, "Failed to download file #{file_name}: #{e.message}"
end
|
|
357
|
+
|
|
358
|
+
# Chooses where a download is stored. With FILE_DOWNLOAD_PATH set the file
# goes under "<path>/syncs/<sync_id>/"; otherwise a temporary directory is
# created once (and remembered in @temp_download_dir) and the file goes there.
#
# @param file_name [String] only its basename is used
# @param sync_id the sync identifier
# @return [String] the local path
def local_download_path(file_name, sync_id)
  configured_root = ENV["FILE_DOWNLOAD_PATH"]
  return File.join(configured_root, "syncs", sync_id, File.basename(file_name)) if configured_root

  @temp_download_dir ||= Dir.mktmpdir("one_drive_#{sync_id}")
  File.join(@temp_download_dir, File.basename(file_name))
end
|
|
367
|
+
|
|
368
|
+
# Deletes a download that was written to the temporary directory and, once
# that directory is empty, removes the directory as well so repeated runs do
# not accumulate empty temp directories.
#
# Nothing happens when no file is given, when FILE_DOWNLOAD_PATH is set, or
# when the file does not live under the temporary download directory.
#
# @param local_file [String, nil] path returned by download_file_to_local
# @return [nil]
def cleanup_ephemeral_download(local_file)
  return if local_file.blank? || ENV["FILE_DOWNLOAD_PATH"].present?
  return unless ephemeral_download?(local_file)

  File.delete(local_file) if File.exist?(local_file)
  remove_empty_temp_download_dir
  nil
end

# Removes the temporary download directory when it exists and is empty, and
# forgets it so the next download creates a fresh one.
def remove_empty_temp_download_dir
  temp_dir = @temp_download_dir
  return unless temp_dir && Dir.exist?(temp_dir) && Dir.empty?(temp_dir)

  Dir.rmdir(temp_dir)
  @temp_download_dir = nil
rescue SystemCallError
  # Best effort: this runs from `ensure` blocks, so a failed rmdir must not
  # replace the caller's result or original exception.
  nil
end
|
|
374
|
+
|
|
375
|
+
# Tells whether the path lies under the temporary download directory created
# by this client (false when no such directory has been created).
#
# @param local_file [String] the path to test
# @return [Boolean]
def ephemeral_download?(local_file)
  return false unless @temp_download_dir.present?

  local_file.start_with?(@temp_download_dir)
end
|
|
378
|
+
|
|
379
|
+
# Builds the Graph URL that serves a file's content. Precedence:
# an explicit item id (in the given drive, else the connection's drive),
# then a share URL that points at a single file, then path-based addressing
# by file name.
#
# @param file_name [String] used only for path-based addressing
# @param item_id optional drive item id
# @param drive_id optional drive id for +item_id+
# @return [String] the content URL
def file_content_url(file_name, item_id: nil, drive_id: nil)
  if item_id.present?
    "#{drive_item_url(drive_id || @drive_id, item_id)}/content"
  elsif @share_url.present? && shared_folder_reference[:is_file]
    shared = shared_folder_reference
    "#{drive_item_url(shared[:drive_id], shared[:item_id])}/content"
  else
    "#{single_file_item_url(file_name)}:/content"
  end
end
|
|
392
|
+
|
|
393
|
+
# Builds the path-addressed Graph URL of a file: relative to the shared item
# when a share URL is configured, otherwise relative to the drive root. The
# file name is escaped with URI::DEFAULT_PARSER.
#
# @param file_name [String] the file name or relative path
# @return [String] the item URL (without a trailing ":")
def single_file_item_url(file_name)
  escaped_name = URI::DEFAULT_PARSER.escape(file_name)

  parent_url =
    if @share_url.present?
      shared = shared_folder_reference
      drive_item_url(shared[:drive_id], shared[:item_id])
    else
      drive_root_url(@drive_id)
    end

  "#{parent_url}:/#{escaped_name}"
end
|
|
403
|
+
|
|
404
|
+
# Returns the URL listing the children of the shared item when a share URL is
# configured, otherwise of the drive root.
#
# @return [String]
def list_items_url
  parent_url = @share_url.present? ? share_item_url : drive_root_url(@drive_id)
  "#{parent_url}/children"
end
|
|
411
|
+
|
|
412
|
+
# Fills the configured user name into MICROSOFT_GRAPH_USER_DRIVE_URL.
#
# @return [String] the URL of that user's drive
def user_drive_url
  MICROSOFT_GRAPH_USER_DRIVE_URL % { user_name: @user_name }
end
|
|
415
|
+
|
|
416
|
+
# Fills the encoded share URL into MICROSOFT_GRAPH_SHARE_ITEM_URL.
#
# @return [String] the URL of the shared drive item
def share_item_url
  MICROSOFT_GRAPH_SHARE_ITEM_URL % { share_id: encode_sharing_url(@share_url) }
end
|
|
420
|
+
|
|
421
|
+
# Fills a drive id and an item id into MICROSOFT_GRAPH_DRIVE_ITEM_URL.
#
# @param drive_id the drive id
# @param item_id the item id
# @return [String] the URL of that drive item
def drive_item_url(drive_id, item_id)
  MICROSOFT_GRAPH_DRIVE_ITEM_URL % { drive_id: drive_id, item_id: item_id }
end
|
|
424
|
+
|
|
425
|
+
# Builds the URL of a drive's root item under MICROSOFT_GRAPH_BASE.
#
# @param drive_id the drive id
# @return [String]
def drive_root_url(drive_id)
  format("%s/drives/%s/root", MICROSOFT_GRAPH_BASE, drive_id)
end
|
|
428
|
+
|
|
429
|
+
# Fetches a content URL through Microsoft Graph. When the answer is an HTTP
# redirection, its "location" header is fetched with a plain GET (no headers
# are passed) and that response is returned instead.
#
# @param url [String] the content URL
# @return the final HTTP response
def fetch_file_content(url)
  graph_response = microsoft_graph_request(url)

  if graph_response.is_a?(Net::HTTPRedirection)
    Multiwoven::Integrations::Core::HttpClient.request(graph_response["location"], HTTP_GET)
  else
    graph_response
  end
end
|
|
435
|
+
|
|
436
|
+
# Returns the drive items to work on as a hash with a "value" array: the
# single configured file in single-file mode, the shared item itself when the
# share URL points at a file, otherwise the full paginated listing.
#
# @return [Hash] { "value" => [...] }
def fetch_list_items
  if single_file_mode?
    { "value" => [fetch_single_file_item] }
  elsif @share_url.present? && shared_folder_reference[:is_file]
    { "value" => [fetch_shared_item_metadata] }
  else
    paginated_graph_collection(list_items_url)
  end
end
|
|
443
|
+
|
|
444
|
+
# Fetches the drive item behind the configured share URL.
#
# @return [Hash] the parsed JSON item
# @raise [StandardError] when the request is not successful
def fetch_shared_item_metadata
  metadata_response = microsoft_graph_request(share_item_url)
  success?(metadata_response) || raise(graph_api_error(metadata_response.body))

  JSON.parse(metadata_response.body)
end
|
|
450
|
+
|
|
451
|
+
# True when a non-blank file name is configured and the data type is
# "unstructured".
#
# @return [Boolean]
def single_file_mode?
  return false if @file_name.to_s.strip.blank?

  @data_type.to_s == "unstructured"
end
|
|
454
|
+
|
|
455
|
+
# Fetches the metadata of the configured file: the shared item itself when
# the share URL points at a file, otherwise the item addressed by file name.
#
# @return [Hash] the parsed JSON item
# @raise [StandardError] when the request is not successful
def fetch_single_file_item
  shared_file = @share_url.present? && shared_folder_reference[:is_file]
  return fetch_shared_item_metadata if shared_file

  item_response = microsoft_graph_request(single_file_item_url(@file_name))
  success?(item_response) || raise(graph_api_error(item_response.body))

  JSON.parse(item_response.body)
end
|
|
463
|
+
|
|
464
|
+
# Collects every page of a Graph collection by following "@odata.nextLink"
# until a page no longer carries one.
#
# @param url [String] the URL of the first page
# @return [Hash] { "value" => all items of all pages }
# @raise [StandardError] when any page request is not successful
def paginated_graph_collection(url)
  collected = []
  cursor = url

  loop do
    page_response = microsoft_graph_request(cursor)
    raise graph_api_error(page_response.body) unless success?(page_response)

    page = JSON.parse(page_response.body)
    entries = page["value"] || []
    collected.concat(entries)

    cursor = page["@odata.nextLink"]
    break unless cursor.present?
  end

  { "value" => collected }
end
|
|
480
|
+
|
|
481
|
+
# Resolves the configured share URL to its drive id and item id, and records
# whether the shared item is a file. The result is memoized in
# @shared_folder_reference.
#
# @return [Hash] { drive_id:, item_id:, is_file: }
# @raise [StandardError] when the request is not successful or either id is
#   missing from the response
def shared_folder_reference
  return @shared_folder_reference if @shared_folder_reference

  share_response = microsoft_graph_request(share_item_url)
  raise graph_api_error(share_response.body) unless success?(share_response)

  shared_item = JSON.parse(share_response.body)
  drive_id = shared_item.dig("parentReference", "driveId")
  item_id = shared_item["id"]
  unresolved = drive_id.blank? || item_id.blank?
  raise StandardError, "Could not resolve shared folder drive reference" if unresolved

  @shared_folder_reference = {
    drive_id: drive_id,
    item_id: item_id,
    is_file: shared_item["file"].present?
  }
end
|
|
494
|
+
|
|
495
|
+
# Picks the spreadsheet files out of a listing: items that are not folders,
# whose extension is in SPREADSHEET_EXTENSIONS (case-insensitive) and whose
# name passes +matching_file_name?+.
#
# @param records [Hash] a listing with a "value" array
# @return [Array<Hash>] the matching items
def spreadsheet_files(records)
  records["value"].select do |record|
    next false if record["folder"].present?

    extension = File.extname(record["name"].to_s).downcase
    SPREADSHEET_EXTENSIONS.include?(extension) && matching_file_name?(record["name"])
  end
end
|
|
502
|
+
|
|
503
|
+
# The stream name is the file's basename *including* its extension:
# TableSelector generates `SELECT * FROM ${stream.name}`, and read_local_file
# picks the reader from File.extname.
#
# @param file_name [String] the drive item name
# @return [String] the stream name
def stream_name_for(file_name)
  File.split(file_name).last
end
|
|
508
|
+
|
|
509
|
+
# Encodes a sharing URL as a share id: URL-safe Base64 without padding,
# prefixed with "u!".
#
# @param url [String] the sharing URL
# @return [String] the share id
def encode_sharing_url(url)
  "u!#{Base64.urlsafe_encode64(url, padding: false)}"
end
|
|
513
|
+
|
|
514
|
+
# Builds (does not raise) a StandardError describing a failed response.
#
# Recognized shapes:
# * {"error": {"code": ..., "message": ...}}            -> "code: message"
# * {"error": "...", "error_description": "..."}        -> "error: description"
# * {"error": "..."}                                    -> "error"
# Anything else — including valid JSON that is not an object (null, true,
# numbers, arrays) and bodies that are not JSON at all — yields the raw body.
#
# @param response_body [String, nil] the response body
# @return [StandardError] the error to raise
def graph_api_error(response_body)
  parsed = JSON.parse(response_body)
  # Only a JSON object can carry an error envelope; indexing nil/true/Float
  # with "error" would raise NoMethodError and hide the real failure.
  error = parsed.is_a?(Hash) ? parsed["error"] : nil

  message = if error.is_a?(Hash)
              "#{error["code"]}: #{error["message"]}"
            elsif error.is_a?(String)
              description = parsed["error_description"]
              description.present? ? "#{error}: #{description}" : error
            else
              response_body
            end

  StandardError.new(message)
rescue JSON::ParserError, TypeError
  StandardError.new(response_body.to_s)
end
|
|
531
|
+
|
|
532
|
+
# Tells whether a response body is a Graph error envelope whose code equals
# EXPIRED_ACCESS_TOKEN_ERROR_CODE.
#
# This runs on every Graph response, including file downloads, so it must
# never raise: a nil body, a body that is not JSON, and valid JSON that is
# not an object (null, arrays, numbers, ...) all simply answer false.
#
# @param response_body [String, nil] the response body
# @return [Boolean]
def expired_access_token_error?(response_body)
  parsed = JSON.parse(response_body)
  error = parsed.is_a?(Hash) ? parsed["error"] : nil
  return false unless error.is_a?(Hash)

  error["code"] == EXPIRED_ACCESS_TOKEN_ERROR_CODE
rescue JSON::ParserError, TypeError
  # TypeError: JSON.parse was handed a nil (or otherwise non-String) body.
  false
end
|
|
540
|
+
|
|
541
|
+
# HttpClient.request always serializes the payload with to_json, while the
# Microsoft OAuth token endpoint expects an
# application/x-www-form-urlencoded body. The object returned here answers
# to_json with the form-encoded string, so HttpClient sends that instead of
# a JSON document.
#
# @param fields [Hash] the form fields
# @return [Object] a wrapper whose to_json is the form-encoded body
def form_urlencoded_payload(fields)
  Object.new.tap do |wrapper|
    wrapper.define_singleton_method(:to_json) do |*_ignored|
      URI.encode_www_form(fields)
    end
  end
end
|
|
553
|
+
end
|
|
554
|
+
end
|
|
555
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"data": {
|
|
3
|
+
"name": "OneDrive",
|
|
4
|
+
"title": "One Drive",
|
|
5
|
+
"connector_type": "source",
|
|
6
|
+
"category": "File Storage",
|
|
7
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/one_drive",
|
|
8
|
+
"github_issue_label": "source-one-drive",
|
|
9
|
+
"icon": "icon.svg",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"release_stage": "alpha",
|
|
12
|
+
"support_level": "community",
|
|
13
|
+
"tags": ["language:ruby", "multiwoven"]
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/one_drive",
|
|
3
|
+
"stream_type": "dynamic",
|
|
4
|
+
"connector_query_type": "raw_sql",
|
|
5
|
+
"connection_specification": {
|
|
6
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
7
|
+
"title": "OneDrive",
|
|
8
|
+
"type": "object",
|
|
9
|
+
"required": ["user_name", "tenant_id", "client_id", "client_secret"],
|
|
10
|
+
"properties": {
|
|
11
|
+
"data_type": {
|
|
12
|
+
"description": "Type of data in files",
|
|
13
|
+
"type": "string",
|
|
14
|
+
"title": "Data Format Type",
|
|
15
|
+
"oneOf": [
|
|
16
|
+
{ "const": "structured", "title": "Tables & Records (Structured)" },
|
|
17
|
+
{ "const": "unstructured", "title": "Documents & Files (Unstructured)" }
|
|
18
|
+
],
|
|
19
|
+
"default": "structured"
|
|
20
|
+
},
|
|
21
|
+
"user_name": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"description": "Email address of the OneDrive user to access on behalf of",
|
|
24
|
+
"title": "User Name"
|
|
25
|
+
},
|
|
26
|
+
"tenant_id": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "Azure AD tenant ID",
|
|
29
|
+
"title": "Tenant ID"
|
|
30
|
+
},
|
|
31
|
+
"client_id": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"multiwoven_secret": true,
|
|
34
|
+
"title": "Client ID"
|
|
35
|
+
},
|
|
36
|
+
"client_secret": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"multiwoven_secret": true,
|
|
39
|
+
"title": "Client Secret"
|
|
40
|
+
},
|
|
41
|
+
"share_url": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"title": "Share URL",
|
|
44
|
+
"description": "OneDrive or SharePoint sharing link for the folder (or a single file) to read from. When not set, the user's OneDrive root folder is used."
|
|
45
|
+
},
|
|
46
|
+
"file_name": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"title": "File Name",
|
|
49
|
+
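"description": "Optional. When set, only the file with this exact name is used: unstructured data fetches just this file, and structured data restricts discovery to the spreadsheet with this name."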
|
|
50
|
+
},
|
|
51
|
+
"access_token": {
|
|
52
|
+
"type": "string",
|
|
53
|
+
"multiwoven_secret": true,
|
|
54
|
+
"title": "Access Token",
|
|
55
|
+
"description": "Leave blank during connection setup. Will fetch access token from Microsoft Graph."
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
|
|
3
|
+
<svg width="800px" height="800px" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
4
|
+
<mask id="mask0_87_7796" style="mask-type:alpha" maskUnits="userSpaceOnUse" x="0" y="6" width="32" height="20">
|
|
5
|
+
<path d="M7.82979 26C3.50549 26 0 22.5675 0 18.3333C0 14.1921 3.35322 10.8179 7.54613 10.6716C9.27535 7.87166 12.4144 6 16 6C20.6308 6 24.5169 9.12183 25.5829 13.3335C29.1316 13.3603 32 16.1855 32 19.6667C32 23.0527 29 26 25.8723 25.9914L7.82979 26Z" fill="#C4C4C4"/>
|
|
6
|
+
</mask>
|
|
7
|
+
<g mask="url(#mask0_87_7796)">
|
|
8
|
+
<path d="M7.83017 26.0001C5.37824 26.0001 3.18957 24.8966 1.75391 23.1691L18.0429 16.3335L30.7089 23.4647C29.5926 24.9211 27.9066 26.0001 26.0004 25.9915C23.1254 26.0001 12.0629 26.0001 7.83017 26.0001Z" fill="url(#paint0_linear_87_7796)"/>
|
|
9
|
+
<path d="M25.5785 13.3149L18.043 16.3334L30.709 23.4647C31.5199 22.4065 32.0004 21.0916 32.0004 19.6669C32.0004 16.1857 29.1321 13.3605 25.5833 13.3337C25.5817 13.3274 25.5801 13.3212 25.5785 13.3149Z" fill="url(#paint1_linear_87_7796)"/>
|
|
10
|
+
<path d="M7.06445 10.7028L18.0423 16.3333L25.5779 13.3148C24.5051 9.11261 20.6237 6 15.9997 6C12.4141 6 9.27508 7.87166 7.54586 10.6716C7.3841 10.6773 7.22358 10.6877 7.06445 10.7028Z" fill="url(#paint2_linear_87_7796)"/>
|
|
11
|
+
<path d="M1.7535 23.1687L18.0425 16.3331L7.06471 10.7026C3.09947 11.0792 0 14.3517 0 18.3331C0 20.1665 0.657197 21.8495 1.7535 23.1687Z" fill="url(#paint3_linear_87_7796)"/>
|
|
12
|
+
</g>
|
|
13
|
+
<defs>
|
|
14
|
+
<linearGradient id="paint0_linear_87_7796" x1="4.42591" y1="24.6668" x2="27.2309" y2="23.2764" gradientUnits="userSpaceOnUse">
|
|
15
|
+
<stop stop-color="#2086B8"/>
|
|
16
|
+
<stop offset="1" stop-color="#46D3F6"/>
|
|
17
|
+
</linearGradient>
|
|
18
|
+
<linearGradient id="paint1_linear_87_7796" x1="23.8302" y1="19.6668" x2="30.2108" y2="15.2082" gradientUnits="userSpaceOnUse">
|
|
19
|
+
<stop stop-color="#1694DB"/>
|
|
20
|
+
<stop offset="1" stop-color="#62C3FE"/>
|
|
21
|
+
</linearGradient>
|
|
22
|
+
<linearGradient id="paint2_linear_87_7796" x1="8.51037" y1="7.33333" x2="23.3335" y2="15.9348" gradientUnits="userSpaceOnUse">
|
|
23
|
+
<stop stop-color="#0D3D78"/>
|
|
24
|
+
<stop offset="1" stop-color="#063B83"/>
|
|
25
|
+
</linearGradient>
|
|
26
|
+
<linearGradient id="paint3_linear_87_7796" x1="-0.340429" y1="19.9998" x2="14.5634" y2="14.4649" gradientUnits="userSpaceOnUse">
|
|
27
|
+
<stop stop-color="#16589B"/>
|
|
28
|
+
<stop offset="1" stop-color="#1464B7"/>
|
|
29
|
+
</linearGradient>
|
|
30
|
+
</defs>
|
|
31
|
+
</svg>
|
|
@@ -100,6 +100,7 @@ require_relative "integrations/source/odoo/client"
|
|
|
100
100
|
require_relative "integrations/source/google_drive/client"
|
|
101
101
|
require_relative "integrations/source/http/client"
|
|
102
102
|
require_relative "integrations/source/aisquared/client"
|
|
103
|
+
require_relative "integrations/source/one_drive/client"
|
|
103
104
|
|
|
104
105
|
# Destination
|
|
105
106
|
require_relative "integrations/destination/klaviyo/client"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: multiwoven-integrations
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.37.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Subin T P
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activesupport
|
|
@@ -819,6 +819,10 @@ files:
|
|
|
819
819
|
- lib/multiwoven/integrations/source/odoo/config/meta.json
|
|
820
820
|
- lib/multiwoven/integrations/source/odoo/config/spec.json
|
|
821
821
|
- lib/multiwoven/integrations/source/odoo/icon.svg
|
|
822
|
+
- lib/multiwoven/integrations/source/one_drive/client.rb
|
|
823
|
+
- lib/multiwoven/integrations/source/one_drive/config/meta.json
|
|
824
|
+
- lib/multiwoven/integrations/source/one_drive/config/spec.json
|
|
825
|
+
- lib/multiwoven/integrations/source/one_drive/icon.svg
|
|
822
826
|
- lib/multiwoven/integrations/source/open_ai/client.rb
|
|
823
827
|
- lib/multiwoven/integrations/source/open_ai/config/catalog.json
|
|
824
828
|
- lib/multiwoven/integrations/source/open_ai/config/meta.json
|