connectors_sdk 8.3.0.0.pre.20220510T144908Z → 8.3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors_sdk/atlassian/custom_client.rb +15 -6
- data/lib/connectors_sdk/base/{http_call_wrapper.rb → connector.rb} +54 -23
- data/lib/connectors_sdk/base/custom_client.rb +1 -1
- data/lib/connectors_sdk/base/extractor.rb +3 -0
- data/lib/connectors_sdk/base/registry.rb +9 -4
- data/lib/connectors_sdk/confluence/extractor.rb +2 -7
- data/lib/connectors_sdk/confluence_cloud/connector.rb +110 -0
- data/lib/connectors_sdk/confluence_cloud/extractor.rb +2 -1
- data/lib/connectors_sdk/gitlab/adapter.rb +42 -0
- data/lib/connectors_sdk/gitlab/config.rb +26 -0
- data/lib/connectors_sdk/gitlab/connector.rb +71 -0
- data/lib/connectors_sdk/gitlab/custom_client.rb +40 -0
- data/lib/connectors_sdk/gitlab/extractor.rb +123 -0
- data/lib/connectors_sdk/office365/custom_client.rb +22 -63
- data/lib/connectors_sdk/office365/extractor.rb +12 -28
- data/lib/connectors_sdk/share_point/adapter.rb +12 -0
- data/lib/connectors_sdk/share_point/{http_call_wrapper.rb → connector.rb} +30 -6
- data/lib/connectors_sdk/stub_connector/connector.rb +62 -0
- data/lib/connectors_shared/constants.rb +12 -0
- data/lib/connectors_shared/job_status.rb +18 -0
- data/lib/connectors_shared/middleware/restrict_hostnames.rb +1 -1
- data/lib/connectors_shared.rb +1 -0
- metadata +131 -13
- data/lib/connectors_sdk/confluence_cloud/http_call_wrapper.rb +0 -59
- data/lib/stubs/enterprise_search/exception_tracking.rb +0 -43
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors_sdk/base/extractor'
|
10
|
+
require 'connectors_sdk/gitlab/custom_client'
|
11
|
+
require 'connectors_sdk/gitlab/adapter'
|
12
|
+
require 'connectors_sdk/gitlab/config'
|
13
|
+
require 'rack/utils'
|
14
|
+
|
15
|
+
module ConnectorsSdk
  module GitLab
    # Extractor for GitLab projects.
    #
    # Talks to the GitLab REST API through `client` and yields:
    # * project documents (optionally decorated with permissions),
    # * IDs of projects that no longer exist,
    # * the permission tokens that apply to a given user.
    class Extractor < ConnectorsSdk::Base::Extractor
      PAGE_SIZE = 100 # max is 100

      # Lists projects page by page and yields one `:create_or_update` action per project.
      #
      # @param modified_since [Time, String, nil] when present, only projects with activity
      #   after this moment are requested (sent as `last_activity_after`). Anything that is
      #   not a Time is converted with `Time.parse`.
      # @yield [action, document, download] `:create_or_update`, the adapted project document, `nil`
      # @raise [RuntimeError] when the projects request fails or the `Link` header has an
      #   unexpected format
      def yield_document_changes(modified_since: nil)
        query_params = {
          :pagination => :keyset,
          :per_page => PAGE_SIZE,
          :order_by => :id,
          :sort => :desc
        }
        # looks like it's an incremental sync
        if modified_since.present?
          # Time.parse (not Time.new) so that a timestamp string is read in full;
          # Time.new would treat a string argument as the year only on older Rubies.
          date_since = modified_since.is_a?(Time) ? modified_since : Time.parse(modified_since.to_s)
          query_params[:last_activity_after] = date_since.iso8601
        end

        next_page_link = nil

        loop do
          if next_page_link.present?
            # The header value wraps the URL in angle brackets; take everything up to '>'
            # and reuse that URL's query string as the parameters of the next request.
            matcher = /(https?:[^>]*)/.match(next_page_link)
            raise "Next page link has unexpected format: #{next_page_link}" unless matcher

            clean_query = URI.parse(matcher.captures[0]).query
            query_params = Rack::Utils.parse_query(clean_query)
          end
          response = client.get('projects', query_params)
          # Same failure style as the other requests in this class: fail loudly instead of
          # trying to interpret an error payload as a list of projects.
          unless response.success?
            raise "Could not get projects, response code: #{response.status}, response: #{response.body}"
          end

          JSON.parse(response.body).each do |doc|
            doc = doc.with_indifferent_access
            if config.index_permissions
              doc = doc.merge(project_permissions(doc[:id], doc[:visibility]))
            end
            yield :create_or_update, ConnectorsSdk::GitLab::Adapter.to_es_document(:project, doc), nil
          end

          next_page_link = response.headers['Link']
          break unless next_page_link.present?
        end
      end

      # Checks each given project ID against the API and yields those that return 404.
      #
      # @param ids [Array, nil] project IDs to check; nothing happens when blank
      # @yield [id] an ID whose project could not be found
      # @raise [RuntimeError] when a lookup fails with a status other than 404
      def yield_deleted_ids(ids)
        return unless ids.present?

        ids.each do |id|
          response = client.get("projects/#{id}")
          if response.status == 404
            # not found - assume deleted
            yield id
          elsif !response.success?
            raise "Could not get a project by ID: #{id}, response code: #{response.status}, response: #{response.body}"
          end
        end
      end

      # Builds the list of permission tokens for a user and yields it once.
      #
      # The list contains `user:<id>` and, when the user is not returned by the
      # `external => true` user search, also `type:internal`. For a blank user ID an
      # empty list is yielded.
      #
      # @param source_user_id [String, Integer, nil] GitLab user ID
      # @yield [permissions] array of permission token strings
      # @raise [RuntimeError] when the user lookup or the external-status lookup fails
      def yield_permissions(source_user_id)
        result = []
        if source_user_id.present?
          result.push("user:#{source_user_id}")

          user_response = client.get("users/#{source_user_id}")
          raise "User isn't found by ID: #{source_user_id}" unless user_response.success?

          username = JSON.parse(user_response.body).with_indifferent_access[:username]
          query = { :external => true, :username => username }
          external_response = client.get('users', query)
          raise "Could not check external user status by ID: #{source_user_id}" unless external_response.success?

          external_users = Hashie::Array.new(JSON.parse(external_response.body))
          # the user is not external
          result.push('type:internal') if external_users.empty?
        end
        yield result
      end

      private

      # Computes the permission fields to merge into a project document.
      #
      # @param id [String, Integer] project ID
      # @param visibility [String, Symbol] project visibility as reported by the API
      # @return [Hash] `{}` for public projects (or when permissions are not indexed),
      #   otherwise `{ :_allow_permissions => [...] }` with `type:internal` for internal
      #   projects plus one `user:<id>` entry per project member
      # @raise [RuntimeError] when the members request fails
      def project_permissions(id, visibility)
        result = []
        if visibility.to_sym == :public || !config.index_permissions
          # visible-to-all
          return {}
        end
        if visibility.to_sym == :internal
          result.push('type:internal')
        end
        response = client.get("projects/#{id}/members/all")
        unless response.success?
          raise "Could not get project members by project ID: #{id}, response code: #{response.status}, response: #{response.body}"
        end

        members = Hashie::Array.new(JSON.parse(response.body))
        # JSON.parse produces string keys, so read the ID by string key.
        result.concat(members.map { |user| "user:#{user['id']}" })
        { :_allow_permissions => result }
      end
    end
  end
end
|
@@ -52,7 +52,6 @@ module ConnectorsSdk
|
|
52
52
|
def initialize(access_token:, cursors: {}, ensure_fresh_auth: nil)
|
53
53
|
@access_token = access_token
|
54
54
|
@cursors = cursors || {}
|
55
|
-
@cursors[ConnectorsSdk::Office365::Extractor::DRIVE_IDS_CURSOR_KEY] ||= {}
|
56
55
|
super(:ensure_fresh_auth => ensure_fresh_auth)
|
57
56
|
end
|
58
57
|
|
@@ -77,12 +76,11 @@ module ConnectorsSdk
|
|
77
76
|
# recently created groups (new Private Team site will be there) to reduce friction and index this site
|
78
77
|
# earlier.
|
79
78
|
# See: https://github.com/elastic/ent-search/pull/3581
|
80
|
-
share_point_sites = (sites(:fields => %w[id]) + recent_share_point_group_sites(:fields => %[id]))
|
79
|
+
share_point_sites = (sites(:fields => %w[id,name]) + recent_share_point_group_sites(:fields => %w[id,name]))
|
81
80
|
|
82
81
|
share_point_sites
|
83
|
-
.
|
84
|
-
.
|
85
|
-
.map { |site_id| site_drives(site_id, :fields => fields) }
|
82
|
+
.uniq(&:id)
|
83
|
+
.map { |site| site_drives(site, :fields => fields) }
|
86
84
|
.flatten
|
87
85
|
.compact
|
88
86
|
end
|
@@ -104,47 +102,32 @@ module ConnectorsSdk
|
|
104
102
|
request_all(:endpoint => 'sites/', :fields => fields, :additional_query_params => { :search => '', :top => 10 })
|
105
103
|
end
|
106
104
|
|
107
|
-
def site_drives(
|
105
|
+
def site_drives(site, fields: [])
|
108
106
|
document_libraries(
|
109
|
-
request_all(:endpoint => "sites/#{
|
110
|
-
)
|
107
|
+
request_all(:endpoint => "sites/#{site.id}/drives/", :fields => fields)
|
108
|
+
).map do |drive|
|
109
|
+
drive.site_name = site.name
|
110
|
+
drive
|
111
|
+
end
|
111
112
|
rescue ClientError => e
|
112
113
|
ConnectorsShared::Logger.info("Received response of #{e.status_code} trying to get drive for Site with Id = #{site_id}: #{e.message}")
|
113
114
|
nil
|
114
115
|
end
|
115
116
|
|
116
|
-
def list_items(drive_id, fields: []
|
117
|
+
def list_items(drive_id, fields: [])
|
117
118
|
# MSFT Graph API does not have a recursive list items, have to do this dfs style
|
118
|
-
|
119
|
-
stack = if break_after_page && cursors['page_cursor'].present?
|
120
|
-
cursors.delete('page_cursor')
|
121
|
-
else
|
122
|
-
[get_root_item(drive_id, ['id']).id]
|
123
|
-
end
|
124
|
-
|
119
|
+
stack = [get_root_item(drive_id, ['id']).id]
|
125
120
|
# We rely on the id field below to perform our DFS
|
126
121
|
fields_with_id = fields.any? ? fields | ['id'] : fields
|
127
|
-
yielded = 0
|
128
122
|
while stack.any?
|
129
123
|
folder_id = stack.pop
|
130
|
-
item_children(drive_id, folder_id, :fields => fields_with_id
|
124
|
+
item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
|
131
125
|
if item.folder
|
132
126
|
stack << item.id
|
133
127
|
end
|
134
128
|
yield item
|
135
|
-
|
136
|
-
yielded += 1
|
137
129
|
end
|
138
130
|
|
139
|
-
if break_after_page && yielded >= 100
|
140
|
-
if cursors['item_children_next_link'].present?
|
141
|
-
stack << folder_id
|
142
|
-
end
|
143
|
-
if stack.any?
|
144
|
-
cursors['page_cursor'] = stack.dup
|
145
|
-
break
|
146
|
-
end
|
147
|
-
end
|
148
131
|
end
|
149
132
|
end
|
150
133
|
|
@@ -152,19 +135,16 @@ module ConnectorsSdk
|
|
152
135
|
request_endpoint(:endpoint => "drives/#{drive_id}/items/#{item_id}/permissions").value
|
153
136
|
end
|
154
137
|
|
155
|
-
def list_changes(drive_id:, start_delta_link: nil, last_modified: nil
|
138
|
+
def list_changes(drive_id:, start_delta_link: nil, last_modified: nil)
|
156
139
|
query_params = { :'$select' => %w(id content.downloadUrl lastModifiedDateTime lastModifiedBy root deleted file folder package name webUrl createdBy createdDateTime size).join(',') }
|
157
140
|
response =
|
158
|
-
if
|
159
|
-
request_json(:url => cursors.delete('page_cursor'))
|
160
|
-
elsif start_delta_link.nil?
|
141
|
+
if start_delta_link.nil?
|
161
142
|
endpoint = "drives/#{drive_id}/root/delta"
|
162
143
|
request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
163
144
|
else
|
164
145
|
request_json(:url => start_delta_link, :query_params => query_params)
|
165
146
|
end
|
166
147
|
|
167
|
-
yielded = 0
|
168
148
|
loop do
|
169
149
|
response.value.each do |change|
|
170
150
|
# MSFT Graph API does not allow us to view "changes" in chronological order, so if there is no cursor,
|
@@ -172,25 +152,18 @@ module ConnectorsSdk
|
|
172
152
|
# since to get another cursor, we would have to go through all the changes anyway
|
173
153
|
next if last_modified.present? && Time.parse(change.lastModifiedDateTime) < last_modified
|
174
154
|
next if change.root # We don't want to index the root of the drive
|
175
|
-
|
176
155
|
yield change
|
177
|
-
yielded += 1
|
178
|
-
end
|
179
|
-
|
180
|
-
if break_after_page && yielded >= 100 && response['@odata.nextLink'].present?
|
181
|
-
cursors['page_cursor'] = response['@odata.nextLink']
|
182
|
-
break
|
183
156
|
end
|
184
157
|
|
185
158
|
break if response['@odata.nextLink'].nil?
|
186
159
|
response = request_json(:url => response['@odata.nextLink'])
|
187
160
|
end
|
188
161
|
|
189
|
-
cursors[
|
162
|
+
cursors[drive_id] = response['@odata.deltaLink']
|
190
163
|
end
|
191
164
|
|
192
165
|
def get_latest_delta_link(drive_id)
|
193
|
-
cursors[
|
166
|
+
cursors[drive_id] || exhaustively_get_delta_link(drive_id)
|
194
167
|
end
|
195
168
|
|
196
169
|
def exhaustively_get_delta_link(drive_id)
|
@@ -210,6 +183,7 @@ module ConnectorsSdk
|
|
210
183
|
def download_item(download_url)
|
211
184
|
request(:url => download_url) do |request|
|
212
185
|
request.options.params_encoder = Office365DownloadParamsEncoder
|
186
|
+
request.options.timeout = 30
|
213
187
|
end.body
|
214
188
|
end
|
215
189
|
|
@@ -235,7 +209,7 @@ module ConnectorsSdk
|
|
235
209
|
|
236
210
|
groups(:fields => %w(id createdDateTime))
|
237
211
|
.select { |group| group.createdDateTime > created_date_time_threshold }
|
238
|
-
.map { |group| group_root_site(group.id, :fields =>
|
212
|
+
.map { |group| group_root_site(group.id, :fields => fields) }.compact
|
239
213
|
end
|
240
214
|
|
241
215
|
def document_libraries(drives)
|
@@ -263,30 +237,15 @@ module ConnectorsSdk
|
|
263
237
|
request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
|
264
238
|
end
|
265
239
|
|
266
|
-
def item_children(drive_id, item_id, fields: [],
|
267
|
-
|
268
|
-
|
269
|
-
response =
|
270
|
-
request_json(:url => next_link)
|
271
|
-
else
|
272
|
-
endpoint = "drives/#{drive_id}/items/#{item_id}/children"
|
273
|
-
query_params = transform_fields_to_request_query_params(fields)
|
274
|
-
request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
275
|
-
end
|
240
|
+
def item_children(drive_id, item_id, fields: [], &block)
|
241
|
+
endpoint = "drives/#{drive_id}/items/#{item_id}/children"
|
242
|
+
query_params = transform_fields_to_request_query_params(fields)
|
243
|
+
response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
276
244
|
|
277
|
-
yielded = 0
|
278
245
|
loop do
|
279
246
|
response.value.each(&block)
|
280
247
|
next_link = response['@odata.nextLink']
|
281
|
-
|
282
248
|
break if next_link.nil?
|
283
|
-
|
284
|
-
yielded += response.value.size
|
285
|
-
if break_after_page && yielded >= 100
|
286
|
-
cursors['item_children_next_link'] = next_link
|
287
|
-
break
|
288
|
-
end
|
289
|
-
|
290
249
|
response = request_json(:url => next_link)
|
291
250
|
end
|
292
251
|
end
|
@@ -14,52 +14,34 @@ module ConnectorsSdk
|
|
14
14
|
class Extractor < ConnectorsSdk::Base::Extractor
|
15
15
|
DRIVE_IDS_CURSOR_KEY = 'drive_ids'.freeze
|
16
16
|
|
17
|
-
def yield_document_changes(modified_since: nil,
|
17
|
+
def yield_document_changes(modified_since: nil, &block)
|
18
18
|
drives_to_index.each do |drive|
|
19
19
|
drive_id = drive.id
|
20
|
-
|
21
|
-
if break_after_page
|
22
|
-
current_drive_id = config.cursors['current_drive_id']
|
23
|
-
if current_drive_id.present? && current_drive_id > drive_id # they come alpha sorted
|
24
|
-
next
|
25
|
-
end
|
26
|
-
config.cursors['current_drive_id'] = drive_id
|
27
|
-
end
|
28
|
-
|
29
20
|
drive_owner_name = drive.dig(:owner, :user, :displayName)
|
30
21
|
drive_name = drive.name
|
22
|
+
site_name = drive.site_name
|
31
23
|
|
32
24
|
drive_id_to_delta_link = config.cursors.fetch(DRIVE_IDS_CURSOR_KEY, {})
|
33
25
|
begin
|
34
26
|
if start_delta_link = drive_id_to_delta_link[drive_id]
|
35
27
|
log_debug("Starting an incremental crawl with cursor for #{service_type.classify} with drive_id: #{drive_id}")
|
36
28
|
begin
|
37
|
-
yield_changes(drive_id, :start_delta_link => start_delta_link, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :
|
29
|
+
yield_changes(drive_id, :start_delta_link => start_delta_link, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
|
38
30
|
rescue ConnectorsSdk::Office365::CustomClient::Office365InvalidCursorsError
|
39
31
|
log_warn("Error listing changes with start_delta_link: #{start_delta_link}, falling back to full crawl")
|
40
|
-
yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :
|
32
|
+
yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
|
41
33
|
end
|
42
34
|
elsif modified_since.present?
|
43
35
|
log_debug("Starting an incremental crawl using last_modified (no cursor found) for #{service_type.classify} with drive_id: #{drive_id}")
|
44
|
-
yield_changes(drive_id, :last_modified => modified_since, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :
|
36
|
+
yield_changes(drive_id, :last_modified => modified_since, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
|
45
37
|
else
|
46
38
|
log_debug("Starting a full crawl #{service_type.classify} with drive_id: #{drive_id}")
|
47
|
-
yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :
|
39
|
+
yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
|
48
40
|
end
|
49
41
|
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
50
42
|
log_warn("Error searching and listing drive #{drive_id}")
|
51
43
|
capture_exception(e)
|
52
44
|
end
|
53
|
-
|
54
|
-
if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
|
55
|
-
break
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
|
60
|
-
@completed = true
|
61
|
-
config.overwrite_cursors!(retrieve_latest_cursors)
|
62
|
-
log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
|
63
45
|
end
|
64
46
|
|
65
47
|
nil
|
@@ -156,11 +138,12 @@ module ConnectorsSdk
|
|
156
138
|
ConnectorsShared::ExceptionTracking.capture_exception(office365_client_error, options)
|
157
139
|
end
|
158
140
|
|
159
|
-
def yield_drive_items(drive_id, drive_owner_name:, drive_name:,
|
160
|
-
client.list_items(drive_id
|
141
|
+
def yield_drive_items(drive_id, drive_owner_name:, drive_name:, site_name:, &block)
|
142
|
+
client.list_items(drive_id) do |item|
|
161
143
|
yield_single_document_change(:identifier => "Office365 change: #{item&.id} (#{Office365::Adapter::GraphItem.get_path(item)})") do
|
162
144
|
item.drive_owner_name = drive_owner_name
|
163
145
|
item.drive_name = drive_name
|
146
|
+
item.site_name = site_name
|
164
147
|
yield_create_or_update(drive_id, item, &block)
|
165
148
|
end
|
166
149
|
end
|
@@ -174,11 +157,12 @@ module ConnectorsSdk
|
|
174
157
|
end
|
175
158
|
end
|
176
159
|
|
177
|
-
def yield_changes(drive_id, drive_owner_name:, drive_name:, start_delta_link: nil, last_modified: nil,
|
178
|
-
client.list_changes(:drive_id => drive_id, :start_delta_link => start_delta_link, :last_modified => last_modified
|
160
|
+
def yield_changes(drive_id, drive_owner_name:, drive_name:, site_name:, start_delta_link: nil, last_modified: nil, &block)
|
161
|
+
client.list_changes(:drive_id => drive_id, :start_delta_link => start_delta_link, :last_modified => last_modified) do |item|
|
179
162
|
yield_single_document_change(:identifier => "Office365 change: #{item&.id} (#{Office365::Adapter::GraphItem.get_path(item)})") do
|
180
163
|
item.drive_owner_name = drive_owner_name
|
181
164
|
item.drive_name = drive_name
|
165
|
+
item.site_name = site_name
|
182
166
|
yield_correct_actions_and_converted_item(drive_id, item, &block)
|
183
167
|
end
|
184
168
|
end
|
@@ -10,6 +10,12 @@ require 'connectors_sdk/office365/adapter'
|
|
10
10
|
|
11
11
|
module ConnectorsSdk
|
12
12
|
module SharePoint
|
13
|
+
module SitePrefix
|
14
|
+
def get_path(item)
|
15
|
+
item.site_name.present? ? "/sites/#{item.site_name}#{super}" : super
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
13
19
|
class Adapter < Office365::Adapter
|
14
20
|
generate_id_helpers :share_point, 'share_point'
|
15
21
|
|
@@ -26,18 +32,24 @@ module ConnectorsSdk
|
|
26
32
|
end
|
27
33
|
|
28
34
|
class FileGraphItem < Office365::Adapter::FileGraphItem
|
35
|
+
include SitePrefix
|
36
|
+
|
29
37
|
def self.convert_id_to_es_id(id)
|
30
38
|
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
31
39
|
end
|
32
40
|
end
|
33
41
|
|
34
42
|
class FolderGraphItem < Office365::Adapter::FolderGraphItem
|
43
|
+
include SitePrefix
|
44
|
+
|
35
45
|
def self.convert_id_to_es_id(id)
|
36
46
|
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
37
47
|
end
|
38
48
|
end
|
39
49
|
|
40
50
|
class PackageGraphItem < Office365::Adapter::PackageGraphItem
|
51
|
+
include SitePrefix
|
52
|
+
|
41
53
|
def self.convert_id_to_es_id(id)
|
42
54
|
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
43
55
|
end
|
@@ -6,22 +6,46 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'connectors_sdk/base/connector'
|
9
10
|
require 'connectors_sdk/office365/config'
|
10
11
|
require 'connectors_sdk/share_point/extractor'
|
11
12
|
require 'connectors_sdk/share_point/authorization'
|
12
|
-
require 'connectors_sdk/base/http_call_wrapper'
|
13
13
|
|
14
14
|
module ConnectorsSdk
|
15
15
|
module SharePoint
|
16
|
-
class
|
16
|
+
class Connector < ConnectorsSdk::Base::Connector
|
17
17
|
SERVICE_TYPE = 'share_point'
|
18
18
|
|
19
|
-
def
|
20
|
-
|
19
|
+
def compare_secrets(params)
|
20
|
+
missing_secrets?(params)
|
21
|
+
|
22
|
+
previous_user = client(:access_token => params[:other_secret][:access_token]).me
|
23
|
+
equivalent = previous_user.nil? ? false : previous_user.id == client(:access_token => params[:secret][:access_token]).me&.id
|
24
|
+
|
25
|
+
{
|
26
|
+
:equivalent => equivalent
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
def display_name
|
31
|
+
'SharePoint Online'
|
32
|
+
end
|
33
|
+
|
34
|
+
def connection_requires_redirect
|
35
|
+
true
|
21
36
|
end
|
22
37
|
|
23
|
-
def
|
24
|
-
|
38
|
+
def configurable_fields
|
39
|
+
[
|
40
|
+
{
|
41
|
+
'key' => 'client_id',
|
42
|
+
'label' => 'Client ID'
|
43
|
+
},
|
44
|
+
{
|
45
|
+
'key' => 'client_secret',
|
46
|
+
'label' => 'Client Secret'
|
47
|
+
},
|
48
|
+
]
|
25
49
|
end
|
26
50
|
|
27
51
|
private
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors_sdk/base/connector'
|
10
|
+
|
11
|
+
module ConnectorsSdk
  module StubConnector
    # Connector that talks to no third party: every call answers with fixed data.
    class Connector < ConnectorsSdk::Base::Connector
      SERVICE_TYPE = 'stub_connector'

      # @return [String] name of this connector
      def display_name
        'Stub Connector'
      end

      # @return [Array<Hash>] key/label pairs describing the configurable fields
      def configurable_fields
        url_field = { 'key' => 'third_party_url', 'label' => 'Third Party URL' }
        api_key_field = { 'key' => 'third_party_api_key', 'label' => 'Third Party API Key' }
        [url_field, api_key_field]
      end

      # Always reports healthy; the argument is ignored.
      def health_check(_params)
        true
      end

      # Produces 30 generated `:create_or_update` entries; the argument is ignored.
      #
      # @return [Array] the entries, an empty hash and `true`, in that order
      def document_batch(_params)
        entries = Array.new(30) do |index|
          generated_document = {
            :id => "document_#{index}",
            :type => 'document',
            :body => "contents for document number: #{index}"
          }
          { :action => :create_or_update, :document => generated_document, :download => nil }
        end

        [entries, {}, true]
      end

      # @return [Array] always empty
      def deleted(_params)
        []
      end

      # @return [Array] always empty
      def permissions(_params)
        []
      end
    end
  end
end
|
@@ -10,5 +10,17 @@ module ConnectorsShared
|
|
10
10
|
SUBEXTRACTOR_RESERVED_FIELDS = %w[_subextracted_as_of _subextracted_version].freeze
|
11
11
|
ALLOW_FIELD = '_allow_permissions'.freeze
|
12
12
|
DENY_FIELD = '_deny_permissions'.freeze
|
13
|
+
|
14
|
+
# The following section reads as follows:
|
15
|
+
# The job will extract documents until the job queue size will reach
|
16
|
+
# JOB_QUEUE_SIZE_IDLE_THRESHOLD items. After that, the job will attempt to sleep
|
17
|
+
# for IDLE_SLEEP_TIME seconds and check the queue size again. If the queue is still
|
18
|
+
# full, it will sleep for maximum MAX_IDLE_ATTEMPTS times, and if the queue is still
|
19
|
+
# full, then job will be terminated.
|
20
|
+
JOB_QUEUE_SIZE_IDLE_THRESHOLD = 500 # How many documents the job queue stores until it sleeps
|
21
|
+
IDLE_SLEEP_TIME = 10 # For how long job queue will sleep before checking the queue size again
|
22
|
+
MAX_IDLE_ATTEMPTS = 30 # How many consecutive times job will try to sleep until it's destroyed
|
23
|
+
|
24
|
+
STALE_JOB_TIMEOUT = 60 * 30 # Time in seconds after which the job will be cleaned up if the job is considered stuck
|
13
25
|
end
|
14
26
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsShared
  # Status strings a job can carry, plus a membership check for them.
  class JobStatus
    CREATED = 'created'.freeze
    RUNNING = 'running'.freeze
    FINISHED = 'finished'.freeze
    FAILED = 'failed'.freeze

    # Every known status, built once instead of on each validity check.
    ALL = [CREATED, RUNNING, FINISHED, FAILED].freeze

    # @param status [Object] value to check
    # @return [Boolean] true when `status` equals one of the status constants
    def self.is_valid?(status)
      ALL.include?(status)
    end
  end
end
|
@@ -36,7 +36,7 @@ module ConnectorsShared
|
|
36
36
|
def ips_from_hosts(hosts)
|
37
37
|
hosts&.flat_map do |host|
|
38
38
|
if URL_PATTERN.match(host)
|
39
|
-
lookup_ips(URI.parse(host).
|
39
|
+
lookup_ips(Addressable::URI.parse(host).hostname)
|
40
40
|
elsif Resolv::IPv4::Regex.match(host) || Resolv::IPv6::Regex.match(host)
|
41
41
|
IPAddr.new(host)
|
42
42
|
else
|
data/lib/connectors_shared.rb
CHANGED
@@ -8,5 +8,6 @@ require 'connectors_shared/constants'
|
|
8
8
|
require 'connectors_shared/errors'
|
9
9
|
require 'connectors_shared/exception_tracking'
|
10
10
|
require 'connectors_shared/extension_mapping_util'
|
11
|
+
require 'connectors_shared/job_status'
|
11
12
|
require 'connectors_shared/logger'
|
12
13
|
require 'connectors_shared/monitor'
|