connectors_sdk 8.3.0.0.pre.20220510T144908Z → 8.3.0.0.pre.20220517T144653Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 94b75b9fa3a5f0c46a271a34d073f71977629db6095b6364c8710b0aab92374b
4
- data.tar.gz: d133f34052f43e0b8b65ba10eb946c55a6ef421a6296d2d4438e7f4f92b45696
3
+ metadata.gz: b86ca5e489e3cef3b9f2c04a462baf71d6b43805731b0cb52ba2f56f5078d6d3
4
+ data.tar.gz: 044e860f11163e82c63f66276c3d5628b761c5dfcc5168fc8f80b63ca87d19f0
5
5
  SHA512:
6
- metadata.gz: 712b819efcfa755ce19e4cdb7060dbadefecf37321d07bfe4e7bc9b18254e0f11f5fc29967ff171b1314064d77427bde730ab400a4a2dcde89b31dc6344d4a34
7
- data.tar.gz: fa1fedb0c7449b9b2b50b1a56a710178cd3343a108500938244987eb31c694ab0d0214f383df69b6efa6b384eb558697553813c9b93cb142f011b374e05f2c49
6
+ metadata.gz: 528fa5260cf80a3ebb918478e1be2e7cac1668588853ad72b1259095086090c15a0cbc028523b1de8b775b3b64ed7427c6fab7ffccc562d559c8164456b84c4b
7
+ data.tar.gz: a58d353e2b48ffda33aa287d0fa6bb1400c531aae2794995ff3b644b779426d10bb3efc8735d8e07c353fae64bc0b3b0af58427d989f499210a19244b23c1a35
@@ -74,7 +74,7 @@ module ConnectorsSdk
74
74
  faraday.use(*middleware_config)
75
75
  end
76
76
 
77
- faraday.adapter(:httpclient)
77
+ faraday.adapter :httpclient
78
78
  end
79
79
  end
80
80
 
@@ -22,31 +22,31 @@ module ConnectorsSdk
22
22
  )
23
23
  end
24
24
 
25
- def document_batch(params)
25
+ def extract(params)
26
26
  convert_third_party_errors do
27
- results = []
28
-
29
27
  extractor = extractor(params)
30
28
 
31
- extractor.yield_document_changes(:break_after_page => true, :modified_since => extractor.config.cursors['modified_since']) do |action, doc, download_args_and_proc|
29
+ extractor.yield_document_changes(:modified_since => extractor.config.cursors[:modified_since]) do |action, doc, download_args_and_proc|
32
30
  download_obj = nil
33
31
  if download_args_and_proc
34
32
  download_obj = {
35
- id: download_args_and_proc[0],
36
- name: download_args_and_proc[1],
37
- size: download_args_and_proc[2],
38
- download_args: download_args_and_proc[3]
33
+ id: download_args_and_proc[0],
34
+ name: download_args_and_proc[1],
35
+ size: download_args_and_proc[2],
36
+ download_args: download_args_and_proc[3]
39
37
  }
40
38
  end
41
39
 
42
- results << {
43
- :action => action,
44
- :document => doc,
45
- :download => download_obj
40
+ doc = {
41
+ :action => action,
42
+ :document => doc,
43
+ :download => download_obj
46
44
  }
45
+
46
+ yield doc
47
47
  end
48
48
 
49
- [results, extractor.config.cursors, extractor.completed]
49
+ extractor.config.to_h[:cursors]
50
50
  end
51
51
  end
52
52
 
@@ -20,7 +20,7 @@ module ConnectorsSdk
20
20
 
21
21
  ConnectorsSdk::Base::Extractor::TRANSIENT_SERVER_ERROR_CLASSES << Atlassian::CustomClient::ServiceUnavailableError
22
22
 
23
- def yield_document_changes(modified_since: nil, break_after_page: false)
23
+ def yield_document_changes(modified_since: nil)
24
24
  @space_permissions_cache = {}
25
25
  @content_restriction_cache = {}
26
26
  yield_spaces do |space|
@@ -50,11 +50,6 @@ module ConnectorsSdk
50
50
  yield :create_or_update, Confluence::Adapter.es_document_from_confluence_content(content, content_base_url, restrictions)
51
51
  end
52
52
  end
53
-
54
- if break_after_page
55
- @completed = true
56
- break
57
- end
58
53
  end
59
54
  end
60
55
 
@@ -52,7 +52,6 @@ module ConnectorsSdk
52
52
  def initialize(access_token:, cursors: {}, ensure_fresh_auth: nil)
53
53
  @access_token = access_token
54
54
  @cursors = cursors || {}
55
- @cursors[ConnectorsSdk::Office365::Extractor::DRIVE_IDS_CURSOR_KEY] ||= {}
56
55
  super(:ensure_fresh_auth => ensure_fresh_auth)
57
56
  end
58
57
 
@@ -77,12 +76,11 @@ module ConnectorsSdk
77
76
  # recently created groups (new Private Team site will be there) to reduce friction and index this site
78
77
  # earlier.
79
78
  # See: https://github.com/elastic/ent-search/pull/3581
80
- share_point_sites = (sites(:fields => %w[id]) + recent_share_point_group_sites(:fields => %[id]))
79
+ share_point_sites = (sites(:fields => %w[id,name]) + recent_share_point_group_sites(:fields => %w[id,name]))
81
80
 
82
81
  share_point_sites
83
- .map(&:id)
84
- .uniq
85
- .map { |site_id| site_drives(site_id, :fields => fields) }
82
+ .uniq(&:id)
83
+ .map { |site| site_drives(site, :fields => fields) }
86
84
  .flatten
87
85
  .compact
88
86
  end
@@ -104,47 +102,32 @@ module ConnectorsSdk
104
102
  request_all(:endpoint => 'sites/', :fields => fields, :additional_query_params => { :search => '', :top => 10 })
105
103
  end
106
104
 
107
- def site_drives(site_id, fields: [])
105
+ def site_drives(site, fields: [])
108
106
  document_libraries(
109
- request_all(:endpoint => "sites/#{site_id}/drives/", :fields => fields)
110
- )
107
+ request_all(:endpoint => "sites/#{site.id}/drives/", :fields => fields)
108
+ ).map do |drive|
109
+ drive.site_name = site.name
110
+ drive
111
+ end
111
112
  rescue ClientError => e
112
113
  ConnectorsShared::Logger.info("Received response of #{e.status_code} trying to get drive for Site with Id = #{site_id}: #{e.message}")
113
114
  nil
114
115
  end
115
116
 
116
- def list_items(drive_id, fields: [], break_after_page: false)
117
+ def list_items(drive_id, fields: [])
117
118
  # MSFT Graph API does not have a recursive list items, have to do this dfs style
118
-
119
- stack = if break_after_page && cursors['page_cursor'].present?
120
- cursors.delete('page_cursor')
121
- else
122
- [get_root_item(drive_id, ['id']).id]
123
- end
124
-
119
+ stack = [get_root_item(drive_id, ['id']).id]
125
120
  # We rely on the id field below to perform our DFS
126
121
  fields_with_id = fields.any? ? fields | ['id'] : fields
127
- yielded = 0
128
122
  while stack.any?
129
123
  folder_id = stack.pop
130
- item_children(drive_id, folder_id, :fields => fields_with_id, :break_after_page => break_after_page) do |item|
124
+ item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
131
125
  if item.folder
132
126
  stack << item.id
133
127
  end
134
128
  yield item
135
-
136
- yielded += 1
137
129
  end
138
130
 
139
- if break_after_page && yielded >= 100
140
- if cursors['item_children_next_link'].present?
141
- stack << folder_id
142
- end
143
- if stack.any?
144
- cursors['page_cursor'] = stack.dup
145
- break
146
- end
147
- end
148
131
  end
149
132
  end
150
133
 
@@ -152,19 +135,16 @@ module ConnectorsSdk
152
135
  request_endpoint(:endpoint => "drives/#{drive_id}/items/#{item_id}/permissions").value
153
136
  end
154
137
 
155
- def list_changes(drive_id:, start_delta_link: nil, last_modified: nil, break_after_page: false)
138
+ def list_changes(drive_id:, start_delta_link: nil, last_modified: nil)
156
139
  query_params = { :'$select' => %w(id content.downloadUrl lastModifiedDateTime lastModifiedBy root deleted file folder package name webUrl createdBy createdDateTime size).join(',') }
157
140
  response =
158
- if break_after_page && cursors['page_cursor'].present?
159
- request_json(:url => cursors.delete('page_cursor'))
160
- elsif start_delta_link.nil?
141
+ if start_delta_link.nil?
161
142
  endpoint = "drives/#{drive_id}/root/delta"
162
143
  request_endpoint(:endpoint => endpoint, :query_params => query_params)
163
144
  else
164
145
  request_json(:url => start_delta_link, :query_params => query_params)
165
146
  end
166
147
 
167
- yielded = 0
168
148
  loop do
169
149
  response.value.each do |change|
170
150
  # MSFT Graph API does not allow us to view "changes" in chronological order, so if there is no cursor,
@@ -172,25 +152,18 @@ module ConnectorsSdk
172
152
  # since to get another cursor, we would have to go through all the changes anyway
173
153
  next if last_modified.present? && Time.parse(change.lastModifiedDateTime) < last_modified
174
154
  next if change.root # We don't want to index the root of the drive
175
-
176
155
  yield change
177
- yielded += 1
178
- end
179
-
180
- if break_after_page && yielded >= 100 && response['@odata.nextLink'].present?
181
- cursors['page_cursor'] = response['@odata.nextLink']
182
- break
183
156
  end
184
157
 
185
158
  break if response['@odata.nextLink'].nil?
186
159
  response = request_json(:url => response['@odata.nextLink'])
187
160
  end
188
161
 
189
- cursors[ConnectorsSdk::Office365::Extractor::DRIVE_IDS_CURSOR_KEY][drive_id] = response['@odata.deltaLink']
162
+ cursors[drive_id] = response['@odata.deltaLink']
190
163
  end
191
164
 
192
165
  def get_latest_delta_link(drive_id)
193
- cursors[ConnectorsSdk::Office365::Extractor::DRIVE_IDS_CURSOR_KEY][drive_id] || exhaustively_get_delta_link(drive_id)
166
+ cursors[drive_id] || exhaustively_get_delta_link(drive_id)
194
167
  end
195
168
 
196
169
  def exhaustively_get_delta_link(drive_id)
@@ -210,6 +183,7 @@ module ConnectorsSdk
210
183
  def download_item(download_url)
211
184
  request(:url => download_url) do |request|
212
185
  request.options.params_encoder = Office365DownloadParamsEncoder
186
+ request.options.timeout = 30
213
187
  end.body
214
188
  end
215
189
 
@@ -235,7 +209,7 @@ module ConnectorsSdk
235
209
 
236
210
  groups(:fields => %w(id createdDateTime))
237
211
  .select { |group| group.createdDateTime > created_date_time_threshold }
238
- .map { |group| group_root_site(group.id, :fields => %w[id]) }.compact
212
+ .map { |group| group_root_site(group.id, :fields => fields) }.compact
239
213
  end
240
214
 
241
215
  def document_libraries(drives)
@@ -263,30 +237,15 @@ module ConnectorsSdk
263
237
  request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
264
238
  end
265
239
 
266
- def item_children(drive_id, item_id, fields: [], break_after_page: false, &block)
267
- next_link = cursors.delete('item_children_next_link') if break_after_page
268
-
269
- response = if next_link.present?
270
- request_json(:url => next_link)
271
- else
272
- endpoint = "drives/#{drive_id}/items/#{item_id}/children"
273
- query_params = transform_fields_to_request_query_params(fields)
274
- request_endpoint(:endpoint => endpoint, :query_params => query_params)
275
- end
240
+ def item_children(drive_id, item_id, fields: [], &block)
241
+ endpoint = "drives/#{drive_id}/items/#{item_id}/children"
242
+ query_params = transform_fields_to_request_query_params(fields)
243
+ response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
276
244
 
277
- yielded = 0
278
245
  loop do
279
246
  response.value.each(&block)
280
247
  next_link = response['@odata.nextLink']
281
-
282
248
  break if next_link.nil?
283
-
284
- yielded += response.value.size
285
- if break_after_page && yielded >= 100
286
- cursors['item_children_next_link'] = next_link
287
- break
288
- end
289
-
290
249
  response = request_json(:url => next_link)
291
250
  end
292
251
  end
@@ -14,52 +14,34 @@ module ConnectorsSdk
14
14
  class Extractor < ConnectorsSdk::Base::Extractor
15
15
  DRIVE_IDS_CURSOR_KEY = 'drive_ids'.freeze
16
16
 
17
- def yield_document_changes(modified_since: nil, break_after_page: false, &block)
17
+ def yield_document_changes(modified_since: nil, &block)
18
18
  drives_to_index.each do |drive|
19
19
  drive_id = drive.id
20
-
21
- if break_after_page
22
- current_drive_id = config.cursors['current_drive_id']
23
- if current_drive_id.present? && current_drive_id > drive_id # they come alpha sorted
24
- next
25
- end
26
- config.cursors['current_drive_id'] = drive_id
27
- end
28
-
29
20
  drive_owner_name = drive.dig(:owner, :user, :displayName)
30
21
  drive_name = drive.name
22
+ site_name = drive.site_name
31
23
 
32
24
  drive_id_to_delta_link = config.cursors.fetch(DRIVE_IDS_CURSOR_KEY, {})
33
25
  begin
34
26
  if start_delta_link = drive_id_to_delta_link[drive_id]
35
27
  log_debug("Starting an incremental crawl with cursor for #{service_type.classify} with drive_id: #{drive_id}")
36
28
  begin
37
- yield_changes(drive_id, :start_delta_link => start_delta_link, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :break_after_page => break_after_page, &block)
29
+ yield_changes(drive_id, :start_delta_link => start_delta_link, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
38
30
  rescue ConnectorsSdk::Office365::CustomClient::Office365InvalidCursorsError
39
31
  log_warn("Error listing changes with start_delta_link: #{start_delta_link}, falling back to full crawl")
40
- yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :break_after_page => break_after_page, &block)
32
+ yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
41
33
  end
42
34
  elsif modified_since.present?
43
35
  log_debug("Starting an incremental crawl using last_modified (no cursor found) for #{service_type.classify} with drive_id: #{drive_id}")
44
- yield_changes(drive_id, :last_modified => modified_since, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :break_after_page => break_after_page, &block)
36
+ yield_changes(drive_id, :last_modified => modified_since, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
45
37
  else
46
38
  log_debug("Starting a full crawl #{service_type.classify} with drive_id: #{drive_id}")
47
- yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :break_after_page => break_after_page, &block)
39
+ yield_drive_items(drive_id, :drive_owner_name => drive_owner_name, :drive_name => drive_name, :site_name => site_name, &block)
48
40
  end
49
41
  rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
50
42
  log_warn("Error searching and listing drive #{drive_id}")
51
43
  capture_exception(e)
52
44
  end
53
-
54
- if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
55
- break
56
- end
57
- end
58
-
59
- if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
60
- @completed = true
61
- config.overwrite_cursors!(retrieve_latest_cursors)
62
- log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
63
45
  end
64
46
 
65
47
  nil
@@ -156,11 +138,12 @@ module ConnectorsSdk
156
138
  ConnectorsShared::ExceptionTracking.capture_exception(office365_client_error, options)
157
139
  end
158
140
 
159
- def yield_drive_items(drive_id, drive_owner_name:, drive_name:, break_after_page: false, &block)
160
- client.list_items(drive_id, break_after_page: break_after_page) do |item|
141
+ def yield_drive_items(drive_id, drive_owner_name:, drive_name:, site_name:, &block)
142
+ client.list_items(drive_id) do |item|
161
143
  yield_single_document_change(:identifier => "Office365 change: #{item&.id} (#{Office365::Adapter::GraphItem.get_path(item)})") do
162
144
  item.drive_owner_name = drive_owner_name
163
145
  item.drive_name = drive_name
146
+ item.site_name = site_name
164
147
  yield_create_or_update(drive_id, item, &block)
165
148
  end
166
149
  end
@@ -174,11 +157,12 @@ module ConnectorsSdk
174
157
  end
175
158
  end
176
159
 
177
- def yield_changes(drive_id, drive_owner_name:, drive_name:, start_delta_link: nil, last_modified: nil, break_after_page: false, &block)
178
- client.list_changes(:drive_id => drive_id, :start_delta_link => start_delta_link, :last_modified => last_modified, :break_after_page => break_after_page) do |item|
160
+ def yield_changes(drive_id, drive_owner_name:, drive_name:, site_name:, start_delta_link: nil, last_modified: nil, &block)
161
+ client.list_changes(:drive_id => drive_id, :start_delta_link => start_delta_link, :last_modified => last_modified) do |item|
179
162
  yield_single_document_change(:identifier => "Office365 change: #{item&.id} (#{Office365::Adapter::GraphItem.get_path(item)})") do
180
163
  item.drive_owner_name = drive_owner_name
181
164
  item.drive_name = drive_name
165
+ item.site_name = site_name
182
166
  yield_correct_actions_and_converted_item(drive_id, item, &block)
183
167
  end
184
168
  end
@@ -10,6 +10,12 @@ require 'connectors_sdk/office365/adapter'
10
10
 
11
11
  module ConnectorsSdk
12
12
  module SharePoint
13
+ module SitePrefix
14
+ def get_path(item)
15
+ item.site_name.present? ? "/sites/#{item.site_name}#{super}" : super
16
+ end
17
+ end
18
+
13
19
  class Adapter < Office365::Adapter
14
20
  generate_id_helpers :share_point, 'share_point'
15
21
 
@@ -26,18 +32,24 @@ module ConnectorsSdk
26
32
  end
27
33
 
28
34
  class FileGraphItem < Office365::Adapter::FileGraphItem
35
+ include SitePrefix
36
+
29
37
  def self.convert_id_to_es_id(id)
30
38
  ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
31
39
  end
32
40
  end
33
41
 
34
42
  class FolderGraphItem < Office365::Adapter::FolderGraphItem
43
+ include SitePrefix
44
+
35
45
  def self.convert_id_to_es_id(id)
36
46
  ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
37
47
  end
38
48
  end
39
49
 
40
50
  class PackageGraphItem < Office365::Adapter::PackageGraphItem
51
+ include SitePrefix
52
+
41
53
  def self.convert_id_to_es_id(id)
42
54
  ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
43
55
  end
@@ -0,0 +1,18 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ module ConnectorsShared
8
+ class JobStatus
9
+ CREATED = 'created'
10
+ RUNNING = 'running'
11
+ FINISHED = 'finished'
12
+ FAILED = 'failed'
13
+
14
+ def self.is_valid?(status)
15
+ [CREATED, RUNNING, FINISHED, FAILED].include? status
16
+ end
17
+ end
18
+ end
@@ -8,5 +8,6 @@ require 'connectors_shared/constants'
8
8
  require 'connectors_shared/errors'
9
9
  require 'connectors_shared/exception_tracking'
10
10
  require 'connectors_shared/extension_mapping_util'
11
+ require 'connectors_shared/job_status'
11
12
  require 'connectors_shared/logger'
12
13
  require 'connectors_shared/monitor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.3.0.0.pre.20220510T144908Z
4
+ version: 8.3.0.0.pre.20220517T144653Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-10 00:00:00.000000000 Z
11
+ date: 2022-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -92,6 +92,7 @@ files:
92
92
  - lib/connectors_shared/exception_tracking.rb
93
93
  - lib/connectors_shared/extension_mapping_util.rb
94
94
  - lib/connectors_shared/extraction_utils.rb
95
+ - lib/connectors_shared/job_status.rb
95
96
  - lib/connectors_shared/logger.rb
96
97
  - lib/connectors_shared/middleware/basic_auth.rb
97
98
  - lib/connectors_shared/middleware/bearer_auth.rb
@@ -102,7 +103,7 @@ homepage: https://github.com/elastic/connectors
102
103
  licenses:
103
104
  - Elastic-2.0
104
105
  metadata:
105
- revision: fb1187beef857b555633e1804eef3ed5e586091d
106
+ revision: 9f25f35e17ffb36dfda754d657794ed9b5d2d75a
106
107
  repository: git@github.com:elastic/connectors.git
107
108
  post_install_message:
108
109
  rdoc_options: []