aws-sdk-kendra 1.26.0 → 1.27.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db6db0d7989bf395d9d66113616dd832292db250ceae884e79f1ff68fc72c2da
4
- data.tar.gz: 5d5c1a752c7eb99a07c0c60f67b913244b2731f0301eb4918171af9bdbe95ba6
3
+ metadata.gz: 18f9a0673af8278a3066d865646525130986d0d6fce53f4a84fc8b10efb26e21
4
+ data.tar.gz: 1b782bf81e35136fc3b2fd540bcc97821f38707b6159977a0e6243bc51b33253
5
5
  SHA512:
6
- metadata.gz: 3c11d993010e54b7ba55c7503fa00b331b0b34da815eac768955ec402bc436885327483dceac3b85a46da932002ad9be3049b3db2bac82eae20ed84900e5be53
7
- data.tar.gz: 13d14d88704eb78936b534371ef132863b4db06403b68648b7e3a03a53da263a1a2e5641e6cc5d5678241270af4b94960de35301e5cfb0c1fc1f3062400f121a
6
+ metadata.gz: e91319bbb2301cbf8c9fb6900922854688faf4fac89b62c2ce9c5e1f5f36265e33cc3e5faa5dbfb581128c06ea406acc49be1b6c168c9920707b87aa28dffd72
7
+ data.tar.gz: ab38e61328b8e4b91f9354a2f23e74b98040d4477742ad86219cfc5de2c79e3ca36423574a13dbe2c6de1b5d45e1a36de594a36d8d9e29cd464a08a760986408
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.27.0 (2021-06-17)
5
+ ------------------
6
+
7
+ * Feature - Amazon Kendra now supports the indexing of web documents for search through the web crawler.
8
+
4
9
  1.26.0 (2021-06-09)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.26.0
1
+ 1.27.0
@@ -48,6 +48,6 @@ require_relative 'aws-sdk-kendra/customizations'
48
48
  # @!group service
49
49
  module Aws::Kendra
50
50
 
51
- GEM_VERSION = '1.26.0'
51
+ GEM_VERSION = '1.27.0'
52
52
 
53
53
  end
@@ -679,7 +679,7 @@ module Aws::Kendra
679
679
  # resp = client.create_data_source({
680
680
  # name: "DataSourceName", # required
681
681
  # index_id: "IndexId", # required
682
- # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE
682
+ # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE, WEBCRAWLER
683
683
  # configuration: {
684
684
  # s3_configuration: {
685
685
  # bucket_name: "S3BucketName", # required
@@ -942,6 +942,37 @@ module Aws::Kendra
942
942
  # exclude_user_accounts: ["UserAccount"],
943
943
  # exclude_shared_drives: ["SharedDriveId"],
944
944
  # },
945
+ # web_crawler_configuration: {
946
+ # urls: { # required
947
+ # seed_url_configuration: {
948
+ # seed_urls: ["SeedUrl"], # required
949
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
950
+ # },
951
+ # site_maps_configuration: {
952
+ # site_maps: ["SiteMap"], # required
953
+ # },
954
+ # },
955
+ # crawl_depth: 1,
956
+ # max_links_per_page: 1,
957
+ # max_content_size_per_page_in_mega_bytes: 1.0,
958
+ # max_urls_per_minute_crawl_rate: 1,
959
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
960
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
961
+ # proxy_configuration: {
962
+ # host: "Host", # required
963
+ # port: 1, # required
964
+ # credentials: "SecretArn",
965
+ # },
966
+ # authentication_configuration: {
967
+ # basic_authentication: [
968
+ # {
969
+ # host: "Host", # required
970
+ # port: 1, # required
971
+ # credentials: "SecretArn", # required
972
+ # },
973
+ # ],
974
+ # },
975
+ # },
945
976
  # },
946
977
  # description: "Description",
947
978
  # schedule: "ScanSchedule",
@@ -1525,7 +1556,7 @@ module Aws::Kendra
1525
1556
  # resp.id #=> String
1526
1557
  # resp.index_id #=> String
1527
1558
  # resp.name #=> String
1528
- # resp.type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE"
1559
+ # resp.type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE", "WEBCRAWLER"
1529
1560
  # resp.configuration.s3_configuration.bucket_name #=> String
1530
1561
  # resp.configuration.s3_configuration.inclusion_prefixes #=> Array
1531
1562
  # resp.configuration.s3_configuration.inclusion_prefixes[0] #=> String
@@ -1711,6 +1742,26 @@ module Aws::Kendra
1711
1742
  # resp.configuration.google_drive_configuration.exclude_user_accounts[0] #=> String
1712
1743
  # resp.configuration.google_drive_configuration.exclude_shared_drives #=> Array
1713
1744
  # resp.configuration.google_drive_configuration.exclude_shared_drives[0] #=> String
1745
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.seed_urls #=> Array
1746
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.seed_urls[0] #=> String
1747
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.web_crawler_mode #=> String, one of "HOST_ONLY", "SUBDOMAINS", "EVERYTHING"
1748
+ # resp.configuration.web_crawler_configuration.urls.site_maps_configuration.site_maps #=> Array
1749
+ # resp.configuration.web_crawler_configuration.urls.site_maps_configuration.site_maps[0] #=> String
1750
+ # resp.configuration.web_crawler_configuration.crawl_depth #=> Integer
1751
+ # resp.configuration.web_crawler_configuration.max_links_per_page #=> Integer
1752
+ # resp.configuration.web_crawler_configuration.max_content_size_per_page_in_mega_bytes #=> Float
1753
+ # resp.configuration.web_crawler_configuration.max_urls_per_minute_crawl_rate #=> Integer
1754
+ # resp.configuration.web_crawler_configuration.url_inclusion_patterns #=> Array
1755
+ # resp.configuration.web_crawler_configuration.url_inclusion_patterns[0] #=> String
1756
+ # resp.configuration.web_crawler_configuration.url_exclusion_patterns #=> Array
1757
+ # resp.configuration.web_crawler_configuration.url_exclusion_patterns[0] #=> String
1758
+ # resp.configuration.web_crawler_configuration.proxy_configuration.host #=> String
1759
+ # resp.configuration.web_crawler_configuration.proxy_configuration.port #=> Integer
1760
+ # resp.configuration.web_crawler_configuration.proxy_configuration.credentials #=> String
1761
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication #=> Array
1762
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].host #=> String
1763
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].port #=> Integer
1764
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].credentials #=> String
1714
1765
  # resp.created_at #=> Time
1715
1766
  # resp.updated_at #=> Time
1716
1767
  # resp.description #=> String
@@ -2182,7 +2233,7 @@ module Aws::Kendra
2182
2233
  # resp.summary_items #=> Array
2183
2234
  # resp.summary_items[0].name #=> String
2184
2235
  # resp.summary_items[0].id #=> String
2185
- # resp.summary_items[0].type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE"
2236
+ # resp.summary_items[0].type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE", "WEBCRAWLER"
2186
2237
  # resp.summary_items[0].created_at #=> Time
2187
2238
  # resp.summary_items[0].updated_at #=> Time
2188
2239
  # resp.summary_items[0].status #=> String, one of "CREATING", "DELETING", "FAILED", "UPDATING", "ACTIVE"
@@ -3180,6 +3231,37 @@ module Aws::Kendra
3180
3231
  # exclude_user_accounts: ["UserAccount"],
3181
3232
  # exclude_shared_drives: ["SharedDriveId"],
3182
3233
  # },
3234
+ # web_crawler_configuration: {
3235
+ # urls: { # required
3236
+ # seed_url_configuration: {
3237
+ # seed_urls: ["SeedUrl"], # required
3238
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
3239
+ # },
3240
+ # site_maps_configuration: {
3241
+ # site_maps: ["SiteMap"], # required
3242
+ # },
3243
+ # },
3244
+ # crawl_depth: 1,
3245
+ # max_links_per_page: 1,
3246
+ # max_content_size_per_page_in_mega_bytes: 1.0,
3247
+ # max_urls_per_minute_crawl_rate: 1,
3248
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
3249
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
3250
+ # proxy_configuration: {
3251
+ # host: "Host", # required
3252
+ # port: 1, # required
3253
+ # credentials: "SecretArn",
3254
+ # },
3255
+ # authentication_configuration: {
3256
+ # basic_authentication: [
3257
+ # {
3258
+ # host: "Host", # required
3259
+ # port: 1, # required
3260
+ # credentials: "SecretArn", # required
3261
+ # },
3262
+ # ],
3263
+ # },
3264
+ # },
3183
3265
  # },
3184
3266
  # description: "Description",
3185
3267
  # schedule: "ScanSchedule",
@@ -3509,7 +3591,7 @@ module Aws::Kendra
3509
3591
  params: params,
3510
3592
  config: config)
3511
3593
  context[:gem_name] = 'aws-sdk-kendra'
3512
- context[:gem_version] = '1.26.0'
3594
+ context[:gem_version] = '1.27.0'
3513
3595
  Seahorse::Client::Request.new(handlers, context)
3514
3596
  end
3515
3597
 
@@ -23,6 +23,9 @@ module Aws::Kendra
23
23
  AmazonResourceName = Shapes::StringShape.new(name: 'AmazonResourceName')
24
24
  AttributeFilter = Shapes::StructureShape.new(name: 'AttributeFilter')
25
25
  AttributeFilterList = Shapes::ListShape.new(name: 'AttributeFilterList')
26
+ AuthenticationConfiguration = Shapes::StructureShape.new(name: 'AuthenticationConfiguration')
27
+ BasicAuthenticationConfiguration = Shapes::StructureShape.new(name: 'BasicAuthenticationConfiguration')
28
+ BasicAuthenticationConfigurationList = Shapes::ListShape.new(name: 'BasicAuthenticationConfigurationList')
26
29
  BatchDeleteDocumentRequest = Shapes::StructureShape.new(name: 'BatchDeleteDocumentRequest')
27
30
  BatchDeleteDocumentResponse = Shapes::StructureShape.new(name: 'BatchDeleteDocumentResponse')
28
31
  BatchDeleteDocumentResponseFailedDocument = Shapes::StructureShape.new(name: 'BatchDeleteDocumentResponseFailedDocument')
@@ -69,6 +72,7 @@ module Aws::Kendra
69
72
  ConfluenceVersion = Shapes::StringShape.new(name: 'ConfluenceVersion')
70
73
  ConnectionConfiguration = Shapes::StructureShape.new(name: 'ConnectionConfiguration')
71
74
  ContentType = Shapes::StringShape.new(name: 'ContentType')
75
+ CrawlDepth = Shapes::IntegerShape.new(name: 'CrawlDepth')
72
76
  CreateDataSourceRequest = Shapes::StructureShape.new(name: 'CreateDataSourceRequest')
73
77
  CreateDataSourceResponse = Shapes::StructureShape.new(name: 'CreateDataSourceResponse')
74
78
  CreateFaqRequest = Shapes::StructureShape.new(name: 'CreateFaqRequest')
@@ -172,6 +176,7 @@ module Aws::Kendra
172
176
  Highlight = Shapes::StructureShape.new(name: 'Highlight')
173
177
  HighlightList = Shapes::ListShape.new(name: 'HighlightList')
174
178
  HighlightType = Shapes::StringShape.new(name: 'HighlightType')
179
+ Host = Shapes::StringShape.new(name: 'Host')
175
180
  Importance = Shapes::IntegerShape.new(name: 'Importance')
176
181
  IndexConfigurationSummary = Shapes::StructureShape.new(name: 'IndexConfigurationSummary')
177
182
  IndexConfigurationSummaryList = Shapes::ListShape.new(name: 'IndexConfigurationSummaryList')
@@ -206,12 +211,15 @@ module Aws::Kendra
206
211
  ListThesauriRequest = Shapes::StructureShape.new(name: 'ListThesauriRequest')
207
212
  ListThesauriResponse = Shapes::StructureShape.new(name: 'ListThesauriResponse')
208
213
  Long = Shapes::IntegerShape.new(name: 'Long')
214
+ MaxContentSizePerPageInMegaBytes = Shapes::FloatShape.new(name: 'MaxContentSizePerPageInMegaBytes')
215
+ MaxLinksPerPage = Shapes::IntegerShape.new(name: 'MaxLinksPerPage')
209
216
  MaxResultsIntegerForListDataSourceSyncJobsRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListDataSourceSyncJobsRequest')
210
217
  MaxResultsIntegerForListDataSourcesRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListDataSourcesRequest')
211
218
  MaxResultsIntegerForListFaqsRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListFaqsRequest')
212
219
  MaxResultsIntegerForListIndicesRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListIndicesRequest')
213
220
  MaxResultsIntegerForListQuerySuggestionsBlockLists = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListQuerySuggestionsBlockLists')
214
221
  MaxResultsIntegerForListThesauriRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListThesauriRequest')
222
+ MaxUrlsPerMinuteCrawlRate = Shapes::IntegerShape.new(name: 'MaxUrlsPerMinuteCrawlRate')
215
223
  MetricValue = Shapes::StringShape.new(name: 'MetricValue')
216
224
  MimeType = Shapes::StringShape.new(name: 'MimeType')
217
225
  MinimumNumberOfQueryingUsers = Shapes::IntegerShape.new(name: 'MinimumNumberOfQueryingUsers')
@@ -224,10 +232,12 @@ module Aws::Kendra
224
232
  OneDriveUserList = Shapes::ListShape.new(name: 'OneDriveUserList')
225
233
  OneDriveUsers = Shapes::StructureShape.new(name: 'OneDriveUsers')
226
234
  Order = Shapes::StringShape.new(name: 'Order')
235
+ Port = Shapes::IntegerShape.new(name: 'Port')
227
236
  Principal = Shapes::StructureShape.new(name: 'Principal')
228
237
  PrincipalList = Shapes::ListShape.new(name: 'PrincipalList')
229
238
  PrincipalName = Shapes::StringShape.new(name: 'PrincipalName')
230
239
  PrincipalType = Shapes::StringShape.new(name: 'PrincipalType')
240
+ ProxyConfiguration = Shapes::StructureShape.new(name: 'ProxyConfiguration')
231
241
  QueryCapacityUnit = Shapes::IntegerShape.new(name: 'QueryCapacityUnit')
232
242
  QueryId = Shapes::StringShape.new(name: 'QueryId')
233
243
  QueryIdentifiersEnclosingOption = Shapes::StringShape.new(name: 'QueryIdentifiersEnclosingOption')
@@ -280,6 +290,9 @@ module Aws::Kendra
280
290
  Search = Shapes::StructureShape.new(name: 'Search')
281
291
  SecretArn = Shapes::StringShape.new(name: 'SecretArn')
282
292
  SecurityGroupIdList = Shapes::ListShape.new(name: 'SecurityGroupIdList')
293
+ SeedUrl = Shapes::StringShape.new(name: 'SeedUrl')
294
+ SeedUrlConfiguration = Shapes::StructureShape.new(name: 'SeedUrlConfiguration')
295
+ SeedUrlList = Shapes::ListShape.new(name: 'SeedUrlList')
283
296
  ServerSideEncryptionConfiguration = Shapes::StructureShape.new(name: 'ServerSideEncryptionConfiguration')
284
297
  ServiceNowAuthenticationType = Shapes::StringShape.new(name: 'ServiceNowAuthenticationType')
285
298
  ServiceNowBuildVersionType = Shapes::StringShape.new(name: 'ServiceNowBuildVersionType')
@@ -293,6 +306,9 @@ module Aws::Kendra
293
306
  SharePointUrlList = Shapes::ListShape.new(name: 'SharePointUrlList')
294
307
  SharePointVersion = Shapes::StringShape.new(name: 'SharePointVersion')
295
308
  SharedDriveId = Shapes::StringShape.new(name: 'SharedDriveId')
309
+ SiteMap = Shapes::StringShape.new(name: 'SiteMap')
310
+ SiteMapsConfiguration = Shapes::StructureShape.new(name: 'SiteMapsConfiguration')
311
+ SiteMapsList = Shapes::ListShape.new(name: 'SiteMapsList')
296
312
  SortOrder = Shapes::StringShape.new(name: 'SortOrder')
297
313
  SortingConfiguration = Shapes::StructureShape.new(name: 'SortingConfiguration')
298
314
  SqlConfiguration = Shapes::StructureShape.new(name: 'SqlConfiguration')
@@ -341,6 +357,7 @@ module Aws::Kendra
341
357
  UpdateQuerySuggestionsConfigRequest = Shapes::StructureShape.new(name: 'UpdateQuerySuggestionsConfigRequest')
342
358
  UpdateThesaurusRequest = Shapes::StructureShape.new(name: 'UpdateThesaurusRequest')
343
359
  Url = Shapes::StringShape.new(name: 'Url')
360
+ Urls = Shapes::StructureShape.new(name: 'Urls')
344
361
  UserAccount = Shapes::StringShape.new(name: 'UserAccount')
345
362
  UserContext = Shapes::StructureShape.new(name: 'UserContext')
346
363
  UserContextPolicy = Shapes::StringShape.new(name: 'UserContextPolicy')
@@ -352,6 +369,8 @@ module Aws::Kendra
352
369
  ValueImportanceMapKey = Shapes::StringShape.new(name: 'ValueImportanceMapKey')
353
370
  VisitorId = Shapes::StringShape.new(name: 'VisitorId')
354
371
  VpcSecurityGroupId = Shapes::StringShape.new(name: 'VpcSecurityGroupId')
372
+ WebCrawlerConfiguration = Shapes::StructureShape.new(name: 'WebCrawlerConfiguration')
373
+ WebCrawlerMode = Shapes::StringShape.new(name: 'WebCrawlerMode')
355
374
 
356
375
  AccessControlListConfiguration.add_member(:key_path, Shapes::ShapeRef.new(shape: S3ObjectKey, location_name: "KeyPath"))
357
376
  AccessControlListConfiguration.struct_class = Types::AccessControlListConfiguration
@@ -386,6 +405,16 @@ module Aws::Kendra
386
405
 
387
406
  AttributeFilterList.member = Shapes::ShapeRef.new(shape: AttributeFilter)
388
407
 
408
+ AuthenticationConfiguration.add_member(:basic_authentication, Shapes::ShapeRef.new(shape: BasicAuthenticationConfigurationList, location_name: "BasicAuthentication"))
409
+ AuthenticationConfiguration.struct_class = Types::AuthenticationConfiguration
410
+
411
+ BasicAuthenticationConfiguration.add_member(:host, Shapes::ShapeRef.new(shape: Host, required: true, location_name: "Host"))
412
+ BasicAuthenticationConfiguration.add_member(:port, Shapes::ShapeRef.new(shape: Port, required: true, location_name: "Port"))
413
+ BasicAuthenticationConfiguration.add_member(:credentials, Shapes::ShapeRef.new(shape: SecretArn, required: true, location_name: "Credentials"))
414
+ BasicAuthenticationConfiguration.struct_class = Types::BasicAuthenticationConfiguration
415
+
416
+ BasicAuthenticationConfigurationList.member = Shapes::ShapeRef.new(shape: BasicAuthenticationConfiguration)
417
+
389
418
  BatchDeleteDocumentRequest.add_member(:index_id, Shapes::ShapeRef.new(shape: IndexId, required: true, location_name: "IndexId"))
390
419
  BatchDeleteDocumentRequest.add_member(:document_id_list, Shapes::ShapeRef.new(shape: DocumentIdList, required: true, location_name: "DocumentIdList"))
391
420
  BatchDeleteDocumentRequest.add_member(:data_source_sync_job_metric_target, Shapes::ShapeRef.new(shape: DataSourceSyncJobMetricTarget, location_name: "DataSourceSyncJobMetricTarget"))
@@ -595,6 +624,7 @@ module Aws::Kendra
595
624
  DataSourceConfiguration.add_member(:service_now_configuration, Shapes::ShapeRef.new(shape: ServiceNowConfiguration, location_name: "ServiceNowConfiguration"))
596
625
  DataSourceConfiguration.add_member(:confluence_configuration, Shapes::ShapeRef.new(shape: ConfluenceConfiguration, location_name: "ConfluenceConfiguration"))
597
626
  DataSourceConfiguration.add_member(:google_drive_configuration, Shapes::ShapeRef.new(shape: GoogleDriveConfiguration, location_name: "GoogleDriveConfiguration"))
627
+ DataSourceConfiguration.add_member(:web_crawler_configuration, Shapes::ShapeRef.new(shape: WebCrawlerConfiguration, location_name: "WebCrawlerConfiguration"))
598
628
  DataSourceConfiguration.struct_class = Types::DataSourceConfiguration
599
629
 
600
630
  DataSourceInclusionsExclusionsStrings.member = Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStringsMember)
@@ -1007,6 +1037,11 @@ module Aws::Kendra
1007
1037
 
1008
1038
  PrincipalList.member = Shapes::ShapeRef.new(shape: Principal)
1009
1039
 
1040
+ ProxyConfiguration.add_member(:host, Shapes::ShapeRef.new(shape: Host, required: true, location_name: "Host"))
1041
+ ProxyConfiguration.add_member(:port, Shapes::ShapeRef.new(shape: Port, required: true, location_name: "Port"))
1042
+ ProxyConfiguration.add_member(:credentials, Shapes::ShapeRef.new(shape: SecretArn, location_name: "Credentials"))
1043
+ ProxyConfiguration.struct_class = Types::ProxyConfiguration
1044
+
1010
1045
  QueryRequest.add_member(:index_id, Shapes::ShapeRef.new(shape: IndexId, required: true, location_name: "IndexId"))
1011
1046
  QueryRequest.add_member(:query_text, Shapes::ShapeRef.new(shape: QueryText, required: true, location_name: "QueryText"))
1012
1047
  QueryRequest.add_member(:attribute_filter, Shapes::ShapeRef.new(shape: AttributeFilter, location_name: "AttributeFilter"))
@@ -1150,6 +1185,12 @@ module Aws::Kendra
1150
1185
 
1151
1186
  SecurityGroupIdList.member = Shapes::ShapeRef.new(shape: VpcSecurityGroupId)
1152
1187
 
1188
+ SeedUrlConfiguration.add_member(:seed_urls, Shapes::ShapeRef.new(shape: SeedUrlList, required: true, location_name: "SeedUrls"))
1189
+ SeedUrlConfiguration.add_member(:web_crawler_mode, Shapes::ShapeRef.new(shape: WebCrawlerMode, location_name: "WebCrawlerMode"))
1190
+ SeedUrlConfiguration.struct_class = Types::SeedUrlConfiguration
1191
+
1192
+ SeedUrlList.member = Shapes::ShapeRef.new(shape: SeedUrl)
1193
+
1153
1194
  ServerSideEncryptionConfiguration.add_member(:kms_key_id, Shapes::ShapeRef.new(shape: KmsKeyId, location_name: "KmsKeyId"))
1154
1195
  ServerSideEncryptionConfiguration.struct_class = Types::ServerSideEncryptionConfiguration
1155
1196
 
@@ -1196,6 +1237,11 @@ module Aws::Kendra
1196
1237
 
1197
1238
  SharePointUrlList.member = Shapes::ShapeRef.new(shape: Url)
1198
1239
 
1240
+ SiteMapsConfiguration.add_member(:site_maps, Shapes::ShapeRef.new(shape: SiteMapsList, required: true, location_name: "SiteMaps"))
1241
+ SiteMapsConfiguration.struct_class = Types::SiteMapsConfiguration
1242
+
1243
+ SiteMapsList.member = Shapes::ShapeRef.new(shape: SiteMap)
1244
+
1199
1245
  SortingConfiguration.add_member(:document_attribute_key, Shapes::ShapeRef.new(shape: DocumentAttributeKey, required: true, location_name: "DocumentAttributeKey"))
1200
1246
  SortingConfiguration.add_member(:sort_order, Shapes::ShapeRef.new(shape: SortOrder, required: true, location_name: "SortOrder"))
1201
1247
  SortingConfiguration.struct_class = Types::SortingConfiguration
@@ -1334,6 +1380,10 @@ module Aws::Kendra
1334
1380
  UpdateThesaurusRequest.add_member(:source_s3_path, Shapes::ShapeRef.new(shape: S3Path, location_name: "SourceS3Path"))
1335
1381
  UpdateThesaurusRequest.struct_class = Types::UpdateThesaurusRequest
1336
1382
 
1383
+ Urls.add_member(:seed_url_configuration, Shapes::ShapeRef.new(shape: SeedUrlConfiguration, location_name: "SeedUrlConfiguration"))
1384
+ Urls.add_member(:site_maps_configuration, Shapes::ShapeRef.new(shape: SiteMapsConfiguration, location_name: "SiteMapsConfiguration"))
1385
+ Urls.struct_class = Types::Urls
1386
+
1337
1387
  UserContext.add_member(:token, Shapes::ShapeRef.new(shape: Token, location_name: "Token"))
1338
1388
  UserContext.struct_class = Types::UserContext
1339
1389
 
@@ -1349,6 +1399,17 @@ module Aws::Kendra
1349
1399
  ValueImportanceMap.key = Shapes::ShapeRef.new(shape: ValueImportanceMapKey)
1350
1400
  ValueImportanceMap.value = Shapes::ShapeRef.new(shape: Importance)
1351
1401
 
1402
+ WebCrawlerConfiguration.add_member(:urls, Shapes::ShapeRef.new(shape: Urls, required: true, location_name: "Urls"))
1403
+ WebCrawlerConfiguration.add_member(:crawl_depth, Shapes::ShapeRef.new(shape: CrawlDepth, location_name: "CrawlDepth"))
1404
+ WebCrawlerConfiguration.add_member(:max_links_per_page, Shapes::ShapeRef.new(shape: MaxLinksPerPage, location_name: "MaxLinksPerPage"))
1405
+ WebCrawlerConfiguration.add_member(:max_content_size_per_page_in_mega_bytes, Shapes::ShapeRef.new(shape: MaxContentSizePerPageInMegaBytes, location_name: "MaxContentSizePerPageInMegaBytes"))
1406
+ WebCrawlerConfiguration.add_member(:max_urls_per_minute_crawl_rate, Shapes::ShapeRef.new(shape: MaxUrlsPerMinuteCrawlRate, location_name: "MaxUrlsPerMinuteCrawlRate"))
1407
+ WebCrawlerConfiguration.add_member(:url_inclusion_patterns, Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStrings, location_name: "UrlInclusionPatterns"))
1408
+ WebCrawlerConfiguration.add_member(:url_exclusion_patterns, Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStrings, location_name: "UrlExclusionPatterns"))
1409
+ WebCrawlerConfiguration.add_member(:proxy_configuration, Shapes::ShapeRef.new(shape: ProxyConfiguration, location_name: "ProxyConfiguration"))
1410
+ WebCrawlerConfiguration.add_member(:authentication_configuration, Shapes::ShapeRef.new(shape: AuthenticationConfiguration, location_name: "AuthenticationConfiguration"))
1411
+ WebCrawlerConfiguration.struct_class = Types::WebCrawlerConfiguration
1412
+
1352
1413
 
1353
1414
  # @api private
1354
1415
  API = Seahorse::Model::Api.new.tap do |api|
@@ -492,6 +492,87 @@ module Aws::Kendra
492
492
  include Aws::Structure
493
493
  end
494
494
 
495
+ # Provides the configuration information to connect to websites that
496
+ # require user authentication.
497
+ #
498
+ # @note When making an API call, you may pass AuthenticationConfiguration
499
+ # data as a hash:
500
+ #
501
+ # {
502
+ # basic_authentication: [
503
+ # {
504
+ # host: "Host", # required
505
+ # port: 1, # required
506
+ # credentials: "SecretArn", # required
507
+ # },
508
+ # ],
509
+ # }
510
+ #
511
+ # @!attribute [rw] basic_authentication
512
+ # The list of configuration information that's required to connect to
513
+ # and crawl a website host using basic authentication credentials.
514
+ #
515
+ # The list includes the name and port number of the website host.
516
+ # @return [Array<Types::BasicAuthenticationConfiguration>]
517
+ #
518
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/AuthenticationConfiguration AWS API Documentation
519
+ #
520
+ class AuthenticationConfiguration < Struct.new(
521
+ :basic_authentication)
522
+ SENSITIVE = []
523
+ include Aws::Structure
524
+ end
525
+
526
+ # Provides the configuration information to connect to websites that
527
+ # require basic user authentication.
528
+ #
529
+ # @note When making an API call, you may pass BasicAuthenticationConfiguration
530
+ # data as a hash:
531
+ #
532
+ # {
533
+ # host: "Host", # required
534
+ # port: 1, # required
535
+ # credentials: "SecretArn", # required
536
+ # }
537
+ #
538
+ # @!attribute [rw] host
539
+ # The name of the website host you want to connect to using
540
+ # authentication credentials.
541
+ #
542
+ # For example, the host name of https://a.example.com/page1.html is
543
+ # "a.example.com".
544
+ # @return [String]
545
+ #
546
+ # @!attribute [rw] port
547
+ # The port number of the website host you want to connect to using
548
+ # authentication credentials.
549
+ #
550
+ # For example, the port for https://a.example.com/page1.html is 443,
551
+ # the standard port for HTTPS.
552
+ # @return [Integer]
553
+ #
554
+ # @!attribute [rw] credentials
555
+ # Your secret ARN, which you can create in [AWS Secrets Manager][1]
556
+ #
557
+ # You use a secret if basic authentication credentials are required to
558
+ # connect to a website. The secret stores your credentials of user
559
+ # name and password.
560
+ #
561
+ #
562
+ #
563
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
564
+ # @return [String]
565
+ #
566
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/BasicAuthenticationConfiguration AWS API Documentation
567
+ #
568
+ class BasicAuthenticationConfiguration < Struct.new(
569
+ :host,
570
+ :port,
571
+ :credentials)
572
+ SENSITIVE = []
573
+ include Aws::Structure
574
+ end
575
+
495
576
  # @note When making an API call, you may pass BatchDeleteDocumentRequest
496
577
  # data as a hash:
497
578
  #
@@ -801,8 +882,9 @@ module Aws::Kendra
801
882
  include Aws::Structure
802
883
  end
803
884
 
804
- # Specifies capacity units configured for your index. You can add and
805
- # remove capacity units to tune an index to your requirements.
885
+ # Specifies capacity units configured for your enterprise edition index.
886
+ # You can add and remove capacity units to tune an index to your
887
+ # requirements.
806
888
  #
807
889
  # @note When making an API call, you may pass CapacityUnitsConfiguration
808
890
  # data as a hash:
@@ -813,14 +895,28 @@ module Aws::Kendra
813
895
  # }
814
896
  #
815
897
  # @!attribute [rw] storage_capacity_units
816
- # The amount of extra storage capacity for an index. Each capacity
817
- # unit provides 150 Gb of storage space or 500,000 documents,
818
- # whichever is reached first.
898
+ # The amount of extra storage capacity for an index. A single capacity
899
+ # unit for an index provides 150 GB of storage space or 500,000
900
+ # documents, whichever is reached first.
819
901
  # @return [Integer]
820
902
  #
821
903
  # @!attribute [rw] query_capacity_units
822
- # The amount of extra query capacity for an index. Each capacity unit
823
- # provides 0.5 queries per second and 40,000 queries per day.
904
+ # The amount of extra query capacity for an index and
905
+ # [GetQuerySuggestions][1] capacity.
906
+ #
907
+ # A single extra capacity unit for an index provides 0.5 queries per
908
+ # second or approximately 40,000 queries per day.
909
+ #
910
+ # `GetQuerySuggestions` capacity is 5 times the provisioned query
911
+ # capacity for an index. For example, the base capacity for an index
912
+ # is 0.5 queries per second, so GetQuerySuggestions capacity is 2.5
913
+ # calls per second. If adding another 0.5 queries per second to total
914
+ # 1 queries per second for an index, the `GetQuerySuggestions`
915
+ # capacity is 5 calls per second.
916
+ #
917
+ #
918
+ #
919
+ # [1]: https://docs.aws.amazon.com/kendra/latest/dg/API_GetQuerySuggestions.html
824
920
  # @return [Integer]
825
921
  #
826
922
  # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/CapacityUnitsConfiguration AWS API Documentation
@@ -1499,7 +1595,7 @@ module Aws::Kendra
1499
1595
  # {
1500
1596
  # name: "DataSourceName", # required
1501
1597
  # index_id: "IndexId", # required
1502
- # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE
1598
+ # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE, WEBCRAWLER
1503
1599
  # configuration: {
1504
1600
  # s3_configuration: {
1505
1601
  # bucket_name: "S3BucketName", # required
@@ -1762,6 +1858,37 @@ module Aws::Kendra
1762
1858
  # exclude_user_accounts: ["UserAccount"],
1763
1859
  # exclude_shared_drives: ["SharedDriveId"],
1764
1860
  # },
1861
+ # web_crawler_configuration: {
1862
+ # urls: { # required
1863
+ # seed_url_configuration: {
1864
+ # seed_urls: ["SeedUrl"], # required
1865
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
1866
+ # },
1867
+ # site_maps_configuration: {
1868
+ # site_maps: ["SiteMap"], # required
1869
+ # },
1870
+ # },
1871
+ # crawl_depth: 1,
1872
+ # max_links_per_page: 1,
1873
+ # max_content_size_per_page_in_mega_bytes: 1.0,
1874
+ # max_urls_per_minute_crawl_rate: 1,
1875
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
1876
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
1877
+ # proxy_configuration: {
1878
+ # host: "Host", # required
1879
+ # port: 1, # required
1880
+ # credentials: "SecretArn",
1881
+ # },
1882
+ # authentication_configuration: {
1883
+ # basic_authentication: [
1884
+ # {
1885
+ # host: "Host", # required
1886
+ # port: 1, # required
1887
+ # credentials: "SecretArn", # required
1888
+ # },
1889
+ # ],
1890
+ # },
1891
+ # },
1765
1892
  # },
1766
1893
  # description: "Description",
1767
1894
  # schedule: "ScanSchedule",
@@ -2570,6 +2697,37 @@ module Aws::Kendra
2570
2697
  # exclude_user_accounts: ["UserAccount"],
2571
2698
  # exclude_shared_drives: ["SharedDriveId"],
2572
2699
  # },
2700
+ # web_crawler_configuration: {
2701
+ # urls: { # required
2702
+ # seed_url_configuration: {
2703
+ # seed_urls: ["SeedUrl"], # required
2704
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
2705
+ # },
2706
+ # site_maps_configuration: {
2707
+ # site_maps: ["SiteMap"], # required
2708
+ # },
2709
+ # },
2710
+ # crawl_depth: 1,
2711
+ # max_links_per_page: 1,
2712
+ # max_content_size_per_page_in_mega_bytes: 1.0,
2713
+ # max_urls_per_minute_crawl_rate: 1,
2714
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
2715
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
2716
+ # proxy_configuration: {
2717
+ # host: "Host", # required
2718
+ # port: 1, # required
2719
+ # credentials: "SecretArn",
2720
+ # },
2721
+ # authentication_configuration: {
2722
+ # basic_authentication: [
2723
+ # {
2724
+ # host: "Host", # required
2725
+ # port: 1, # required
2726
+ # credentials: "SecretArn", # required
2727
+ # },
2728
+ # ],
2729
+ # },
2730
+ # },
2573
2731
  # }
2574
2732
  #
2575
2733
  # @!attribute [rw] s3_configuration
@@ -2612,6 +2770,11 @@ module Aws::Kendra
2612
2770
  # Drive.
2613
2771
  # @return [Types::GoogleDriveConfiguration]
2614
2772
  #
2773
+ # @!attribute [rw] web_crawler_configuration
2774
+ # Provides the configuration information required for the Amazon Kendra
2775
+ # web crawler.
2776
+ # @return [Types::WebCrawlerConfiguration]
2777
+ #
2615
2778
  # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/DataSourceConfiguration AWS API Documentation
2616
2779
  #
2617
2780
  class DataSourceConfiguration < Struct.new(
@@ -2622,7 +2785,8 @@ module Aws::Kendra
2622
2785
  :one_drive_configuration,
2623
2786
  :service_now_configuration,
2624
2787
  :confluence_configuration,
2625
- :google_drive_configuration)
2788
+ :google_drive_configuration,
2789
+ :web_crawler_configuration)
2626
2790
  SENSITIVE = []
2627
2791
  include Aws::Structure
2628
2792
  end
@@ -5048,6 +5212,57 @@ module Aws::Kendra
5048
5212
  include Aws::Structure
5049
5213
  end
5050
5214
 
5215
+ # Provides the configuration information for a web proxy to connect to
5216
+ # website hosts.
5217
+ #
5218
+ # @note When making an API call, you may pass ProxyConfiguration
5219
+ # data as a hash:
5220
+ #
5221
+ # {
5222
+ # host: "Host", # required
5223
+ # port: 1, # required
5224
+ # credentials: "SecretArn",
5225
+ # }
5226
+ #
5227
+ # @!attribute [rw] host
5228
+ # The name of the website host you want to connect to via a web proxy
5229
+ # server.
5230
+ #
5231
+ # For example, the host name of https://a.example.com/page1.html is
5232
+ # "a.example.com".
5233
+ # @return [String]
5234
+ #
5235
+ # @!attribute [rw] port
5236
+ # The port number of the website host you want to connect to via a web
5237
+ # proxy server.
5238
+ #
5239
+ # For example, the port for https://a.example.com/page1.html is 443,
5240
+ # the standard port for HTTPS.
5241
+ # @return [Integer]
5242
+ #
5243
+ # @!attribute [rw] credentials
5244
+ # Your secret ARN, which you can create in [AWS Secrets Manager][1].
5245
+ #
5246
+ # The credentials are optional. You use a secret if web proxy
5247
+ # credentials are required to connect to a website host. Amazon Kendra
5248
+ # currently supports basic authentication to connect to a web proxy
5249
+ # server. The secret stores your credentials.
5250
+ #
5251
+ #
5252
+ #
5253
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
5254
+ # @return [String]
5255
+ #
5256
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/ProxyConfiguration AWS API Documentation
5257
+ #
5258
+ class ProxyConfiguration < Struct.new(
5259
+ :host,
5260
+ :port,
5261
+ :credentials)
5262
+ SENSITIVE = []
5263
+ include Aws::Structure
5264
+ end
5265
+
5051
5266
  # @note When making an API call, you may pass QueryRequest
5052
5267
  # data as a hash:
5053
5268
  #
@@ -6253,6 +6468,59 @@ module Aws::Kendra
6253
6468
  include Aws::Structure
6254
6469
  end
6255
6470
 
6471
+ # Provides the configuration information of the seed or starting point
6472
+ # URLs to crawl.
6473
+ #
6474
+ # *When selecting websites to index, you must adhere to the [Amazon
6475
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
6476
+ # you must only use the Amazon Kendra web crawler to index your own
6477
+ # webpages, or webpages that you have authorization to index.*
6478
+ #
6479
+ #
6480
+ #
6481
+ # [1]: https://aws.amazon.com/aup/
6482
+ #
6483
+ # @note When making an API call, you may pass SeedUrlConfiguration
6484
+ # data as a hash:
6485
+ #
6486
+ # {
6487
+ # seed_urls: ["SeedUrl"], # required
6488
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
6489
+ # }
6490
+ #
6491
+ # @!attribute [rw] seed_urls
6492
+ # The list of seed or starting point URLs of the websites you want to
6493
+ # crawl.
6494
+ #
6495
+ # The list can include a maximum of 100 seed URLs.
6496
+ # @return [Array<String>]
6497
+ #
6498
+ # @!attribute [rw] web_crawler_mode
6499
+ # You can choose one of the following modes:
6500
+ #
6501
+ # * `HOST_ONLY` – crawl only the website host names. For example, if
6502
+ # the seed URL is "abc.example.com", then only URLs with host name
6503
+ # "abc.example.com" are crawled.
6504
+ #
6505
+ # * `SUBDOMAINS` – crawl the website host names with subdomains. For
6506
+ # example, if the seed URL is "abc.example.com", then
6507
+ # "a.abc.example.com" and "b.abc.example.com" are also crawled.
6508
+ #
6509
+ # * `EVERYTHING` – crawl the website host names with subdomains and
6510
+ # other domains that the webpages link to.
6511
+ #
6512
+ # The default mode is set to `HOST_ONLY`.
6513
+ # @return [String]
6514
+ #
6515
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/SeedUrlConfiguration AWS API Documentation
6516
+ #
6517
+ class SeedUrlConfiguration < Struct.new(
6518
+ :seed_urls,
6519
+ :web_crawler_mode)
6520
+ SENSITIVE = []
6521
+ include Aws::Structure
6522
+ end
6523
+
6256
6524
  # Provides the identifier of the AWS KMS customer master key (CMK) used
6257
6525
  # to encrypt data indexed by Amazon Kendra. Amazon Kendra doesn't
6258
6526
  # support asymmetric CMKs.
@@ -6675,6 +6943,38 @@ module Aws::Kendra
6675
6943
  include Aws::Structure
6676
6944
  end
6677
6945
 
6946
+ # Provides the configuration information of the sitemap URLs to crawl.
6947
+ #
6948
+ # *When selecting websites to index, you must adhere to the [Amazon
6949
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
6950
+ # you must only use the Amazon Kendra web crawler to index your own
6951
+ # webpages, or webpages that you have authorization to index.*
6952
+ #
6953
+ #
6954
+ #
6955
+ # [1]: https://aws.amazon.com/aup/
6956
+ #
6957
+ # @note When making an API call, you may pass SiteMapsConfiguration
6958
+ # data as a hash:
6959
+ #
6960
+ # {
6961
+ # site_maps: ["SiteMap"], # required
6962
+ # }
6963
+ #
6964
+ # @!attribute [rw] site_maps
6965
+ # The list of sitemap URLs of the websites you want to crawl.
6966
+ #
6967
+ # The list can include a maximum of three sitemap URLs.
6968
+ # @return [Array<String>]
6969
+ #
6970
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/SiteMapsConfiguration AWS API Documentation
6971
+ #
6972
+ class SiteMapsConfiguration < Struct.new(
6973
+ :site_maps)
6974
+ SENSITIVE = []
6975
+ include Aws::Structure
6976
+ end
6977
+
6678
6978
  # Specifies the document attribute to use to sort the response to an
6679
6979
  # Amazon Kendra query. You can specify a single attribute for sorting.
6680
6980
  # The attribute must have the `Sortable` flag set to `true`, otherwise
@@ -7476,6 +7776,37 @@ module Aws::Kendra
7476
7776
  # exclude_user_accounts: ["UserAccount"],
7477
7777
  # exclude_shared_drives: ["SharedDriveId"],
7478
7778
  # },
7779
+ # web_crawler_configuration: {
7780
+ # urls: { # required
7781
+ # seed_url_configuration: {
7782
+ # seed_urls: ["SeedUrl"], # required
7783
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
7784
+ # },
7785
+ # site_maps_configuration: {
7786
+ # site_maps: ["SiteMap"], # required
7787
+ # },
7788
+ # },
7789
+ # crawl_depth: 1,
7790
+ # max_links_per_page: 1,
7791
+ # max_content_size_per_page_in_mega_bytes: 1.0,
7792
+ # max_urls_per_minute_crawl_rate: 1,
7793
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
7794
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
7795
+ # proxy_configuration: {
7796
+ # host: "Host", # required
7797
+ # port: 1, # required
7798
+ # credentials: "SecretArn",
7799
+ # },
7800
+ # authentication_configuration: {
7801
+ # basic_authentication: [
7802
+ # {
7803
+ # host: "Host", # required
7804
+ # port: 1, # required
7805
+ # credentials: "SecretArn", # required
7806
+ # },
7807
+ # ],
7808
+ # },
7809
+ # },
7479
7810
  # },
7480
7811
  # description: "Description",
7481
7812
  # schedule: "ScanSchedule",
@@ -7838,6 +8169,58 @@ module Aws::Kendra
7838
8169
  include Aws::Structure
7839
8170
  end
7840
8171
 
8172
+ # Provides the configuration information of the URLs to crawl.
8173
+ #
8174
+ # *When selecting websites to index, you must adhere to the [Amazon
8175
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
8176
+ # you must only use the Amazon Kendra web crawler to index your own
8177
+ # webpages, or webpages that you have authorization to index.*
8178
+ #
8179
+ #
8180
+ #
8181
+ # [1]: https://aws.amazon.com/aup/
8182
+ #
8183
+ # @note When making an API call, you may pass Urls
8184
+ # data as a hash:
8185
+ #
8186
+ # {
8187
+ # seed_url_configuration: {
8188
+ # seed_urls: ["SeedUrl"], # required
8189
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
8190
+ # },
8191
+ # site_maps_configuration: {
8192
+ # site_maps: ["SiteMap"], # required
8193
+ # },
8194
+ # }
8195
+ #
8196
+ # @!attribute [rw] seed_url_configuration
8197
+ # Provides the configuration of the seed or starting point URLs of the
8198
+ # websites you want to crawl.
8199
+ #
8200
+ # You can choose to crawl only the website host names, or the website
8201
+ # host names with subdomains, or the website host names with
8202
+ # subdomains and other domains that the webpages link to.
8203
+ #
8204
+ # You can list up to 100 seed URLs.
8205
+ # @return [Types::SeedUrlConfiguration]
8206
+ #
8207
+ # @!attribute [rw] site_maps_configuration
8208
+ # Provides the configuration of the sitemap URLs of the websites you
8209
+ # want to crawl.
8210
+ #
8211
+ # Only URLs belonging to the same website host names are crawled. You
8212
+ # can list up to three sitemap URLs.
8213
+ # @return [Types::SiteMapsConfiguration]
8214
+ #
8215
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/Urls AWS API Documentation
8216
+ #
8217
+ class Urls < Struct.new(
8218
+ :seed_url_configuration,
8219
+ :site_maps_configuration)
8220
+ SENSITIVE = []
8221
+ include Aws::Structure
8222
+ end
8223
+
7841
8224
  # Provides information about the user context for an Amazon Kendra index.
7842
8225
  #
7843
8226
  # @note When making an API call, you may pass UserContext
@@ -7908,5 +8291,167 @@ module Aws::Kendra
7908
8291
  include Aws::Structure
7909
8292
  end
7910
8293
 
8294
+ # Provides the configuration information required for the Amazon Kendra web
8295
+ # crawler.
8296
+ #
8297
+ # @note When making an API call, you may pass WebCrawlerConfiguration
8298
+ # data as a hash:
8299
+ #
8300
+ # {
8301
+ # urls: { # required
8302
+ # seed_url_configuration: {
8303
+ # seed_urls: ["SeedUrl"], # required
8304
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
8305
+ # },
8306
+ # site_maps_configuration: {
8307
+ # site_maps: ["SiteMap"], # required
8308
+ # },
8309
+ # },
8310
+ # crawl_depth: 1,
8311
+ # max_links_per_page: 1,
8312
+ # max_content_size_per_page_in_mega_bytes: 1.0,
8313
+ # max_urls_per_minute_crawl_rate: 1,
8314
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
8315
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
8316
+ # proxy_configuration: {
8317
+ # host: "Host", # required
8318
+ # port: 1, # required
8319
+ # credentials: "SecretArn",
8320
+ # },
8321
+ # authentication_configuration: {
8322
+ # basic_authentication: [
8323
+ # {
8324
+ # host: "Host", # required
8325
+ # port: 1, # required
8326
+ # credentials: "SecretArn", # required
8327
+ # },
8328
+ # ],
8329
+ # },
8330
+ # }
8331
+ #
8332
+ # @!attribute [rw] urls
8333
+ # Specifies the seed or starting point URLs of the websites or the
8334
+ # sitemap URLs of the websites you want to crawl.
8335
+ #
8336
+ # You can include website subdomains. You can list up to 100 seed URLs
8337
+ # and up to three sitemap URLs.
8338
+ #
8339
+ # *When selecting websites to index, you must adhere to the [Amazon
8340
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
8341
+ # you must only use the Amazon Kendra web crawler to index your own
8342
+ # webpages, or webpages that you have authorization to index.*
8343
+ #
8344
+ #
8345
+ #
8346
+ # [1]: https://aws.amazon.com/aup/
8347
+ # @return [Types::Urls]
8348
+ #
8349
+ # @!attribute [rw] crawl_depth
8350
+ # Specifies the number of levels in a website that you want to crawl.
8351
+ #
8352
+ # The first level begins from the website seed or starting point URL.
8353
+ # For example, if a website has 3 levels – index level (i.e. seed in
8354
+ # this example), sections level, and subsections level – and you are
8355
+ # only interested in crawling information up to the sections level
8356
+ # (i.e. levels 0-1), you can set your depth to 1.
8357
+ #
8358
+ # The default crawl depth is set to 2.
8359
+ # @return [Integer]
8360
+ #
8361
+ # @!attribute [rw] max_links_per_page
8362
+ # The maximum number of URLs on a webpage to include when crawling a
8363
+ # website. This number is per webpage.
8364
+ #
8365
+ # As a website’s webpages are crawled, any URLs the webpages link to
8366
+ # are also crawled. URLs on a webpage are crawled in order of
8367
+ # appearance.
8368
+ #
8369
+ # The default maximum links per page is 100.
8370
+ # @return [Integer]
8371
+ #
8372
+ # @!attribute [rw] max_content_size_per_page_in_mega_bytes
8373
+ # The maximum size (in MB) of a webpage or attachment to crawl.
8374
+ #
8375
+ # Files larger than this size (in MB) are skipped/not crawled.
8376
+ #
8377
+ # The default maximum size of a webpage or attachment is set to 50 MB.
8378
+ # @return [Float]
8379
+ #
8380
+ # @!attribute [rw] max_urls_per_minute_crawl_rate
8381
+ # The maximum number of URLs crawled per website host per minute.
8382
+ #
8383
+ # A minimum of one URL is required.
8384
+ #
8385
+ # The default maximum number of URLs crawled per website host per
8386
+ # minute is 300.
8387
+ # @return [Integer]
8388
+ #
8389
+ # @!attribute [rw] url_inclusion_patterns
8390
+ # A list of regular expression patterns to include certain URLs to crawl.
8391
+ #
8392
+ # If there is a regular expression pattern to exclude certain URLs
8393
+ # that conflicts with the include pattern, the exclude pattern takes
8394
+ # precedence.
8395
+ # @return [Array<String>]
8396
+ #
8397
+ # @!attribute [rw] url_exclusion_patterns
8398
+ # A list of regular expression patterns to exclude certain URLs to crawl.
8399
+ #
8400
+ # If there is a regular expression pattern to include certain URLs
8401
+ # that conflicts with the exclude pattern, the exclude pattern takes
8402
+ # precedence.
8403
+ # @return [Array<String>]
8404
+ #
8405
+ # @!attribute [rw] proxy_configuration
8406
+ # Provides configuration information required to connect to your
8407
+ # internal websites via a web proxy.
8408
+ #
8409
+ # You must provide the website host name and port number. For example,
8410
+ # the host name of https://a.example.com/page1.html is
8411
+ # "a.example.com" and the port is 443, the standard port for HTTPS.
8412
+ #
8413
+ # Web proxy credentials are optional and you can use them to connect
8414
+ # to a web proxy server that requires basic authentication. To store
8415
+ # web proxy credentials, you use a secret in [AWS Secrets Manager][1].
8416
+ #
8417
+ #
8418
+ #
8419
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
8420
+ # @return [Types::ProxyConfiguration]
8421
+ #
8422
+ # @!attribute [rw] authentication_configuration
8423
+ # Provides configuration information required to connect to websites
8424
+ # using authentication.
8425
+ #
8426
+ # You can connect to websites using basic authentication of user name
8427
+ # and password.
8428
+ #
8429
+ # You must provide the website host name and port number. For example,
8430
+ # the host name of https://a.example.com/page1.html is
8431
+ # "a.example.com" and the port is 443, the standard port for HTTPS.
8432
+ # You use a secret in [AWS Secrets Manager][1] to store your
8433
+ # authentication credentials.
8434
+ #
8435
+ #
8436
+ #
8437
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
8438
+ # @return [Types::AuthenticationConfiguration]
8439
+ #
8440
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/WebCrawlerConfiguration AWS API Documentation
8441
+ #
8442
+ class WebCrawlerConfiguration < Struct.new(
8443
+ :urls,
8444
+ :crawl_depth,
8445
+ :max_links_per_page,
8446
+ :max_content_size_per_page_in_mega_bytes,
8447
+ :max_urls_per_minute_crawl_rate,
8448
+ :url_inclusion_patterns,
8449
+ :url_exclusion_patterns,
8450
+ :proxy_configuration,
8451
+ :authentication_configuration)
8452
+ SENSITIVE = []
8453
+ include Aws::Structure
8454
+ end
8455
+
7911
8456
  end
7912
8457
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-kendra
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.26.0
4
+ version: 1.27.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-09 00:00:00.000000000 Z
11
+ date: 2021-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core