aws-sdk-kendra 1.26.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db6db0d7989bf395d9d66113616dd832292db250ceae884e79f1ff68fc72c2da
4
- data.tar.gz: 5d5c1a752c7eb99a07c0c60f67b913244b2731f0301eb4918171af9bdbe95ba6
3
+ metadata.gz: 18f9a0673af8278a3066d865646525130986d0d6fce53f4a84fc8b10efb26e21
4
+ data.tar.gz: 1b782bf81e35136fc3b2fd540bcc97821f38707b6159977a0e6243bc51b33253
5
5
  SHA512:
6
- metadata.gz: 3c11d993010e54b7ba55c7503fa00b331b0b34da815eac768955ec402bc436885327483dceac3b85a46da932002ad9be3049b3db2bac82eae20ed84900e5be53
7
- data.tar.gz: 13d14d88704eb78936b534371ef132863b4db06403b68648b7e3a03a53da263a1a2e5641e6cc5d5678241270af4b94960de35301e5cfb0c1fc1f3062400f121a
6
+ metadata.gz: e91319bbb2301cbf8c9fb6900922854688faf4fac89b62c2ce9c5e1f5f36265e33cc3e5faa5dbfb581128c06ea406acc49be1b6c168c9920707b87aa28dffd72
7
+ data.tar.gz: ab38e61328b8e4b91f9354a2f23e74b98040d4477742ad86219cfc5de2c79e3ca36423574a13dbe2c6de1b5d45e1a36de594a36d8d9e29cd464a08a760986408
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.27.0 (2021-06-17)
5
+ ------------------
6
+
7
+ * Feature - Amazon Kendra now supports the indexing of web documents for search through the web crawler.
8
+
4
9
  1.26.0 (2021-06-09)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.26.0
1
+ 1.27.0
@@ -48,6 +48,6 @@ require_relative 'aws-sdk-kendra/customizations'
48
48
  # @!group service
49
49
  module Aws::Kendra
50
50
 
51
- GEM_VERSION = '1.26.0'
51
+ GEM_VERSION = '1.27.0'
52
52
 
53
53
  end
@@ -679,7 +679,7 @@ module Aws::Kendra
679
679
  # resp = client.create_data_source({
680
680
  # name: "DataSourceName", # required
681
681
  # index_id: "IndexId", # required
682
- # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE
682
+ # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE, WEBCRAWLER
683
683
  # configuration: {
684
684
  # s3_configuration: {
685
685
  # bucket_name: "S3BucketName", # required
@@ -942,6 +942,37 @@ module Aws::Kendra
942
942
  # exclude_user_accounts: ["UserAccount"],
943
943
  # exclude_shared_drives: ["SharedDriveId"],
944
944
  # },
945
+ # web_crawler_configuration: {
946
+ # urls: { # required
947
+ # seed_url_configuration: {
948
+ # seed_urls: ["SeedUrl"], # required
949
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
950
+ # },
951
+ # site_maps_configuration: {
952
+ # site_maps: ["SiteMap"], # required
953
+ # },
954
+ # },
955
+ # crawl_depth: 1,
956
+ # max_links_per_page: 1,
957
+ # max_content_size_per_page_in_mega_bytes: 1.0,
958
+ # max_urls_per_minute_crawl_rate: 1,
959
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
960
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
961
+ # proxy_configuration: {
962
+ # host: "Host", # required
963
+ # port: 1, # required
964
+ # credentials: "SecretArn",
965
+ # },
966
+ # authentication_configuration: {
967
+ # basic_authentication: [
968
+ # {
969
+ # host: "Host", # required
970
+ # port: 1, # required
971
+ # credentials: "SecretArn", # required
972
+ # },
973
+ # ],
974
+ # },
975
+ # },
945
976
  # },
946
977
  # description: "Description",
947
978
  # schedule: "ScanSchedule",
@@ -1525,7 +1556,7 @@ module Aws::Kendra
1525
1556
  # resp.id #=> String
1526
1557
  # resp.index_id #=> String
1527
1558
  # resp.name #=> String
1528
- # resp.type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE"
1559
+ # resp.type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE", "WEBCRAWLER"
1529
1560
  # resp.configuration.s3_configuration.bucket_name #=> String
1530
1561
  # resp.configuration.s3_configuration.inclusion_prefixes #=> Array
1531
1562
  # resp.configuration.s3_configuration.inclusion_prefixes[0] #=> String
@@ -1711,6 +1742,26 @@ module Aws::Kendra
1711
1742
  # resp.configuration.google_drive_configuration.exclude_user_accounts[0] #=> String
1712
1743
  # resp.configuration.google_drive_configuration.exclude_shared_drives #=> Array
1713
1744
  # resp.configuration.google_drive_configuration.exclude_shared_drives[0] #=> String
1745
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.seed_urls #=> Array
1746
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.seed_urls[0] #=> String
1747
+ # resp.configuration.web_crawler_configuration.urls.seed_url_configuration.web_crawler_mode #=> String, one of "HOST_ONLY", "SUBDOMAINS", "EVERYTHING"
1748
+ # resp.configuration.web_crawler_configuration.urls.site_maps_configuration.site_maps #=> Array
1749
+ # resp.configuration.web_crawler_configuration.urls.site_maps_configuration.site_maps[0] #=> String
1750
+ # resp.configuration.web_crawler_configuration.crawl_depth #=> Integer
1751
+ # resp.configuration.web_crawler_configuration.max_links_per_page #=> Integer
1752
+ # resp.configuration.web_crawler_configuration.max_content_size_per_page_in_mega_bytes #=> Float
1753
+ # resp.configuration.web_crawler_configuration.max_urls_per_minute_crawl_rate #=> Integer
1754
+ # resp.configuration.web_crawler_configuration.url_inclusion_patterns #=> Array
1755
+ # resp.configuration.web_crawler_configuration.url_inclusion_patterns[0] #=> String
1756
+ # resp.configuration.web_crawler_configuration.url_exclusion_patterns #=> Array
1757
+ # resp.configuration.web_crawler_configuration.url_exclusion_patterns[0] #=> String
1758
+ # resp.configuration.web_crawler_configuration.proxy_configuration.host #=> String
1759
+ # resp.configuration.web_crawler_configuration.proxy_configuration.port #=> Integer
1760
+ # resp.configuration.web_crawler_configuration.proxy_configuration.credentials #=> String
1761
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication #=> Array
1762
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].host #=> String
1763
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].port #=> Integer
1764
+ # resp.configuration.web_crawler_configuration.authentication_configuration.basic_authentication[0].credentials #=> String
1714
1765
  # resp.created_at #=> Time
1715
1766
  # resp.updated_at #=> Time
1716
1767
  # resp.description #=> String
@@ -2182,7 +2233,7 @@ module Aws::Kendra
2182
2233
  # resp.summary_items #=> Array
2183
2234
  # resp.summary_items[0].name #=> String
2184
2235
  # resp.summary_items[0].id #=> String
2185
- # resp.summary_items[0].type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE"
2236
+ # resp.summary_items[0].type #=> String, one of "S3", "SHAREPOINT", "DATABASE", "SALESFORCE", "ONEDRIVE", "SERVICENOW", "CUSTOM", "CONFLUENCE", "GOOGLEDRIVE", "WEBCRAWLER"
2186
2237
  # resp.summary_items[0].created_at #=> Time
2187
2238
  # resp.summary_items[0].updated_at #=> Time
2188
2239
  # resp.summary_items[0].status #=> String, one of "CREATING", "DELETING", "FAILED", "UPDATING", "ACTIVE"
@@ -3180,6 +3231,37 @@ module Aws::Kendra
3180
3231
  # exclude_user_accounts: ["UserAccount"],
3181
3232
  # exclude_shared_drives: ["SharedDriveId"],
3182
3233
  # },
3234
+ # web_crawler_configuration: {
3235
+ # urls: { # required
3236
+ # seed_url_configuration: {
3237
+ # seed_urls: ["SeedUrl"], # required
3238
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
3239
+ # },
3240
+ # site_maps_configuration: {
3241
+ # site_maps: ["SiteMap"], # required
3242
+ # },
3243
+ # },
3244
+ # crawl_depth: 1,
3245
+ # max_links_per_page: 1,
3246
+ # max_content_size_per_page_in_mega_bytes: 1.0,
3247
+ # max_urls_per_minute_crawl_rate: 1,
3248
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
3249
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
3250
+ # proxy_configuration: {
3251
+ # host: "Host", # required
3252
+ # port: 1, # required
3253
+ # credentials: "SecretArn",
3254
+ # },
3255
+ # authentication_configuration: {
3256
+ # basic_authentication: [
3257
+ # {
3258
+ # host: "Host", # required
3259
+ # port: 1, # required
3260
+ # credentials: "SecretArn", # required
3261
+ # },
3262
+ # ],
3263
+ # },
3264
+ # },
3183
3265
  # },
3184
3266
  # description: "Description",
3185
3267
  # schedule: "ScanSchedule",
@@ -3509,7 +3591,7 @@ module Aws::Kendra
3509
3591
  params: params,
3510
3592
  config: config)
3511
3593
  context[:gem_name] = 'aws-sdk-kendra'
3512
- context[:gem_version] = '1.26.0'
3594
+ context[:gem_version] = '1.27.0'
3513
3595
  Seahorse::Client::Request.new(handlers, context)
3514
3596
  end
3515
3597
 
@@ -23,6 +23,9 @@ module Aws::Kendra
23
23
  AmazonResourceName = Shapes::StringShape.new(name: 'AmazonResourceName')
24
24
  AttributeFilter = Shapes::StructureShape.new(name: 'AttributeFilter')
25
25
  AttributeFilterList = Shapes::ListShape.new(name: 'AttributeFilterList')
26
+ AuthenticationConfiguration = Shapes::StructureShape.new(name: 'AuthenticationConfiguration')
27
+ BasicAuthenticationConfiguration = Shapes::StructureShape.new(name: 'BasicAuthenticationConfiguration')
28
+ BasicAuthenticationConfigurationList = Shapes::ListShape.new(name: 'BasicAuthenticationConfigurationList')
26
29
  BatchDeleteDocumentRequest = Shapes::StructureShape.new(name: 'BatchDeleteDocumentRequest')
27
30
  BatchDeleteDocumentResponse = Shapes::StructureShape.new(name: 'BatchDeleteDocumentResponse')
28
31
  BatchDeleteDocumentResponseFailedDocument = Shapes::StructureShape.new(name: 'BatchDeleteDocumentResponseFailedDocument')
@@ -69,6 +72,7 @@ module Aws::Kendra
69
72
  ConfluenceVersion = Shapes::StringShape.new(name: 'ConfluenceVersion')
70
73
  ConnectionConfiguration = Shapes::StructureShape.new(name: 'ConnectionConfiguration')
71
74
  ContentType = Shapes::StringShape.new(name: 'ContentType')
75
+ CrawlDepth = Shapes::IntegerShape.new(name: 'CrawlDepth')
72
76
  CreateDataSourceRequest = Shapes::StructureShape.new(name: 'CreateDataSourceRequest')
73
77
  CreateDataSourceResponse = Shapes::StructureShape.new(name: 'CreateDataSourceResponse')
74
78
  CreateFaqRequest = Shapes::StructureShape.new(name: 'CreateFaqRequest')
@@ -172,6 +176,7 @@ module Aws::Kendra
172
176
  Highlight = Shapes::StructureShape.new(name: 'Highlight')
173
177
  HighlightList = Shapes::ListShape.new(name: 'HighlightList')
174
178
  HighlightType = Shapes::StringShape.new(name: 'HighlightType')
179
+ Host = Shapes::StringShape.new(name: 'Host')
175
180
  Importance = Shapes::IntegerShape.new(name: 'Importance')
176
181
  IndexConfigurationSummary = Shapes::StructureShape.new(name: 'IndexConfigurationSummary')
177
182
  IndexConfigurationSummaryList = Shapes::ListShape.new(name: 'IndexConfigurationSummaryList')
@@ -206,12 +211,15 @@ module Aws::Kendra
206
211
  ListThesauriRequest = Shapes::StructureShape.new(name: 'ListThesauriRequest')
207
212
  ListThesauriResponse = Shapes::StructureShape.new(name: 'ListThesauriResponse')
208
213
  Long = Shapes::IntegerShape.new(name: 'Long')
214
+ MaxContentSizePerPageInMegaBytes = Shapes::FloatShape.new(name: 'MaxContentSizePerPageInMegaBytes')
215
+ MaxLinksPerPage = Shapes::IntegerShape.new(name: 'MaxLinksPerPage')
209
216
  MaxResultsIntegerForListDataSourceSyncJobsRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListDataSourceSyncJobsRequest')
210
217
  MaxResultsIntegerForListDataSourcesRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListDataSourcesRequest')
211
218
  MaxResultsIntegerForListFaqsRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListFaqsRequest')
212
219
  MaxResultsIntegerForListIndicesRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListIndicesRequest')
213
220
  MaxResultsIntegerForListQuerySuggestionsBlockLists = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListQuerySuggestionsBlockLists')
214
221
  MaxResultsIntegerForListThesauriRequest = Shapes::IntegerShape.new(name: 'MaxResultsIntegerForListThesauriRequest')
222
+ MaxUrlsPerMinuteCrawlRate = Shapes::IntegerShape.new(name: 'MaxUrlsPerMinuteCrawlRate')
215
223
  MetricValue = Shapes::StringShape.new(name: 'MetricValue')
216
224
  MimeType = Shapes::StringShape.new(name: 'MimeType')
217
225
  MinimumNumberOfQueryingUsers = Shapes::IntegerShape.new(name: 'MinimumNumberOfQueryingUsers')
@@ -224,10 +232,12 @@ module Aws::Kendra
224
232
  OneDriveUserList = Shapes::ListShape.new(name: 'OneDriveUserList')
225
233
  OneDriveUsers = Shapes::StructureShape.new(name: 'OneDriveUsers')
226
234
  Order = Shapes::StringShape.new(name: 'Order')
235
+ Port = Shapes::IntegerShape.new(name: 'Port')
227
236
  Principal = Shapes::StructureShape.new(name: 'Principal')
228
237
  PrincipalList = Shapes::ListShape.new(name: 'PrincipalList')
229
238
  PrincipalName = Shapes::StringShape.new(name: 'PrincipalName')
230
239
  PrincipalType = Shapes::StringShape.new(name: 'PrincipalType')
240
+ ProxyConfiguration = Shapes::StructureShape.new(name: 'ProxyConfiguration')
231
241
  QueryCapacityUnit = Shapes::IntegerShape.new(name: 'QueryCapacityUnit')
232
242
  QueryId = Shapes::StringShape.new(name: 'QueryId')
233
243
  QueryIdentifiersEnclosingOption = Shapes::StringShape.new(name: 'QueryIdentifiersEnclosingOption')
@@ -280,6 +290,9 @@ module Aws::Kendra
280
290
  Search = Shapes::StructureShape.new(name: 'Search')
281
291
  SecretArn = Shapes::StringShape.new(name: 'SecretArn')
282
292
  SecurityGroupIdList = Shapes::ListShape.new(name: 'SecurityGroupIdList')
293
+ SeedUrl = Shapes::StringShape.new(name: 'SeedUrl')
294
+ SeedUrlConfiguration = Shapes::StructureShape.new(name: 'SeedUrlConfiguration')
295
+ SeedUrlList = Shapes::ListShape.new(name: 'SeedUrlList')
283
296
  ServerSideEncryptionConfiguration = Shapes::StructureShape.new(name: 'ServerSideEncryptionConfiguration')
284
297
  ServiceNowAuthenticationType = Shapes::StringShape.new(name: 'ServiceNowAuthenticationType')
285
298
  ServiceNowBuildVersionType = Shapes::StringShape.new(name: 'ServiceNowBuildVersionType')
@@ -293,6 +306,9 @@ module Aws::Kendra
293
306
  SharePointUrlList = Shapes::ListShape.new(name: 'SharePointUrlList')
294
307
  SharePointVersion = Shapes::StringShape.new(name: 'SharePointVersion')
295
308
  SharedDriveId = Shapes::StringShape.new(name: 'SharedDriveId')
309
+ SiteMap = Shapes::StringShape.new(name: 'SiteMap')
310
+ SiteMapsConfiguration = Shapes::StructureShape.new(name: 'SiteMapsConfiguration')
311
+ SiteMapsList = Shapes::ListShape.new(name: 'SiteMapsList')
296
312
  SortOrder = Shapes::StringShape.new(name: 'SortOrder')
297
313
  SortingConfiguration = Shapes::StructureShape.new(name: 'SortingConfiguration')
298
314
  SqlConfiguration = Shapes::StructureShape.new(name: 'SqlConfiguration')
@@ -341,6 +357,7 @@ module Aws::Kendra
341
357
  UpdateQuerySuggestionsConfigRequest = Shapes::StructureShape.new(name: 'UpdateQuerySuggestionsConfigRequest')
342
358
  UpdateThesaurusRequest = Shapes::StructureShape.new(name: 'UpdateThesaurusRequest')
343
359
  Url = Shapes::StringShape.new(name: 'Url')
360
+ Urls = Shapes::StructureShape.new(name: 'Urls')
344
361
  UserAccount = Shapes::StringShape.new(name: 'UserAccount')
345
362
  UserContext = Shapes::StructureShape.new(name: 'UserContext')
346
363
  UserContextPolicy = Shapes::StringShape.new(name: 'UserContextPolicy')
@@ -352,6 +369,8 @@ module Aws::Kendra
352
369
  ValueImportanceMapKey = Shapes::StringShape.new(name: 'ValueImportanceMapKey')
353
370
  VisitorId = Shapes::StringShape.new(name: 'VisitorId')
354
371
  VpcSecurityGroupId = Shapes::StringShape.new(name: 'VpcSecurityGroupId')
372
+ WebCrawlerConfiguration = Shapes::StructureShape.new(name: 'WebCrawlerConfiguration')
373
+ WebCrawlerMode = Shapes::StringShape.new(name: 'WebCrawlerMode')
355
374
 
356
375
  AccessControlListConfiguration.add_member(:key_path, Shapes::ShapeRef.new(shape: S3ObjectKey, location_name: "KeyPath"))
357
376
  AccessControlListConfiguration.struct_class = Types::AccessControlListConfiguration
@@ -386,6 +405,16 @@ module Aws::Kendra
386
405
 
387
406
  AttributeFilterList.member = Shapes::ShapeRef.new(shape: AttributeFilter)
388
407
 
408
+ AuthenticationConfiguration.add_member(:basic_authentication, Shapes::ShapeRef.new(shape: BasicAuthenticationConfigurationList, location_name: "BasicAuthentication"))
409
+ AuthenticationConfiguration.struct_class = Types::AuthenticationConfiguration
410
+
411
+ BasicAuthenticationConfiguration.add_member(:host, Shapes::ShapeRef.new(shape: Host, required: true, location_name: "Host"))
412
+ BasicAuthenticationConfiguration.add_member(:port, Shapes::ShapeRef.new(shape: Port, required: true, location_name: "Port"))
413
+ BasicAuthenticationConfiguration.add_member(:credentials, Shapes::ShapeRef.new(shape: SecretArn, required: true, location_name: "Credentials"))
414
+ BasicAuthenticationConfiguration.struct_class = Types::BasicAuthenticationConfiguration
415
+
416
+ BasicAuthenticationConfigurationList.member = Shapes::ShapeRef.new(shape: BasicAuthenticationConfiguration)
417
+
389
418
  BatchDeleteDocumentRequest.add_member(:index_id, Shapes::ShapeRef.new(shape: IndexId, required: true, location_name: "IndexId"))
390
419
  BatchDeleteDocumentRequest.add_member(:document_id_list, Shapes::ShapeRef.new(shape: DocumentIdList, required: true, location_name: "DocumentIdList"))
391
420
  BatchDeleteDocumentRequest.add_member(:data_source_sync_job_metric_target, Shapes::ShapeRef.new(shape: DataSourceSyncJobMetricTarget, location_name: "DataSourceSyncJobMetricTarget"))
@@ -595,6 +624,7 @@ module Aws::Kendra
595
624
  DataSourceConfiguration.add_member(:service_now_configuration, Shapes::ShapeRef.new(shape: ServiceNowConfiguration, location_name: "ServiceNowConfiguration"))
596
625
  DataSourceConfiguration.add_member(:confluence_configuration, Shapes::ShapeRef.new(shape: ConfluenceConfiguration, location_name: "ConfluenceConfiguration"))
597
626
  DataSourceConfiguration.add_member(:google_drive_configuration, Shapes::ShapeRef.new(shape: GoogleDriveConfiguration, location_name: "GoogleDriveConfiguration"))
627
+ DataSourceConfiguration.add_member(:web_crawler_configuration, Shapes::ShapeRef.new(shape: WebCrawlerConfiguration, location_name: "WebCrawlerConfiguration"))
598
628
  DataSourceConfiguration.struct_class = Types::DataSourceConfiguration
599
629
 
600
630
  DataSourceInclusionsExclusionsStrings.member = Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStringsMember)
@@ -1007,6 +1037,11 @@ module Aws::Kendra
1007
1037
 
1008
1038
  PrincipalList.member = Shapes::ShapeRef.new(shape: Principal)
1009
1039
 
1040
+ ProxyConfiguration.add_member(:host, Shapes::ShapeRef.new(shape: Host, required: true, location_name: "Host"))
1041
+ ProxyConfiguration.add_member(:port, Shapes::ShapeRef.new(shape: Port, required: true, location_name: "Port"))
1042
+ ProxyConfiguration.add_member(:credentials, Shapes::ShapeRef.new(shape: SecretArn, location_name: "Credentials"))
1043
+ ProxyConfiguration.struct_class = Types::ProxyConfiguration
1044
+
1010
1045
  QueryRequest.add_member(:index_id, Shapes::ShapeRef.new(shape: IndexId, required: true, location_name: "IndexId"))
1011
1046
  QueryRequest.add_member(:query_text, Shapes::ShapeRef.new(shape: QueryText, required: true, location_name: "QueryText"))
1012
1047
  QueryRequest.add_member(:attribute_filter, Shapes::ShapeRef.new(shape: AttributeFilter, location_name: "AttributeFilter"))
@@ -1150,6 +1185,12 @@ module Aws::Kendra
1150
1185
 
1151
1186
  SecurityGroupIdList.member = Shapes::ShapeRef.new(shape: VpcSecurityGroupId)
1152
1187
 
1188
+ SeedUrlConfiguration.add_member(:seed_urls, Shapes::ShapeRef.new(shape: SeedUrlList, required: true, location_name: "SeedUrls"))
1189
+ SeedUrlConfiguration.add_member(:web_crawler_mode, Shapes::ShapeRef.new(shape: WebCrawlerMode, location_name: "WebCrawlerMode"))
1190
+ SeedUrlConfiguration.struct_class = Types::SeedUrlConfiguration
1191
+
1192
+ SeedUrlList.member = Shapes::ShapeRef.new(shape: SeedUrl)
1193
+
1153
1194
  ServerSideEncryptionConfiguration.add_member(:kms_key_id, Shapes::ShapeRef.new(shape: KmsKeyId, location_name: "KmsKeyId"))
1154
1195
  ServerSideEncryptionConfiguration.struct_class = Types::ServerSideEncryptionConfiguration
1155
1196
 
@@ -1196,6 +1237,11 @@ module Aws::Kendra
1196
1237
 
1197
1238
  SharePointUrlList.member = Shapes::ShapeRef.new(shape: Url)
1198
1239
 
1240
+ SiteMapsConfiguration.add_member(:site_maps, Shapes::ShapeRef.new(shape: SiteMapsList, required: true, location_name: "SiteMaps"))
1241
+ SiteMapsConfiguration.struct_class = Types::SiteMapsConfiguration
1242
+
1243
+ SiteMapsList.member = Shapes::ShapeRef.new(shape: SiteMap)
1244
+
1199
1245
  SortingConfiguration.add_member(:document_attribute_key, Shapes::ShapeRef.new(shape: DocumentAttributeKey, required: true, location_name: "DocumentAttributeKey"))
1200
1246
  SortingConfiguration.add_member(:sort_order, Shapes::ShapeRef.new(shape: SortOrder, required: true, location_name: "SortOrder"))
1201
1247
  SortingConfiguration.struct_class = Types::SortingConfiguration
@@ -1334,6 +1380,10 @@ module Aws::Kendra
1334
1380
  UpdateThesaurusRequest.add_member(:source_s3_path, Shapes::ShapeRef.new(shape: S3Path, location_name: "SourceS3Path"))
1335
1381
  UpdateThesaurusRequest.struct_class = Types::UpdateThesaurusRequest
1336
1382
 
1383
+ Urls.add_member(:seed_url_configuration, Shapes::ShapeRef.new(shape: SeedUrlConfiguration, location_name: "SeedUrlConfiguration"))
1384
+ Urls.add_member(:site_maps_configuration, Shapes::ShapeRef.new(shape: SiteMapsConfiguration, location_name: "SiteMapsConfiguration"))
1385
+ Urls.struct_class = Types::Urls
1386
+
1337
1387
  UserContext.add_member(:token, Shapes::ShapeRef.new(shape: Token, location_name: "Token"))
1338
1388
  UserContext.struct_class = Types::UserContext
1339
1389
 
@@ -1349,6 +1399,17 @@ module Aws::Kendra
1349
1399
  ValueImportanceMap.key = Shapes::ShapeRef.new(shape: ValueImportanceMapKey)
1350
1400
  ValueImportanceMap.value = Shapes::ShapeRef.new(shape: Importance)
1351
1401
 
1402
+ WebCrawlerConfiguration.add_member(:urls, Shapes::ShapeRef.new(shape: Urls, required: true, location_name: "Urls"))
1403
+ WebCrawlerConfiguration.add_member(:crawl_depth, Shapes::ShapeRef.new(shape: CrawlDepth, location_name: "CrawlDepth"))
1404
+ WebCrawlerConfiguration.add_member(:max_links_per_page, Shapes::ShapeRef.new(shape: MaxLinksPerPage, location_name: "MaxLinksPerPage"))
1405
+ WebCrawlerConfiguration.add_member(:max_content_size_per_page_in_mega_bytes, Shapes::ShapeRef.new(shape: MaxContentSizePerPageInMegaBytes, location_name: "MaxContentSizePerPageInMegaBytes"))
1406
+ WebCrawlerConfiguration.add_member(:max_urls_per_minute_crawl_rate, Shapes::ShapeRef.new(shape: MaxUrlsPerMinuteCrawlRate, location_name: "MaxUrlsPerMinuteCrawlRate"))
1407
+ WebCrawlerConfiguration.add_member(:url_inclusion_patterns, Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStrings, location_name: "UrlInclusionPatterns"))
1408
+ WebCrawlerConfiguration.add_member(:url_exclusion_patterns, Shapes::ShapeRef.new(shape: DataSourceInclusionsExclusionsStrings, location_name: "UrlExclusionPatterns"))
1409
+ WebCrawlerConfiguration.add_member(:proxy_configuration, Shapes::ShapeRef.new(shape: ProxyConfiguration, location_name: "ProxyConfiguration"))
1410
+ WebCrawlerConfiguration.add_member(:authentication_configuration, Shapes::ShapeRef.new(shape: AuthenticationConfiguration, location_name: "AuthenticationConfiguration"))
1411
+ WebCrawlerConfiguration.struct_class = Types::WebCrawlerConfiguration
1412
+
1352
1413
 
1353
1414
  # @api private
1354
1415
  API = Seahorse::Model::Api.new.tap do |api|
@@ -492,6 +492,87 @@ module Aws::Kendra
492
492
  include Aws::Structure
493
493
  end
494
494
 
495
+ # Provides the configuration information to connect to websites that
496
+ # require user authentication.
497
+ #
498
+ # @note When making an API call, you may pass AuthenticationConfiguration
499
+ # data as a hash:
500
+ #
501
+ # {
502
+ # basic_authentication: [
503
+ # {
504
+ # host: "Host", # required
505
+ # port: 1, # required
506
+ # credentials: "SecretArn", # required
507
+ # },
508
+ # ],
509
+ # }
510
+ #
511
+ # @!attribute [rw] basic_authentication
512
+ # The list of configuration information that's required to connect to
513
+ # and crawl a website host using basic authentication credentials.
514
+ #
515
+ # The list includes the name and port number of the website host.
516
+ # @return [Array<Types::BasicAuthenticationConfiguration>]
517
+ #
518
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/AuthenticationConfiguration AWS API Documentation
519
+ #
520
+ class AuthenticationConfiguration < Struct.new(
521
+ :basic_authentication)
522
+ SENSITIVE = []
523
+ include Aws::Structure
524
+ end
525
+
526
+ # Provides the configuration information to connect to websites that
527
+ # require basic user authentication.
528
+ #
529
+ # @note When making an API call, you may pass BasicAuthenticationConfiguration
530
+ # data as a hash:
531
+ #
532
+ # {
533
+ # host: "Host", # required
534
+ # port: 1, # required
535
+ # credentials: "SecretArn", # required
536
+ # }
537
+ #
538
+ # @!attribute [rw] host
539
+ # The name of the website host you want to connect to using
540
+ # authentication credentials.
541
+ #
542
+ # For example, the host name of https://a.example.com/page1.html is
543
+ # "a.example.com".
544
+ # @return [String]
545
+ #
546
+ # @!attribute [rw] port
547
+ # The port number of the website host you want to connect to using
548
+ # authentication credentials.
549
+ #
550
+ # For example, the port for https://a.example.com/page1.html is 443,
551
+ # the standard port for HTTPS.
552
+ # @return [Integer]
553
+ #
554
+ # @!attribute [rw] credentials
555
+ # Your secret ARN, which you can create in [AWS Secrets Manager][1].
556
+ #
557
+ # You use a secret if basic authentication credentials are required to
558
+ # connect to a website. The secret stores your credentials of user
559
+ # name and password.
560
+ #
561
+ #
562
+ #
563
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
564
+ # @return [String]
565
+ #
566
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/BasicAuthenticationConfiguration AWS API Documentation
567
+ #
568
+ class BasicAuthenticationConfiguration < Struct.new(
569
+ :host,
570
+ :port,
571
+ :credentials)
572
+ SENSITIVE = []
573
+ include Aws::Structure
574
+ end
575
+
495
576
  # @note When making an API call, you may pass BatchDeleteDocumentRequest
496
577
  # data as a hash:
497
578
  #
@@ -801,8 +882,9 @@ module Aws::Kendra
801
882
  include Aws::Structure
802
883
  end
803
884
 
804
- # Specifies capacity units configured for your index. You can add and
805
- # remove capacity units to tune an index to your requirements.
885
+ # Specifies capacity units configured for your enterprise edition index.
886
+ # You can add and remove capacity units to tune an index to your
887
+ # requirements.
806
888
  #
807
889
  # @note When making an API call, you may pass CapacityUnitsConfiguration
808
890
  # data as a hash:
@@ -813,14 +895,28 @@ module Aws::Kendra
813
895
  # }
814
896
  #
815
897
  # @!attribute [rw] storage_capacity_units
816
- # The amount of extra storage capacity for an index. Each capacity
817
- # unit provides 150 Gb of storage space or 500,000 documents,
818
- # whichever is reached first.
898
+ # The amount of extra storage capacity for an index. A single capacity
899
+ # unit for an index provides 150 GB of storage space or 500,000
900
+ # documents, whichever is reached first.
819
901
  # @return [Integer]
820
902
  #
821
903
  # @!attribute [rw] query_capacity_units
822
- # The amount of extra query capacity for an index. Each capacity unit
823
- # provides 0.5 queries per second and 40,000 queries per day.
904
+ # The amount of extra query capacity for an index and
905
+ # [GetQuerySuggestions][1] capacity.
906
+ #
907
+ # A single extra capacity unit for an index provides 0.5 queries per
908
+ # second or approximately 40,000 queries per day.
909
+ #
910
+ # `GetQuerySuggestions` capacity is 5 times the provisioned query
911
+ # capacity for an index. For example, the base capacity for an index
912
+ # is 0.5 queries per second, so `GetQuerySuggestions` capacity is 2.5
913
+ # calls per second. If you add another 0.5 queries per second to total
914
+ # 1 query per second for an index, the `GetQuerySuggestions`
915
+ # capacity is 5 calls per second.
916
+ #
917
+ #
918
+ #
919
+ # [1]: https://docs.aws.amazon.com/kendra/latest/dg/API_GetQuerySuggestions.html
824
920
  # @return [Integer]
825
921
  #
826
922
  # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/CapacityUnitsConfiguration AWS API Documentation
@@ -1499,7 +1595,7 @@ module Aws::Kendra
1499
1595
  # {
1500
1596
  # name: "DataSourceName", # required
1501
1597
  # index_id: "IndexId", # required
1502
- # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE
1598
+ # type: "S3", # required, accepts S3, SHAREPOINT, DATABASE, SALESFORCE, ONEDRIVE, SERVICENOW, CUSTOM, CONFLUENCE, GOOGLEDRIVE, WEBCRAWLER
1503
1599
  # configuration: {
1504
1600
  # s3_configuration: {
1505
1601
  # bucket_name: "S3BucketName", # required
@@ -1762,6 +1858,37 @@ module Aws::Kendra
1762
1858
  # exclude_user_accounts: ["UserAccount"],
1763
1859
  # exclude_shared_drives: ["SharedDriveId"],
1764
1860
  # },
1861
+ # web_crawler_configuration: {
1862
+ # urls: { # required
1863
+ # seed_url_configuration: {
1864
+ # seed_urls: ["SeedUrl"], # required
1865
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
1866
+ # },
1867
+ # site_maps_configuration: {
1868
+ # site_maps: ["SiteMap"], # required
1869
+ # },
1870
+ # },
1871
+ # crawl_depth: 1,
1872
+ # max_links_per_page: 1,
1873
+ # max_content_size_per_page_in_mega_bytes: 1.0,
1874
+ # max_urls_per_minute_crawl_rate: 1,
1875
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
1876
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
1877
+ # proxy_configuration: {
1878
+ # host: "Host", # required
1879
+ # port: 1, # required
1880
+ # credentials: "SecretArn",
1881
+ # },
1882
+ # authentication_configuration: {
1883
+ # basic_authentication: [
1884
+ # {
1885
+ # host: "Host", # required
1886
+ # port: 1, # required
1887
+ # credentials: "SecretArn", # required
1888
+ # },
1889
+ # ],
1890
+ # },
1891
+ # },
1765
1892
  # },
1766
1893
  # description: "Description",
1767
1894
  # schedule: "ScanSchedule",
@@ -2570,6 +2697,37 @@ module Aws::Kendra
2570
2697
  # exclude_user_accounts: ["UserAccount"],
2571
2698
  # exclude_shared_drives: ["SharedDriveId"],
2572
2699
  # },
2700
+ # web_crawler_configuration: {
2701
+ # urls: { # required
2702
+ # seed_url_configuration: {
2703
+ # seed_urls: ["SeedUrl"], # required
2704
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
2705
+ # },
2706
+ # site_maps_configuration: {
2707
+ # site_maps: ["SiteMap"], # required
2708
+ # },
2709
+ # },
2710
+ # crawl_depth: 1,
2711
+ # max_links_per_page: 1,
2712
+ # max_content_size_per_page_in_mega_bytes: 1.0,
2713
+ # max_urls_per_minute_crawl_rate: 1,
2714
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
2715
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
2716
+ # proxy_configuration: {
2717
+ # host: "Host", # required
2718
+ # port: 1, # required
2719
+ # credentials: "SecretArn",
2720
+ # },
2721
+ # authentication_configuration: {
2722
+ # basic_authentication: [
2723
+ # {
2724
+ # host: "Host", # required
2725
+ # port: 1, # required
2726
+ # credentials: "SecretArn", # required
2727
+ # },
2728
+ # ],
2729
+ # },
2730
+ # },
2573
2731
  # }
2574
2732
  #
2575
2733
  # @!attribute [rw] s3_configuration
@@ -2612,6 +2770,11 @@ module Aws::Kendra
2612
2770
  # Drive.
2613
2771
  # @return [Types::GoogleDriveConfiguration]
2614
2772
  #
2773
+ # @!attribute [rw] web_crawler_configuration
2774
+ # Provides the configuration information required for Amazon Kendra
2775
+ # web crawler.
2776
+ # @return [Types::WebCrawlerConfiguration]
2777
+ #
2615
2778
  # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/DataSourceConfiguration AWS API Documentation
2616
2779
  #
2617
2780
  class DataSourceConfiguration < Struct.new(
@@ -2622,7 +2785,8 @@ module Aws::Kendra
2622
2785
  :one_drive_configuration,
2623
2786
  :service_now_configuration,
2624
2787
  :confluence_configuration,
2625
- :google_drive_configuration)
2788
+ :google_drive_configuration,
2789
+ :web_crawler_configuration)
2626
2790
  SENSITIVE = []
2627
2791
  include Aws::Structure
2628
2792
  end
@@ -5048,6 +5212,57 @@ module Aws::Kendra
5048
5212
  include Aws::Structure
5049
5213
  end
5050
5214
 
5215
+ # Provides the configuration information for a web proxy to connect to
5216
+ # website hosts.
5217
+ #
5218
+ # @note When making an API call, you may pass ProxyConfiguration
5219
+ # data as a hash:
5220
+ #
5221
+ # {
5222
+ # host: "Host", # required
5223
+ # port: 1, # required
5224
+ # credentials: "SecretArn",
5225
+ # }
5226
+ #
5227
+ # @!attribute [rw] host
5228
+ # The name of the website host you want to connect to via a web proxy
5229
+ # server.
5230
+ #
5231
+ # For example, the host name of https://a.example.com/page1.html is
5232
+ # "a.example.com".
5233
+ # @return [String]
5234
+ #
5235
+ # @!attribute [rw] port
5236
+ # The port number of the website host you want to connect to via a web
5237
+ # proxy server.
5238
+ #
5239
+ # For example, the port for https://a.example.com/page1.html is 443,
5240
+ # the standard port for HTTPS.
5241
+ # @return [Integer]
5242
+ #
5243
+ # @!attribute [rw] credentials
5244
+ # Your secret ARN, which you can create in [AWS Secrets Manager][1].
5245
+ #
5246
+ # The credentials are optional. You use a secret if web proxy
5247
+ # credentials are required to connect to a website host. Amazon Kendra
5248
+ # currently supports basic authentication to connect to a web proxy
5249
+ # server. The secret stores your credentials.
5250
+ #
5251
+ #
5252
+ #
5253
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
5254
+ # @return [String]
5255
+ #
5256
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/ProxyConfiguration AWS API Documentation
5257
+ #
5258
+ class ProxyConfiguration < Struct.new(
5259
+ :host,
5260
+ :port,
5261
+ :credentials)
5262
+ SENSITIVE = []
5263
+ include Aws::Structure
5264
+ end
5265
+
5051
5266
  # @note When making an API call, you may pass QueryRequest
5052
5267
  # data as a hash:
5053
5268
  #
@@ -6253,6 +6468,59 @@ module Aws::Kendra
6253
6468
  include Aws::Structure
6254
6469
  end
6255
6470
 
6471
+ # Provides the configuration information of the seed or starting point
6472
+ # URLs to crawl.
6473
+ #
6474
+ # *When selecting websites to index, you must adhere to the [Amazon
6475
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
6476
+ # you must only use the Amazon Kendra web crawler to index your own
6477
+ # webpages, or webpages that you have authorization to index.*
6478
+ #
6479
+ #
6480
+ #
6481
+ # [1]: https://aws.amazon.com/aup/
6482
+ #
6483
+ # @note When making an API call, you may pass SeedUrlConfiguration
6484
+ # data as a hash:
6485
+ #
6486
+ # {
6487
+ # seed_urls: ["SeedUrl"], # required
6488
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
6489
+ # }
6490
+ #
6491
+ # @!attribute [rw] seed_urls
6492
+ # The list of seed or starting point URLs of the websites you want to
6493
+ # crawl.
6494
+ #
6495
+ # The list can include a maximum of 100 seed URLs.
6496
+ # @return [Array<String>]
6497
+ #
6498
+ # @!attribute [rw] web_crawler_mode
6499
+ # You can choose one of the following modes:
6500
+ #
6501
+ # * `HOST_ONLY` – crawl only the website host names. For example, if
6502
+ # the seed URL is "abc.example.com", then only URLs with host name
6503
+ # "abc.example.com" are crawled.
6504
+ #
6505
+ # * `SUBDOMAINS` – crawl the website host names with subdomains. For
6506
+ # example, if the seed URL is "abc.example.com", then
6507
+ # "a.abc.example.com" and "b.abc.example.com" are also crawled.
6508
+ #
6509
+ # * `EVERYTHING` – crawl the website host names with subdomains and
6510
+ # other domains that the webpages link to.
6511
+ #
6512
+ # The default mode is set to `HOST_ONLY`.
6513
+ # @return [String]
6514
+ #
6515
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/SeedUrlConfiguration AWS API Documentation
6516
+ #
6517
+ class SeedUrlConfiguration < Struct.new(
6518
+ :seed_urls,
6519
+ :web_crawler_mode)
6520
+ SENSITIVE = []
6521
+ include Aws::Structure
6522
+ end
6523
+
6256
6524
  # Provides the identifier of the AWS KMS customer master key (CMK) used
6257
6525
  # to encrypt data indexed by Amazon Kendra. Amazon Kendra doesn't
6258
6526
  # support asymmetric CMKs.
@@ -6675,6 +6943,38 @@ module Aws::Kendra
6675
6943
  include Aws::Structure
6676
6944
  end
6677
6945
 
6946
+ # Provides the configuration information of the sitemap URLs to crawl.
6947
+ #
6948
+ # *When selecting websites to index, you must adhere to the [Amazon
6949
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
6950
+ # you must only use the Amazon Kendra web crawler to index your own
6951
+ # webpages, or webpages that you have authorization to index.*
6952
+ #
6953
+ #
6954
+ #
6955
+ # [1]: https://aws.amazon.com/aup/
6956
+ #
6957
+ # @note When making an API call, you may pass SiteMapsConfiguration
6958
+ # data as a hash:
6959
+ #
6960
+ # {
6961
+ # site_maps: ["SiteMap"], # required
6962
+ # }
6963
+ #
6964
+ # @!attribute [rw] site_maps
6965
+ # The list of sitemap URLs of the websites you want to crawl.
6966
+ #
6967
+ # The list can include a maximum of three sitemap URLs.
6968
+ # @return [Array<String>]
6969
+ #
6970
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/SiteMapsConfiguration AWS API Documentation
6971
+ #
6972
+ class SiteMapsConfiguration < Struct.new(
6973
+ :site_maps)
6974
+ SENSITIVE = []
6975
+ include Aws::Structure
6976
+ end
6977
+
6678
6978
  # Specifies the document attribute to use to sort the response to an
6679
6979
  # Amazon Kendra query. You can specify a single attribute for sorting.
6680
6980
  # The attribute must have the `Sortable` flag set to `true`, otherwise
@@ -7476,6 +7776,37 @@ module Aws::Kendra
7476
7776
  # exclude_user_accounts: ["UserAccount"],
7477
7777
  # exclude_shared_drives: ["SharedDriveId"],
7478
7778
  # },
7779
+ # web_crawler_configuration: {
7780
+ # urls: { # required
7781
+ # seed_url_configuration: {
7782
+ # seed_urls: ["SeedUrl"], # required
7783
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
7784
+ # },
7785
+ # site_maps_configuration: {
7786
+ # site_maps: ["SiteMap"], # required
7787
+ # },
7788
+ # },
7789
+ # crawl_depth: 1,
7790
+ # max_links_per_page: 1,
7791
+ # max_content_size_per_page_in_mega_bytes: 1.0,
7792
+ # max_urls_per_minute_crawl_rate: 1,
7793
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
7794
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
7795
+ # proxy_configuration: {
7796
+ # host: "Host", # required
7797
+ # port: 1, # required
7798
+ # credentials: "SecretArn",
7799
+ # },
7800
+ # authentication_configuration: {
7801
+ # basic_authentication: [
7802
+ # {
7803
+ # host: "Host", # required
7804
+ # port: 1, # required
7805
+ # credentials: "SecretArn", # required
7806
+ # },
7807
+ # ],
7808
+ # },
7809
+ # },
7479
7810
  # },
7480
7811
  # description: "Description",
7481
7812
  # schedule: "ScanSchedule",
@@ -7838,6 +8169,58 @@ module Aws::Kendra
7838
8169
  include Aws::Structure
7839
8170
  end
7840
8171
 
8172
+ # Provides the configuration information of the URLs to crawl.
8173
+ #
8174
+ # *When selecting websites to index, you must adhere to the [Amazon
8175
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
8176
+ # you must only use the Amazon Kendra web crawler to index your own
8177
+ # webpages, or webpages that you have authorization to index.*
8178
+ #
8179
+ #
8180
+ #
8181
+ # [1]: https://aws.amazon.com/aup/
8182
+ #
8183
+ # @note When making an API call, you may pass Urls
8184
+ # data as a hash:
8185
+ #
8186
+ # {
8187
+ # seed_url_configuration: {
8188
+ # seed_urls: ["SeedUrl"], # required
8189
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
8190
+ # },
8191
+ # site_maps_configuration: {
8192
+ # site_maps: ["SiteMap"], # required
8193
+ # },
8194
+ # }
8195
+ #
8196
+ # @!attribute [rw] seed_url_configuration
8197
+ # Provides the configuration of the seed or starting point URLs of the
8198
+ # websites you want to crawl.
8199
+ #
8200
+ # You can choose to crawl only the website host names, or the website
8201
+ # host names with subdomains, or the website host names with
8202
+ # subdomains and other domains that the webpages link to.
8203
+ #
8204
+ # You can list up to 100 seed URLs.
8205
+ # @return [Types::SeedUrlConfiguration]
8206
+ #
8207
+ # @!attribute [rw] site_maps_configuration
8208
+ # Provides the configuration of the sitemap URLs of the websites you
8209
+ # want to crawl.
8210
+ #
8211
+ # Only URLs belonging to the same website host names are crawled. You
8212
+ # can list up to three sitemap URLs.
8213
+ # @return [Types::SiteMapsConfiguration]
8214
+ #
8215
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/Urls AWS API Documentation
8216
+ #
8217
+ class Urls < Struct.new(
8218
+ :seed_url_configuration,
8219
+ :site_maps_configuration)
8220
+ SENSITIVE = []
8221
+ include Aws::Structure
8222
+ end
8223
+
7841
8224
  # Provides information about the user context for an Amazon Kendra index.
7842
8225
  #
7843
8226
  # @note When making an API call, you may pass UserContext
@@ -7908,5 +8291,167 @@ module Aws::Kendra
7908
8291
  include Aws::Structure
7909
8292
  end
7910
8293
 
8294
+ # Provides the configuration information required for Amazon Kendra web
8295
+ # crawler.
8296
+ #
8297
+ # @note When making an API call, you may pass WebCrawlerConfiguration
8298
+ # data as a hash:
8299
+ #
8300
+ # {
8301
+ # urls: { # required
8302
+ # seed_url_configuration: {
8303
+ # seed_urls: ["SeedUrl"], # required
8304
+ # web_crawler_mode: "HOST_ONLY", # accepts HOST_ONLY, SUBDOMAINS, EVERYTHING
8305
+ # },
8306
+ # site_maps_configuration: {
8307
+ # site_maps: ["SiteMap"], # required
8308
+ # },
8309
+ # },
8310
+ # crawl_depth: 1,
8311
+ # max_links_per_page: 1,
8312
+ # max_content_size_per_page_in_mega_bytes: 1.0,
8313
+ # max_urls_per_minute_crawl_rate: 1,
8314
+ # url_inclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
8315
+ # url_exclusion_patterns: ["DataSourceInclusionsExclusionsStringsMember"],
8316
+ # proxy_configuration: {
8317
+ # host: "Host", # required
8318
+ # port: 1, # required
8319
+ # credentials: "SecretArn",
8320
+ # },
8321
+ # authentication_configuration: {
8322
+ # basic_authentication: [
8323
+ # {
8324
+ # host: "Host", # required
8325
+ # port: 1, # required
8326
+ # credentials: "SecretArn", # required
8327
+ # },
8328
+ # ],
8329
+ # },
8330
+ # }
8331
+ #
8332
+ # @!attribute [rw] urls
8333
+ # Specifies the seed or starting point URLs of the websites or the
8334
+ # sitemap URLs of the websites you want to crawl.
8335
+ #
8336
+ # You can include website subdomains. You can list up to 100 seed URLs
8337
+ # and up to three sitemap URLs.
8338
+ #
8339
+ # *When selecting websites to index, you must adhere to the [Amazon
8340
+ # Acceptable Use Policy][1] and all other Amazon terms. Remember that
8341
+ # you must only use the Amazon Kendra web crawler to index your own
8342
+ # webpages, or webpages that you have authorization to index.*
8343
+ #
8344
+ #
8345
+ #
8346
+ # [1]: https://aws.amazon.com/aup/
8347
+ # @return [Types::Urls]
8348
+ #
8349
+ # @!attribute [rw] crawl_depth
8350
+ # Specifies the number of levels in a website that you want to crawl.
8351
+ #
8352
+ # The first level begins from the website seed or starting point URL.
8353
+ # For example, if a website has 3 levels – index level (i.e. seed in
8354
+ # this example), sections level, and subsections level – and you are
8355
+ # only interested in crawling information up to the sections level
8356
+ # (i.e. levels 0-1), you can set your depth to 1.
8357
+ #
8358
+ # The default crawl depth is set to 2.
8359
+ # @return [Integer]
8360
+ #
8361
+ # @!attribute [rw] max_links_per_page
8362
+ # The maximum number of URLs on a webpage to include when crawling a
8363
+ # website. This number is per webpage.
8364
+ #
8365
+ # As a website’s webpages are crawled, any URLs the webpages link to
8366
+ # are also crawled. URLs on a webpage are crawled in order of
8367
+ # appearance.
8368
+ #
8369
+ # The default maximum links per page is 100.
8370
+ # @return [Integer]
8371
+ #
8372
+ # @!attribute [rw] max_content_size_per_page_in_mega_bytes
8373
+ # The maximum size (in MB) of a webpage or attachment to crawl.
8374
+ #
8375
+ # Files larger than this size (in MB) are skipped/not crawled.
8376
+ #
8377
+ # The default maximum size of a webpage or attachment is set to 50 MB.
8378
+ # @return [Float]
8379
+ #
8380
+ # @!attribute [rw] max_urls_per_minute_crawl_rate
8381
+ # The maximum number of URLs crawled per website host per minute.
8382
+ #
8383
+ # A minimum of one URL is required.
8384
+ #
8385
+ # The default maximum number of URLs crawled per website host per
8386
+ # minute is 300.
8387
+ # @return [Integer]
8388
+ #
8389
+ # @!attribute [rw] url_inclusion_patterns
8390
+ # The regular expression patterns for URLs to include in the crawl.
8391
+ #
8392
+ # If there is a regular expression pattern to exclude certain URLs
8393
+ # that conflicts with the include pattern, the exclude pattern takes
8394
+ # precedence.
8395
+ # @return [Array<String>]
8396
+ #
8397
+ # @!attribute [rw] url_exclusion_patterns
8398
+ # The regular expression patterns for URLs to exclude from the crawl.
8399
+ #
8400
+ # If there is a regular expression pattern to include certain URLs
8401
+ # that conflicts with the exclude pattern, the exclude pattern takes
8402
+ # precedence.
8403
+ # @return [Array<String>]
8404
+ #
8405
+ # @!attribute [rw] proxy_configuration
8406
+ # Provides configuration information required to connect to your
8407
+ # internal websites via a web proxy.
8408
+ #
8409
+ # You must provide the website host name and port number. For example,
8410
+ # the host name of https://a.example.com/page1.html is
8411
+ # "a.example.com" and the port is 443, the standard port for HTTPS.
8412
+ #
8413
+ # Web proxy credentials are optional and you can use them to connect
8414
+ # to a web proxy server that requires basic authentication. To store
8415
+ # web proxy credentials, you use a secret in [AWS Secrets Manager][1].
8416
+ #
8417
+ #
8418
+ #
8419
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
8420
+ # @return [Types::ProxyConfiguration]
8421
+ #
8422
+ # @!attribute [rw] authentication_configuration
8423
+ # Provides configuration information required to connect to websites
8424
+ # using authentication.
8425
+ #
8426
+ # You can connect to websites using basic authentication of user name
8427
+ # and password.
8428
+ #
8429
+ # You must provide the website host name and port number. For example,
8430
+ # the host name of https://a.example.com/page1.html is
8431
+ # "a.example.com" and the port is 443, the standard port for HTTPS.
8432
+ # You use a secret in [AWS Secrets Manager][1] to store your
8433
+ # authentication credentials.
8434
+ #
8435
+ #
8436
+ #
8437
+ # [1]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html
8438
+ # @return [Types::AuthenticationConfiguration]
8439
+ #
8440
+ # @see http://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/WebCrawlerConfiguration AWS API Documentation
8441
+ #
8442
+ class WebCrawlerConfiguration < Struct.new(
8443
+ :urls,
8444
+ :crawl_depth,
8445
+ :max_links_per_page,
8446
+ :max_content_size_per_page_in_mega_bytes,
8447
+ :max_urls_per_minute_crawl_rate,
8448
+ :url_inclusion_patterns,
8449
+ :url_exclusion_patterns,
8450
+ :proxy_configuration,
8451
+ :authentication_configuration)
8452
+ SENSITIVE = []
8453
+ include Aws::Structure
8454
+ end
8455
+
7911
8456
  end
7912
8457
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-kendra
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.26.0
4
+ version: 1.27.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-09 00:00:00.000000000 Z
11
+ date: 2021-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core