carbon_ruby_sdk 0.2.14 → 0.2.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +19 -7
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +20 -12
- data/lib/carbon_ruby_sdk/api/utilities_api.rb +6 -2
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
- data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +18 -5
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
- data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/spec/api/integrations_api_spec.rb +2 -2
- data/spec/models/sitemap_scrape_request_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06d33414be65f6bd0cd426ab4a944e8de9dcc7dca64b35d4ea90fa2ce3bf0474
|
4
|
+
data.tar.gz: 3a9d7272878fa2303e3bd0f2cf41796646f9f1689c034a5a16770db3e3fdcc02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41d5d17eb9b5f4bf0dedccadba7b29d796baddca4fe74bf4f1844b5bb52b440242bb917d6c49d074ca83598097b57483fe2cc5dc6a1acaade61cc9f5c1fb9813
|
7
|
+
data.tar.gz: 19f659c16c5845b371f99d82daf3cb8d37a5ffa88b7166e9afb7386a5e2991c2caed36c1d494a6a2237be4f0e0754b01b7bdd7e7ec6e8410f25c65754e20a436
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Connect external data to LLMs, no matter the source.
|
8
8
|
|
9
|
-
[![npm](https://img.shields.io/badge/gem-v0.2.14-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.14)
|
9
|
+
[![npm](https://img.shields.io/badge/gem-v0.2.15-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.15)
|
10
10
|
|
11
11
|
</div>
|
12
12
|
|
@@ -91,7 +91,7 @@ Connect external data to LLMs, no matter the source.
|
|
91
91
|
Add to Gemfile:
|
92
92
|
|
93
93
|
```ruby
|
94
|
-
gem 'carbon_ruby_sdk', '~> 0.2.14'
|
94
|
+
gem 'carbon_ruby_sdk', '~> 0.2.15'
|
95
95
|
```
|
96
96
|
|
97
97
|
## Getting Started<a id="getting-started"></a>
|
@@ -1300,7 +1300,7 @@ result = carbon.integrations.connect_data_source(
|
|
1300
1300
|
"prepend_filename_to_chunks" => false,
|
1301
1301
|
"sync_files_on_connection" => true,
|
1302
1302
|
"set_page_as_boundary" => false,
|
1303
|
-
"request_id" => "
|
1303
|
+
"request_id" => "07b02a24-9429-4a3c-aa98-27ff63503082",
|
1304
1304
|
"enable_file_picker" => true,
|
1305
1305
|
"sync_source_items" => true,
|
1306
1306
|
"incremental_sync" => false,
|
@@ -1520,7 +1520,7 @@ result = carbon.integrations.get_oauth_url(
|
|
1520
1520
|
set_page_as_boundary: false,
|
1521
1521
|
data_source_id: 1,
|
1522
1522
|
connecting_new_account: false,
|
1523
|
-
request_id: "
|
1523
|
+
request_id: "d29ec177-bbc6-43c7-9b16-0ee340804c99",
|
1524
1524
|
use_ocr: false,
|
1525
1525
|
parse_pdf_tables_with_ocr: false,
|
1526
1526
|
enable_file_picker: true,
|
@@ -1581,7 +1581,7 @@ Enable OCR for files that support it. Supported formats: pdf
|
|
1581
1581
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1582
1582
|
##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
|
1583
1583
|
Enable integration's file picker for sources that support it. Supported sources:
|
1584
|
-
|
1584
|
+
BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
|
1585
1585
|
|
1586
1586
|
##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
|
1587
1587
|
Enabling this flag will fetch all available content from the source to be listed
|
@@ -1608,6 +1608,9 @@ other data sources.
|
|
1608
1608
|
|
1609
1609
|
|
1610
1610
|
### `carbon.integrations.list_confluence_pages`<a id="carbonintegrationslist_confluence_pages"></a>
|
1611
|
+
![Deprecated](https://img.shields.io/badge/deprecated-yellow)
|
1612
|
+
|
1613
|
+
This endpoint has been deprecated. Use /integrations/items/list instead.
|
1611
1614
|
|
1612
1615
|
To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
|
1613
1616
|
Confluence account must be specified. This base request returns a list of root pages for
|
@@ -1858,6 +1861,9 @@ p result
|
|
1858
1861
|
|
1859
1862
|
|
1860
1863
|
### `carbon.integrations.sync_confluence`<a id="carbonintegrationssync_confluence"></a>
|
1864
|
+
![Deprecated](https://img.shields.io/badge/deprecated-yellow)
|
1865
|
+
|
1866
|
+
This endpoint has been deprecated. Use /integrations/files/sync instead.
|
1861
1867
|
|
1862
1868
|
After listing pages in a user's Confluence account, the set of selected page `ids` and the
|
1863
1869
|
connected account's `data_source_id` can be passed into this endpoint to sync them into
|
@@ -1881,7 +1887,7 @@ result = carbon.integrations.sync_confluence(
|
|
1881
1887
|
prepend_filename_to_chunks: false,
|
1882
1888
|
max_items_per_chunk: 1,
|
1883
1889
|
set_page_as_boundary: false,
|
1884
|
-
request_id: "
|
1890
|
+
request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
|
1885
1891
|
use_ocr: false,
|
1886
1892
|
parse_pdf_tables_with_ocr: false,
|
1887
1893
|
incremental_sync: false,
|
@@ -1986,7 +1992,7 @@ result = carbon.integrations.sync_files(
|
|
1986
1992
|
prepend_filename_to_chunks: false,
|
1987
1993
|
max_items_per_chunk: 1,
|
1988
1994
|
set_page_as_boundary: false,
|
1989
|
-
request_id: "
|
1995
|
+
request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
|
1990
1996
|
use_ocr: false,
|
1991
1997
|
parse_pdf_tables_with_ocr: false,
|
1992
1998
|
incremental_sync: false,
|
@@ -2966,6 +2972,7 @@ result = carbon.utilities.scrape_sitemap(
|
|
2966
2972
|
embedding_model: "OPENAI",
|
2967
2973
|
url_paths_to_include: [],
|
2968
2974
|
url_paths_to_exclude: [],
|
2975
|
+
urls_to_scrape: [],
|
2969
2976
|
)
|
2970
2977
|
p result
|
2971
2978
|
```
|
@@ -2995,6 +3002,11 @@ URL subpaths or directories that you want to exclude. For example if you want to
|
|
2995
3002
|
exclude URLs that start with /questions in stackoverflow.com, you will add
|
2996
3003
|
/questions/ in this input
|
2997
3004
|
|
3005
|
+
##### urls_to_scrape: Array<`String`><a id="urls_to_scrape-array"></a>
|
3006
|
+
You can submit a subset of URLs from the sitemap that should be scraped. To get
|
3007
|
+
the list of URLs, you can check out /process_sitemap endpoint. If left empty,
|
3008
|
+
all URLs from the sitemap will be scraped.
|
3009
|
+
|
2998
3010
|
#### 🌐 Endpoint<a id="🌐-endpoint"></a>
|
2999
3011
|
|
3000
3012
|
`/scrape_sitemap` `POST`
|
data/lib/carbon_ruby_sdk/api/integrations_api.rb
CHANGED
@@ -653,13 +653,13 @@ module Carbon
|
|
653
653
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
654
654
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
655
655
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
656
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
656
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
|
657
657
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
658
658
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
|
659
659
|
# @param file_sync_config [FileSyncConfigNullable]
|
660
660
|
# @param body [OAuthURLRequest]
|
661
661
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
662
|
-
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
662
|
+
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
663
663
|
_body = {}
|
664
664
|
_body[:tags] = tags if tags != SENTINEL
|
665
665
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -721,13 +721,13 @@ module Carbon
|
|
721
721
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
722
722
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
723
723
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
724
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
724
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
|
725
725
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
726
726
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
|
727
727
|
# @param file_sync_config [FileSyncConfigNullable]
|
728
728
|
# @param body [OAuthURLRequest]
|
729
729
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
730
|
-
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
730
|
+
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
731
731
|
_body = {}
|
732
732
|
_body[:tags] = tags if tags != SENTINEL
|
733
733
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -830,6 +830,8 @@ module Carbon
|
|
830
830
|
|
831
831
|
# Confluence List
|
832
832
|
#
|
833
|
+
# This endpoint has been deprecated. Use /integrations/items/list instead.
|
834
|
+
#
|
833
835
|
# To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
|
834
836
|
# Confluence account must be specified. This base request returns a list of root pages for
|
835
837
|
# every space the user has access to in a Confluence instance. To traverse further down
|
@@ -853,6 +855,8 @@ module Carbon
|
|
853
855
|
|
854
856
|
# Confluence List
|
855
857
|
#
|
858
|
+
# This endpoint has been deprecated. Use /integrations/items/list instead.
|
859
|
+
#
|
856
860
|
# To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
|
857
861
|
# Confluence account must be specified. This base request returns a list of root pages for
|
858
862
|
# every space the user has access to in a Confluence instance. To traverse further down
|
@@ -874,7 +878,7 @@ module Carbon
|
|
874
878
|
end
|
875
879
|
|
876
880
|
# Confluence List
|
877
|
-
# To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
881
|
+
# This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
878
882
|
# @param list_request [ListRequest]
|
879
883
|
# @param [Hash] opts the optional parameters
|
880
884
|
# @return [ListResponse]
|
@@ -884,7 +888,7 @@ module Carbon
|
|
884
888
|
end
|
885
889
|
|
886
890
|
# Confluence List
|
887
|
-
# To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
891
|
+
# This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
888
892
|
# @param list_request [ListRequest]
|
889
893
|
# @param [Hash] opts the optional parameters
|
890
894
|
# @return [APIResponse] data is ListResponse, status code, headers and response
|
@@ -1617,6 +1621,8 @@ module Carbon
|
|
1617
1621
|
|
1618
1622
|
# Confluence Sync
|
1619
1623
|
#
|
1624
|
+
# This endpoint has been deprecated. Use /integrations/files/sync instead.
|
1625
|
+
#
|
1620
1626
|
# After listing pages in a user's Confluence account, the set of selected page `ids` and the
|
1621
1627
|
# connected account's `data_source_id` can be passed into this endpoint to sync them into
|
1622
1628
|
# Carbon. Additional parameters listed below can be used to associate data to the selected
|
@@ -1640,7 +1646,7 @@ module Carbon
|
|
1640
1646
|
# @param file_sync_config [FileSyncConfigNullable]
|
1641
1647
|
# @param body [SyncFilesRequest]
|
1642
1648
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1643
|
-
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1649
|
+
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1644
1650
|
_body = {}
|
1645
1651
|
_body[:tags] = tags if tags != SENTINEL
|
1646
1652
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1665,6 +1671,8 @@ module Carbon
|
|
1665
1671
|
|
1666
1672
|
# Confluence Sync
|
1667
1673
|
#
|
1674
|
+
# This endpoint has been deprecated. Use /integrations/files/sync instead.
|
1675
|
+
#
|
1668
1676
|
# After listing pages in a user's Confluence account, the set of selected page `ids` and the
|
1669
1677
|
# connected account's `data_source_id` can be passed into this endpoint to sync them into
|
1670
1678
|
# Carbon. Additional parameters listed below can be used to associate data to the selected
|
@@ -1688,7 +1696,7 @@ module Carbon
|
|
1688
1696
|
# @param file_sync_config [FileSyncConfigNullable]
|
1689
1697
|
# @param body [SyncFilesRequest]
|
1690
1698
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1691
|
-
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1699
|
+
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1692
1700
|
_body = {}
|
1693
1701
|
_body[:tags] = tags if tags != SENTINEL
|
1694
1702
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1711,7 +1719,7 @@ module Carbon
|
|
1711
1719
|
end
|
1712
1720
|
|
1713
1721
|
# Confluence Sync
|
1714
|
-
# After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
1722
|
+
# This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
1715
1723
|
# @param sync_files_request [SyncFilesRequest]
|
1716
1724
|
# @param [Hash] opts the optional parameters
|
1717
1725
|
# @return [GenericSuccessResponse]
|
@@ -1721,7 +1729,7 @@ module Carbon
|
|
1721
1729
|
end
|
1722
1730
|
|
1723
1731
|
# Confluence Sync
|
1724
|
-
# After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
1732
|
+
# This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
1725
1733
|
# @param sync_files_request [SyncFilesRequest]
|
1726
1734
|
# @param [Hash] opts the optional parameters
|
1727
1735
|
# @return [APIResponse] data is GenericSuccessResponse, status code, headers and response
|
@@ -1896,7 +1904,7 @@ module Carbon
|
|
1896
1904
|
# @param file_sync_config [FileSyncConfigNullable]
|
1897
1905
|
# @param body [SyncFilesRequest]
|
1898
1906
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1899
|
-
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1907
|
+
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1900
1908
|
_body = {}
|
1901
1909
|
_body[:tags] = tags if tags != SENTINEL
|
1902
1910
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1944,7 +1952,7 @@ module Carbon
|
|
1944
1952
|
# @param file_sync_config [FileSyncConfigNullable]
|
1945
1953
|
# @param body [SyncFilesRequest]
|
1946
1954
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1947
|
-
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1955
|
+
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1948
1956
|
_body = {}
|
1949
1957
|
_body[:tags] = tags if tags != SENTINEL
|
1950
1958
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
data/lib/carbon_ruby_sdk/api/utilities_api.rb
CHANGED
@@ -436,9 +436,10 @@ module Carbon
|
|
436
436
|
# @param embedding_model [EmbeddingGenerators]
|
437
437
|
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
438
438
|
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
439
|
+
# @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
|
439
440
|
# @param body [SitemapScrapeRequest]
|
440
441
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
441
|
-
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
442
|
+
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
|
442
443
|
_body = {}
|
443
444
|
_body[:tags] = tags if tags != SENTINEL
|
444
445
|
_body[:url] = url if url != SENTINEL
|
@@ -455,6 +456,7 @@ module Carbon
|
|
455
456
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
456
457
|
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
457
458
|
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
459
|
+
_body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
|
458
460
|
sitemap_scrape_request = _body
|
459
461
|
api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
460
462
|
api_response.data
|
@@ -485,9 +487,10 @@ module Carbon
|
|
485
487
|
# @param embedding_model [EmbeddingGenerators]
|
486
488
|
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
487
489
|
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
490
|
+
# @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
|
488
491
|
# @param body [SitemapScrapeRequest]
|
489
492
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
490
|
-
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
493
|
+
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
|
491
494
|
_body = {}
|
492
495
|
_body[:tags] = tags if tags != SENTINEL
|
493
496
|
_body[:url] = url if url != SENTINEL
|
@@ -504,6 +507,7 @@ module Carbon
|
|
504
507
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
505
508
|
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
506
509
|
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
510
|
+
_body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
|
507
511
|
sitemap_scrape_request = _body
|
508
512
|
scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
509
513
|
end
|
data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb
CHANGED
@@ -61,7 +61,7 @@ module Carbon
|
|
61
61
|
|
62
62
|
attr_accessor :parse_pdf_tables_with_ocr
|
63
63
|
|
64
|
-
# Enable integration's file picker for sources that support it. Supported sources:
|
64
|
+
# Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
|
65
65
|
attr_accessor :enable_file_picker
|
66
66
|
|
67
67
|
# Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
@@ -279,7 +279,7 @@ module Carbon
|
|
279
279
|
if attributes.key?(:'request_id')
|
280
280
|
self.request_id = attributes[:'request_id']
|
281
281
|
else
|
282
|
-
self.request_id = '
|
282
|
+
self.request_id = 'd29ec177-bbc6-43c7-9b16-0ee340804c99'
|
283
283
|
end
|
284
284
|
|
285
285
|
if attributes.key?(:'use_ocr')
|
data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb
CHANGED
@@ -43,6 +43,9 @@ module Carbon
|
|
43
43
|
# URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
44
44
|
attr_accessor :url_paths_to_exclude
|
45
45
|
|
46
|
+
# You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
|
47
|
+
attr_accessor :urls_to_scrape
|
48
|
+
|
46
49
|
# Attribute mapping from ruby-style variable name to JSON key.
|
47
50
|
def self.attribute_map
|
48
51
|
{
|
@@ -60,7 +63,8 @@ module Carbon
|
|
60
63
|
:'css_selectors_to_skip' => :'css_selectors_to_skip',
|
61
64
|
:'embedding_model' => :'embedding_model',
|
62
65
|
:'url_paths_to_include' => :'url_paths_to_include',
|
63
|
-
:'url_paths_to_exclude' => :'url_paths_to_exclude'
|
66
|
+
:'url_paths_to_exclude' => :'url_paths_to_exclude',
|
67
|
+
:'urls_to_scrape' => :'urls_to_scrape'
|
64
68
|
}
|
65
69
|
end
|
66
70
|
|
@@ -86,7 +90,8 @@ module Carbon
|
|
86
90
|
:'css_selectors_to_skip' => :'Array<String>',
|
87
91
|
:'embedding_model' => :'EmbeddingGenerators',
|
88
92
|
:'url_paths_to_include' => :'Array<String>',
|
89
|
-
:'url_paths_to_exclude' => :'Array<String>'
|
93
|
+
:'url_paths_to_exclude' => :'Array<String>',
|
94
|
+
:'urls_to_scrape' => :'Array<String>'
|
90
95
|
}
|
91
96
|
end
|
92
97
|
|
@@ -105,7 +110,8 @@ module Carbon
|
|
105
110
|
:'css_classes_to_skip',
|
106
111
|
:'css_selectors_to_skip',
|
107
112
|
:'url_paths_to_include',
|
108
|
-
:'url_paths_to_exclude'
|
113
|
+
:'url_paths_to_exclude',
|
114
|
+
:'urls_to_scrape'
|
109
115
|
])
|
110
116
|
end
|
111
117
|
|
@@ -209,6 +215,12 @@ module Carbon
|
|
209
215
|
self.url_paths_to_exclude = value
|
210
216
|
end
|
211
217
|
end
|
218
|
+
|
219
|
+
if attributes.key?(:'urls_to_scrape')
|
220
|
+
if (value = attributes[:'urls_to_scrape']).is_a?(Array)
|
221
|
+
self.urls_to_scrape = value
|
222
|
+
end
|
223
|
+
end
|
212
224
|
end
|
213
225
|
|
214
226
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -293,7 +305,8 @@ module Carbon
|
|
293
305
|
css_selectors_to_skip == o.css_selectors_to_skip &&
|
294
306
|
embedding_model == o.embedding_model &&
|
295
307
|
url_paths_to_include == o.url_paths_to_include &&
|
296
|
-
url_paths_to_exclude == o.url_paths_to_exclude
|
308
|
+
url_paths_to_exclude == o.url_paths_to_exclude &&
|
309
|
+
urls_to_scrape == o.urls_to_scrape
|
297
310
|
end
|
298
311
|
|
299
312
|
# @see the `==` method
|
@@ -305,7 +318,7 @@ module Carbon
|
|
305
318
|
# Calculates hash code according to all attributes.
|
306
319
|
# @return [Integer] Hash code
|
307
320
|
def hash
|
308
|
-
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
|
321
|
+
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude, urls_to_scrape].hash
|
309
322
|
end
|
310
323
|
|
311
324
|
# Builds the object from hash
|
@@ -187,7 +187,7 @@ module Carbon
|
|
187
187
|
if attributes.key?(:'request_id')
|
188
188
|
self.request_id = attributes[:'request_id']
|
189
189
|
else
|
190
|
-
self.request_id = '
|
190
|
+
self.request_id = 'ca60b474-8b43-4b44-9deb-adb701e40610'
|
191
191
|
end
|
192
192
|
|
193
193
|
if attributes.key?(:'use_ocr')
|
@@ -182,7 +182,7 @@ module Carbon
|
|
182
182
|
if attributes.key?(:'request_id')
|
183
183
|
self.request_id = attributes[:'request_id']
|
184
184
|
else
|
185
|
-
self.request_id = '
|
185
|
+
self.request_id = '07b02a24-9429-4a3c-aa98-27ff63503082'
|
186
186
|
end
|
187
187
|
|
188
188
|
if attributes.key?(:'enable_file_picker')
|
@@ -98,7 +98,7 @@ describe 'IntegrationsApi' do
|
|
98
98
|
|
99
99
|
# unit tests for list_confluence_pages
|
100
100
|
# Confluence List
|
101
|
-
# To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
101
|
+
# This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
|
102
102
|
# @param list_request
|
103
103
|
# @param [Hash] opts the optional parameters
|
104
104
|
# @return [ListResponse]
|
@@ -198,7 +198,7 @@ describe 'IntegrationsApi' do
|
|
198
198
|
|
199
199
|
# unit tests for sync_confluence
|
200
200
|
# Confluence Sync
|
201
|
-
# After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
201
|
+
# This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
|
202
202
|
# @param sync_files_request
|
203
203
|
# @param [Hash] opts the optional parameters
|
204
204
|
# @return [GenericSuccessResponse]
|
@@ -109,4 +109,10 @@ describe Carbon::SitemapScrapeRequest do
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
+
describe 'test attribute "urls_to_scrape"' do
|
113
|
+
it 'should work' do
|
114
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
112
118
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carbon_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.14
|
4
|
+
version: 0.2.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Konfig
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|