carbon_ruby_sdk 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +9 -9
- data/lib/carbon_ruby_sdk/api/files_api.rb +6 -6
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +8 -8
- data/lib/carbon_ruby_sdk/models/embedding_model.rb +227 -0
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
- data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/lib/carbon_ruby_sdk.rb +1 -0
- data/spec/api/files_api_spec.rb +1 -1
- data/spec/models/embedding_model_spec.rb +22 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ba0bb9e85c323c3cdaaba5026f74ee0e128f476efacc76ea319f744f9d04dd7
|
4
|
+
data.tar.gz: 171a3185783d7efa732ce6b4edb3af8bf1558659804df6f4cc91acec4df9b9b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 373c0a7f539b131c2fc2249d87c78d112dc85da61c89b632febec99663c4a87df4c31bd69886567252d662364dceb06e43d435c709e7bfabfbb6cac5ddbfeaa9
|
7
|
+
data.tar.gz: 4b6d6fd5fc75e0b5a3e89e3a7b48bb6c7ff34e8a22fdcb2e17569d4d5216d7e042b98d5fcfc2fbd0ef88ee1f336443da4a5439d50376d962037dd1698a560559
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Connect external data to LLMs, no matter the source.
|
8
8
|
|
9
|
-
[carbon_ruby_sdk 0.2.1 on RubyGems](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.1)
|
10
10
|
|
11
11
|
</div>
|
12
12
|
|
@@ -85,7 +85,7 @@ Connect external data to LLMs, no matter the source.
|
|
85
85
|
Add to Gemfile:
|
86
86
|
|
87
87
|
```ruby
|
88
|
-
gem 'carbon_ruby_sdk', '~> 0.2.0'
|
88
|
+
gem 'carbon_ruby_sdk', '~> 0.2.1'
|
89
89
|
```
|
90
90
|
|
91
91
|
## Getting Started<a id="getting-started"></a>
|
@@ -996,7 +996,7 @@ result = carbon.files.upload(
|
|
996
996
|
chunk_overlap: 1,
|
997
997
|
skip_embedding_generation: false,
|
998
998
|
set_page_as_boundary: false,
|
999
|
-
embedding_model: "
|
999
|
+
embedding_model: "string_example",
|
1000
1000
|
use_ocr: false,
|
1001
1001
|
generate_sparse_vectors: false,
|
1002
1002
|
prepend_filename_to_chunks: false,
|
@@ -1026,7 +1026,7 @@ Flag to control whether or not to set the a page's worth of content as the
|
|
1026
1026
|
maximum amount of content that can appear in a chunk. Only valid for PDFs. See
|
1027
1027
|
description route description for more information.
|
1028
1028
|
|
1029
|
-
##### embedding_model: [`
|
1029
|
+
##### embedding_model: [`EmbeddingModel`](./lib/carbon_ruby_sdk/models/embedding_model.rb)<a id="embedding_model-embeddingmodellibcarbon_ruby_sdkmodelsembedding_modelrb"></a>
|
1030
1030
|
Embedding model that will be used to embed file chunks.
|
1031
1031
|
|
1032
1032
|
##### use_ocr: `Boolean`<a id="use_ocr-boolean"></a>
|
@@ -1214,7 +1214,7 @@ result = carbon.integrations.connect_data_source(
|
|
1214
1214
|
"prepend_filename_to_chunks" => false,
|
1215
1215
|
"sync_files_on_connection" => true,
|
1216
1216
|
"set_page_as_boundary" => false,
|
1217
|
-
"request_id" => "
|
1217
|
+
"request_id" => "fceb0182-329c-4e45-953b-885c747cf4a3",
|
1218
1218
|
"enable_file_picker" => true,
|
1219
1219
|
"sync_source_items" => true,
|
1220
1220
|
"incremental_sync" => false,
|
@@ -1433,7 +1433,7 @@ result = carbon.integrations.get_oauth_url(
|
|
1433
1433
|
set_page_as_boundary: false,
|
1434
1434
|
data_source_id: 1,
|
1435
1435
|
connecting_new_account: false,
|
1436
|
-
request_id: "
|
1436
|
+
request_id: "ce1b1ec8-be64-491c-9159-c40f85fa0073",
|
1437
1437
|
use_ocr: false,
|
1438
1438
|
parse_pdf_tables_with_ocr: false,
|
1439
1439
|
enable_file_picker: true,
|
@@ -1493,7 +1493,7 @@ Enable OCR for files that support it. Supported formats: pdf
|
|
1493
1493
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1494
1494
|
##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
|
1495
1495
|
Enable integration's file picker for sources that support it. Supported sources:
|
1496
|
-
|
1496
|
+
DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
1497
1497
|
|
1498
1498
|
##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
|
1499
1499
|
Enabling this flag will fetch all available content from the source to be listed
|
@@ -1755,7 +1755,7 @@ result = carbon.integrations.sync_confluence(
|
|
1755
1755
|
prepend_filename_to_chunks: false,
|
1756
1756
|
max_items_per_chunk: 1,
|
1757
1757
|
set_page_as_boundary: false,
|
1758
|
-
request_id: "
|
1758
|
+
request_id: "9fe9190e-384f-4baa-a416-d51ed93d1be7",
|
1759
1759
|
use_ocr: false,
|
1760
1760
|
parse_pdf_tables_with_ocr: false,
|
1761
1761
|
incremental_sync: false,
|
@@ -1858,7 +1858,7 @@ result = carbon.integrations.sync_files(
|
|
1858
1858
|
prepend_filename_to_chunks: false,
|
1859
1859
|
max_items_per_chunk: 1,
|
1860
1860
|
set_page_as_boundary: false,
|
1861
|
-
request_id: "
|
1861
|
+
request_id: "9fe9190e-384f-4baa-a416-d51ed93d1be7",
|
1862
1862
|
use_ocr: false,
|
1863
1863
|
parse_pdf_tables_with_ocr: false,
|
1864
1864
|
incremental_sync: false,
|
data/lib/carbon_ruby_sdk/api/files_api.rb
CHANGED
@@ -1166,7 +1166,7 @@ module Carbon
|
|
1166
1166
|
# @param chunk_overlap [Integer] Chunk overlap in tiktoken tokens to be used when processing file.
|
1167
1167
|
# @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
|
1168
1168
|
# @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
|
1169
|
-
# @param embedding_model [
|
1169
|
+
# @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
|
1170
1170
|
# @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
1171
1171
|
# @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
1172
1172
|
# @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
|
@@ -1176,7 +1176,7 @@ module Carbon
|
|
1176
1176
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1177
1177
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1178
1178
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1179
|
-
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model:
|
1179
|
+
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: 'TEXT', extra: {})
|
1180
1180
|
_body = {}
|
1181
1181
|
_body[:file] = file if file != SENTINEL
|
1182
1182
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1229,7 +1229,7 @@ module Carbon
|
|
1229
1229
|
# @param chunk_overlap [Integer] Chunk overlap in tiktoken tokens to be used when processing file.
|
1230
1230
|
# @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
|
1231
1231
|
# @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
|
1232
|
-
# @param embedding_model [
|
1232
|
+
# @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
|
1233
1233
|
# @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
1234
1234
|
# @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
1235
1235
|
# @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
|
@@ -1239,7 +1239,7 @@ module Carbon
|
|
1239
1239
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1240
1240
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1241
1241
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1242
|
-
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model:
|
1242
|
+
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: 'TEXT', extra: {})
|
1243
1243
|
_body = {}
|
1244
1244
|
_body[:file] = file if file != SENTINEL
|
1245
1245
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1267,7 +1267,7 @@ module Carbon
|
|
1267
1267
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
1268
1268
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
|
1269
1269
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
|
1270
|
-
# @option opts [
|
1270
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
|
1271
1271
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
|
1272
1272
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
|
1273
1273
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
|
@@ -1290,7 +1290,7 @@ module Carbon
|
|
1290
1290
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
1291
1291
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
|
1292
1292
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
|
1293
|
-
# @option opts [
|
1293
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
|
1294
1294
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
|
1295
1295
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
|
1296
1296
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
|
data/lib/carbon_ruby_sdk/api/integrations_api.rb
CHANGED
@@ -561,13 +561,13 @@ module Carbon
|
|
561
561
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
562
562
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
563
563
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
564
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
564
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
565
565
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
566
566
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
567
567
|
# @param file_sync_config [FileSyncConfigNullable]
|
568
568
|
# @param body [OAuthURLRequest]
|
569
569
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
570
|
-
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
570
|
+
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'ce1b1ec8-be64-491c-9159-c40f85fa0073', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
571
571
|
_body = {}
|
572
572
|
_body[:tags] = tags if tags != SENTINEL
|
573
573
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -629,13 +629,13 @@ module Carbon
|
|
629
629
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
630
630
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
631
631
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
632
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
632
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
633
633
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
634
634
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
635
635
|
# @param file_sync_config [FileSyncConfigNullable]
|
636
636
|
# @param body [OAuthURLRequest]
|
637
637
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
638
|
-
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
638
|
+
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'ce1b1ec8-be64-491c-9159-c40f85fa0073', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
639
639
|
_body = {}
|
640
640
|
_body[:tags] = tags if tags != SENTINEL
|
641
641
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -1431,7 +1431,7 @@ module Carbon
|
|
1431
1431
|
# @param file_sync_config [FileSyncConfigNullable]
|
1432
1432
|
# @param body [SyncFilesRequest]
|
1433
1433
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1434
|
-
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1434
|
+
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1435
1435
|
_body = {}
|
1436
1436
|
_body[:tags] = tags if tags != SENTINEL
|
1437
1437
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1479,7 +1479,7 @@ module Carbon
|
|
1479
1479
|
# @param file_sync_config [FileSyncConfigNullable]
|
1480
1480
|
# @param body [SyncFilesRequest]
|
1481
1481
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1482
|
-
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1482
|
+
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1483
1483
|
_body = {}
|
1484
1484
|
_body[:tags] = tags if tags != SENTINEL
|
1485
1485
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1687,7 +1687,7 @@ module Carbon
|
|
1687
1687
|
# @param file_sync_config [FileSyncConfigNullable]
|
1688
1688
|
# @param body [SyncFilesRequest]
|
1689
1689
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1690
|
-
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1690
|
+
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1691
1691
|
_body = {}
|
1692
1692
|
_body[:tags] = tags if tags != SENTINEL
|
1693
1693
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1735,7 +1735,7 @@ module Carbon
|
|
1735
1735
|
# @param file_sync_config [FileSyncConfigNullable]
|
1736
1736
|
# @param body [SyncFilesRequest]
|
1737
1737
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1738
|
-
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1738
|
+
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1739
1739
|
_body = {}
|
1740
1740
|
_body[:tags] = tags if tags != SENTINEL
|
1741
1741
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
data/lib/carbon_ruby_sdk/models/embedding_model.rb
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
=begin
|
2
|
+
#Carbon
|
3
|
+
|
4
|
+
#Connect external data to LLMs, no matter the source.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 1.0.0
|
7
|
+
=end
|
8
|
+
|
9
|
+
require 'date'
|
10
|
+
require 'time'
|
11
|
+
|
12
|
+
module Carbon
|
13
|
+
# Embedding model that will be used to embed file chunks.
|
14
|
+
class EmbeddingModel
|
15
|
+
# Attribute mapping from ruby-style variable name to JSON key.
|
16
|
+
def self.attribute_map
|
17
|
+
{
|
18
|
+
}
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns all the JSON keys this model knows about
|
22
|
+
def self.acceptable_attributes
|
23
|
+
attribute_map.values
|
24
|
+
end
|
25
|
+
|
26
|
+
# Attribute type mapping.
|
27
|
+
def self.openapi_types
|
28
|
+
{
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
# List of attributes with nullable: true
|
33
|
+
def self.openapi_nullable
|
34
|
+
Set.new([
|
35
|
+
])
|
36
|
+
end
|
37
|
+
|
38
|
+
# List of class defined in anyOf (OpenAPI v3)
|
39
|
+
def self.openapi_any_of
|
40
|
+
[
|
41
|
+
:'String',
|
42
|
+
:'TextEmbeddingGenerators'
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Initializes the object
|
47
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
48
|
+
def initialize(attributes = {})
|
49
|
+
if (!attributes.is_a?(Hash))
|
50
|
+
fail ArgumentError, "The input argument (attributes) must be a hash in `Carbon::EmbeddingModel` initialize method"
|
51
|
+
end
|
52
|
+
|
53
|
+
# check to see if the attribute exists and convert string to symbol for hash key
|
54
|
+
attributes = attributes.each_with_object({}) { |(k, v), h|
|
55
|
+
if (!self.class.attribute_map.key?(k.to_sym))
|
56
|
+
fail ArgumentError, "`#{k}` is not a valid attribute in `Carbon::EmbeddingModel`. Please check the name to make sure it's valid. List of attributes: " + self.class.attribute_map.keys.inspect
|
57
|
+
end
|
58
|
+
h[k.to_sym] = v
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
62
|
+
# Show invalid properties with the reasons. Usually used together with valid?
|
63
|
+
# @return Array for valid properties with the reasons
|
64
|
+
def list_invalid_properties
|
65
|
+
invalid_properties = Array.new
|
66
|
+
invalid_properties
|
67
|
+
end
|
68
|
+
|
69
|
+
# Check to see if the all the properties in the model are valid
|
70
|
+
# @return true if the model is valid
|
71
|
+
def valid?
|
72
|
+
_any_of_found = false
|
73
|
+
self.class.openapi_any_of.each do |_class|
|
74
|
+
_any_of = Carbon.const_get(_class).build_from_hash(self.to_hash)
|
75
|
+
if _any_of.valid?
|
76
|
+
_any_of_found = true
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
if !_any_of_found
|
81
|
+
return false
|
82
|
+
end
|
83
|
+
|
84
|
+
true
|
85
|
+
end
|
86
|
+
|
87
|
+
# Checks equality by comparing each attribute.
|
88
|
+
# @param [Object] Object to be compared
|
89
|
+
def ==(o)
|
90
|
+
return true if self.equal?(o)
|
91
|
+
self.class == o.class
|
92
|
+
end
|
93
|
+
|
94
|
+
# @see the `==` method
|
95
|
+
# @param [Object] Object to be compared
|
96
|
+
def eql?(o)
|
97
|
+
self == o
|
98
|
+
end
|
99
|
+
|
100
|
+
# Calculates hash code according to all attributes.
|
101
|
+
# @return [Integer] Hash code
|
102
|
+
def hash
|
103
|
+
[].hash
|
104
|
+
end
|
105
|
+
|
106
|
+
# Builds the object from hash
|
107
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
108
|
+
# @return [Object] Returns the model itself
|
109
|
+
def self.build_from_hash(attributes)
|
110
|
+
new.build_from_hash(attributes)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Builds the object from hash
|
114
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
115
|
+
# @return [Object] Returns the model itself
|
116
|
+
def build_from_hash(attributes)
|
117
|
+
return nil unless attributes.is_a?(Hash)
|
118
|
+
attributes = attributes.transform_keys(&:to_sym)
|
119
|
+
self.class.openapi_types.each_pair do |key, type|
|
120
|
+
if attributes[self.class.attribute_map[key]].nil? && self.class.openapi_nullable.include?(key)
|
121
|
+
self.send("#{key}=", nil)
|
122
|
+
elsif type =~ /\AArray<(.*)>/i
|
123
|
+
# check to ensure the input is an array given that the attribute
|
124
|
+
# is documented as an array but the input is not
|
125
|
+
if attributes[self.class.attribute_map[key]].is_a?(Array)
|
126
|
+
self.send("#{key}=", attributes[self.class.attribute_map[key]].map { |v| _deserialize($1, v) })
|
127
|
+
end
|
128
|
+
elsif !attributes[self.class.attribute_map[key]].nil?
|
129
|
+
self.send("#{key}=", _deserialize(type, attributes[self.class.attribute_map[key]]))
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
self
|
134
|
+
end
|
135
|
+
|
136
|
+
# Deserializes the data based on type
|
137
|
+
# @param string type Data type
|
138
|
+
# @param string value Value to be deserialized
|
139
|
+
# @return [Object] Deserialized data
|
140
|
+
def _deserialize(type, value)
|
141
|
+
case type.to_sym
|
142
|
+
when :Time
|
143
|
+
Time.parse(value)
|
144
|
+
when :Date
|
145
|
+
Date.parse(value)
|
146
|
+
when :String
|
147
|
+
value.to_s
|
148
|
+
when :Integer
|
149
|
+
value.to_i
|
150
|
+
when :Float
|
151
|
+
value.to_f
|
152
|
+
when :Boolean
|
153
|
+
if value.to_s =~ /\A(true|t|yes|y|1)\z/i
|
154
|
+
true
|
155
|
+
else
|
156
|
+
false
|
157
|
+
end
|
158
|
+
when :Object
|
159
|
+
# generic object (usually a Hash), return directly
|
160
|
+
value
|
161
|
+
when /\AArray<(?<inner_type>.+)>\z/
|
162
|
+
inner_type = Regexp.last_match[:inner_type]
|
163
|
+
value.map { |v| _deserialize(inner_type, v) }
|
164
|
+
when /\AHash<(?<k_type>.+?), (?<v_type>.+)>\z/
|
165
|
+
k_type = Regexp.last_match[:k_type]
|
166
|
+
v_type = Regexp.last_match[:v_type]
|
167
|
+
{}.tap do |hash|
|
168
|
+
value.each do |k, v|
|
169
|
+
hash[_deserialize(k_type, k)] = _deserialize(v_type, v)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
else # model
|
173
|
+
# models (e.g. Pet) or oneOf
|
174
|
+
klass = Carbon.const_get(type)
|
175
|
+
klass.respond_to?(:openapi_one_of) ? klass.build(value) : klass.build_from_hash(value)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
# Returns the string representation of the object
|
180
|
+
# @return [String] String presentation of the object
|
181
|
+
def to_s
|
182
|
+
to_hash.to_s
|
183
|
+
end
|
184
|
+
|
185
|
+
# to_body is an alias to to_hash (backward compatibility)
|
186
|
+
# @return [Hash] Returns the object in the form of hash
|
187
|
+
def to_body
|
188
|
+
to_hash
|
189
|
+
end
|
190
|
+
|
191
|
+
# Returns the object in the form of hash
|
192
|
+
# @return [Hash] Returns the object in the form of hash
|
193
|
+
def to_hash
|
194
|
+
hash = {}
|
195
|
+
self.class.attribute_map.each_pair do |attr, param|
|
196
|
+
value = self.send(attr)
|
197
|
+
if value.nil?
|
198
|
+
is_nullable = self.class.openapi_nullable.include?(attr)
|
199
|
+
next if !is_nullable || (is_nullable && !instance_variable_defined?(:"@#{attr}"))
|
200
|
+
end
|
201
|
+
|
202
|
+
hash[param] = _to_hash(value)
|
203
|
+
end
|
204
|
+
hash
|
205
|
+
end
|
206
|
+
|
207
|
+
# Outputs non-array value in the form of hash
|
208
|
+
# For object, use to_hash. Otherwise, just return the value
|
209
|
+
# @param [Object] value Any valid value
|
210
|
+
# @return [Hash] Returns the value in the form of hash
|
211
|
+
def _to_hash(value)
|
212
|
+
if value.is_a?(Array)
|
213
|
+
value.compact.map { |v| _to_hash(v) }
|
214
|
+
elsif value.is_a?(Hash)
|
215
|
+
{}.tap do |hash|
|
216
|
+
value.each { |k, v| hash[k] = _to_hash(v) }
|
217
|
+
end
|
218
|
+
elsif value.respond_to? :to_hash
|
219
|
+
value.to_hash
|
220
|
+
else
|
221
|
+
value
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
end
|
226
|
+
|
227
|
+
end
|
data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb
CHANGED
@@ -61,7 +61,7 @@ module Carbon
|
|
61
61
|
|
62
62
|
attr_accessor :parse_pdf_tables_with_ocr
|
63
63
|
|
64
|
-
# Enable integration's file picker for sources that support it. Supported sources:
|
64
|
+
# Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
65
65
|
attr_accessor :enable_file_picker
|
66
66
|
|
67
67
|
# Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
@@ -279,7 +279,7 @@ module Carbon
|
|
279
279
|
if attributes.key?(:'request_id')
|
280
280
|
self.request_id = attributes[:'request_id']
|
281
281
|
else
|
282
|
-
self.request_id = '
|
282
|
+
self.request_id = 'ce1b1ec8-be64-491c-9159-c40f85fa0073'
|
283
283
|
end
|
284
284
|
|
285
285
|
if attributes.key?(:'use_ocr')
|
data/lib/carbon_ruby_sdk/models/sync_files_request.rb
CHANGED
@@ -187,7 +187,7 @@ module Carbon
|
|
187
187
|
if attributes.key?(:'request_id')
|
188
188
|
self.request_id = attributes[:'request_id']
|
189
189
|
else
|
190
|
-
self.request_id = '
|
190
|
+
self.request_id = '9fe9190e-384f-4baa-a416-d51ed93d1be7'
|
191
191
|
end
|
192
192
|
|
193
193
|
if attributes.key?(:'use_ocr')
|
data/lib/carbon_ruby_sdk/models/sync_options.rb
CHANGED
@@ -182,7 +182,7 @@ module Carbon
|
|
182
182
|
if attributes.key?(:'request_id')
|
183
183
|
self.request_id = attributes[:'request_id']
|
184
184
|
else
|
185
|
-
self.request_id = '
|
185
|
+
self.request_id = 'fceb0182-329c-4e45-953b-885c747cf4a3'
|
186
186
|
end
|
187
187
|
|
188
188
|
if attributes.key?(:'enable_file_picker')
|
data/lib/carbon_ruby_sdk.rb
CHANGED
@@ -40,6 +40,7 @@ require 'carbon_ruby_sdk/models/document_response_list'
|
|
40
40
|
require 'carbon_ruby_sdk/models/embedding_and_chunk'
|
41
41
|
require 'carbon_ruby_sdk/models/embedding_generators'
|
42
42
|
require 'carbon_ruby_sdk/models/embedding_generators_nullable'
|
43
|
+
require 'carbon_ruby_sdk/models/embedding_model'
|
43
44
|
require 'carbon_ruby_sdk/models/embedding_properties'
|
44
45
|
require 'carbon_ruby_sdk/models/embeddings_and_chunks_filters'
|
45
46
|
require 'carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns'
|
data/spec/api/files_api_spec.rb
CHANGED
@@ -151,7 +151,7 @@ describe 'FilesApi' do
|
|
151
151
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
152
152
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file.
|
153
153
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See the route description for more information.
|
154
|
-
# @option opts [
|
154
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks.
|
155
155
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
156
156
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
157
157
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks.
|
@@ -0,0 +1,22 @@
|
|
1
|
+
=begin
|
2
|
+
#Carbon
|
3
|
+
|
4
|
+
#Connect external data to LLMs, no matter the source.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 1.0.0
|
7
|
+
=end
|
8
|
+
|
9
|
+
require 'spec_helper'
|
10
|
+
require 'json'
|
11
|
+
require 'date'
|
12
|
+
|
13
|
+
# Unit tests for Carbon::EmbeddingModel
|
14
|
+
describe Carbon::EmbeddingModel do
|
15
|
+
let(:instance) { Carbon::EmbeddingModel.new }
|
16
|
+
|
17
|
+
describe 'test an instance of EmbeddingModel' do
|
18
|
+
it 'should create an instance of EmbeddingModel' do
|
19
|
+
expect(instance).to be_instance_of(Carbon::EmbeddingModel)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carbon_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Konfig
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -123,6 +123,7 @@ files:
|
|
123
123
|
- lib/carbon_ruby_sdk/models/embedding_and_chunk.rb
|
124
124
|
- lib/carbon_ruby_sdk/models/embedding_generators.rb
|
125
125
|
- lib/carbon_ruby_sdk/models/embedding_generators_nullable.rb
|
126
|
+
- lib/carbon_ruby_sdk/models/embedding_model.rb
|
126
127
|
- lib/carbon_ruby_sdk/models/embedding_properties.rb
|
127
128
|
- lib/carbon_ruby_sdk/models/embeddings_and_chunks_filters.rb
|
128
129
|
- lib/carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns.rb
|
@@ -269,6 +270,7 @@ files:
|
|
269
270
|
- spec/models/embedding_and_chunk_spec.rb
|
270
271
|
- spec/models/embedding_generators_nullable_spec.rb
|
271
272
|
- spec/models/embedding_generators_spec.rb
|
273
|
+
- spec/models/embedding_model_spec.rb
|
272
274
|
- spec/models/embedding_properties_spec.rb
|
273
275
|
- spec/models/embeddings_and_chunks_filters_spec.rb
|
274
276
|
- spec/models/embeddings_and_chunks_order_by_columns_spec.rb
|
@@ -440,6 +442,7 @@ test_files:
|
|
440
442
|
- spec/models/update_organization_input_spec.rb
|
441
443
|
- spec/models/user_request_content_spec.rb
|
442
444
|
- spec/models/gmail_sync_input_spec.rb
|
445
|
+
- spec/models/embedding_model_spec.rb
|
443
446
|
- spec/models/delete_files_query_input_spec.rb
|
444
447
|
- spec/models/webhook_query_input_spec.rb
|
445
448
|
- spec/models/text_embedding_generators_spec.rb
|