carbon_ruby_sdk 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +9 -9
- data/lib/carbon_ruby_sdk/api/files_api.rb +6 -6
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +8 -8
- data/lib/carbon_ruby_sdk/models/embedding_model.rb +227 -0
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
- data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/lib/carbon_ruby_sdk.rb +1 -0
- data/spec/api/files_api_spec.rb +1 -1
- data/spec/models/embedding_model_spec.rb +22 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ba0bb9e85c323c3cdaaba5026f74ee0e128f476efacc76ea319f744f9d04dd7
|
4
|
+
data.tar.gz: 171a3185783d7efa732ce6b4edb3af8bf1558659804df6f4cc91acec4df9b9b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 373c0a7f539b131c2fc2249d87c78d112dc85da61c89b632febec99663c4a87df4c31bd69886567252d662364dceb06e43d435c709e7bfabfbb6cac5ddbfeaa9
|
7
|
+
data.tar.gz: 4b6d6fd5fc75e0b5a3e89e3a7b48bb6c7ff34e8a22fdcb2e17569d4d5216d7e042b98d5fcfc2fbd0ef88ee1f336443da4a5439d50376d962037dd1698a560559
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Connect external data to LLMs, no matter the source.
|
8
8
|
|
9
|
-
[![npm](https://img.shields.io/badge/gem-v0.2.0-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.0)
|
9
|
+
[![npm](https://img.shields.io/badge/gem-v0.2.1-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.1)
|
10
10
|
|
11
11
|
</div>
|
12
12
|
|
@@ -85,7 +85,7 @@ Connect external data to LLMs, no matter the source.
|
|
85
85
|
Add to Gemfile:
|
86
86
|
|
87
87
|
```ruby
|
88
|
-
gem 'carbon_ruby_sdk', '~> 0.2.0'
|
88
|
+
gem 'carbon_ruby_sdk', '~> 0.2.1'
|
89
89
|
```
|
90
90
|
|
91
91
|
## Getting Started<a id="getting-started"></a>
|
@@ -996,7 +996,7 @@ result = carbon.files.upload(
|
|
996
996
|
chunk_overlap: 1,
|
997
997
|
skip_embedding_generation: false,
|
998
998
|
set_page_as_boundary: false,
|
999
|
-
embedding_model: "
|
999
|
+
embedding_model: "string_example",
|
1000
1000
|
use_ocr: false,
|
1001
1001
|
generate_sparse_vectors: false,
|
1002
1002
|
prepend_filename_to_chunks: false,
|
@@ -1026,7 +1026,7 @@ Flag to control whether or not to set the a page's worth of content as the
|
|
1026
1026
|
maximum amount of content that can appear in a chunk. Only valid for PDFs. See
|
1027
1027
|
description route description for more information.
|
1028
1028
|
|
1029
|
-
##### embedding_model: [`
|
1029
|
+
##### embedding_model: [`EmbeddingModel`](./lib/carbon_ruby_sdk/models/embedding_model.rb)<a id="embedding_model-embeddingmodellibcarbon_ruby_sdkmodelsembedding_modelrb"></a>
|
1030
1030
|
Embedding model that will be used to embed file chunks.
|
1031
1031
|
|
1032
1032
|
##### use_ocr: `Boolean`<a id="use_ocr-boolean"></a>
|
@@ -1214,7 +1214,7 @@ result = carbon.integrations.connect_data_source(
|
|
1214
1214
|
"prepend_filename_to_chunks" => false,
|
1215
1215
|
"sync_files_on_connection" => true,
|
1216
1216
|
"set_page_as_boundary" => false,
|
1217
|
-
"request_id" => "
|
1217
|
+
"request_id" => "fceb0182-329c-4e45-953b-885c747cf4a3",
|
1218
1218
|
"enable_file_picker" => true,
|
1219
1219
|
"sync_source_items" => true,
|
1220
1220
|
"incremental_sync" => false,
|
@@ -1433,7 +1433,7 @@ result = carbon.integrations.get_oauth_url(
|
|
1433
1433
|
set_page_as_boundary: false,
|
1434
1434
|
data_source_id: 1,
|
1435
1435
|
connecting_new_account: false,
|
1436
|
-
request_id: "
|
1436
|
+
request_id: "ce1b1ec8-be64-491c-9159-c40f85fa0073",
|
1437
1437
|
use_ocr: false,
|
1438
1438
|
parse_pdf_tables_with_ocr: false,
|
1439
1439
|
enable_file_picker: true,
|
@@ -1493,7 +1493,7 @@ Enable OCR for files that support it. Supported formats: pdf
|
|
1493
1493
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1494
1494
|
##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
|
1495
1495
|
Enable integration's file picker for sources that support it. Supported sources:
|
1496
|
-
|
1496
|
+
DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
1497
1497
|
|
1498
1498
|
##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
|
1499
1499
|
Enabling this flag will fetch all available content from the source to be listed
|
@@ -1755,7 +1755,7 @@ result = carbon.integrations.sync_confluence(
|
|
1755
1755
|
prepend_filename_to_chunks: false,
|
1756
1756
|
max_items_per_chunk: 1,
|
1757
1757
|
set_page_as_boundary: false,
|
1758
|
-
request_id: "
|
1758
|
+
request_id: "9fe9190e-384f-4baa-a416-d51ed93d1be7",
|
1759
1759
|
use_ocr: false,
|
1760
1760
|
parse_pdf_tables_with_ocr: false,
|
1761
1761
|
incremental_sync: false,
|
@@ -1858,7 +1858,7 @@ result = carbon.integrations.sync_files(
|
|
1858
1858
|
prepend_filename_to_chunks: false,
|
1859
1859
|
max_items_per_chunk: 1,
|
1860
1860
|
set_page_as_boundary: false,
|
1861
|
-
request_id: "
|
1861
|
+
request_id: "9fe9190e-384f-4baa-a416-d51ed93d1be7",
|
1862
1862
|
use_ocr: false,
|
1863
1863
|
parse_pdf_tables_with_ocr: false,
|
1864
1864
|
incremental_sync: false,
|
@@ -1166,7 +1166,7 @@ module Carbon
|
|
1166
1166
|
# @param chunk_overlap [Integer] Chunk overlap in tiktoken tokens to be used when processing file.
|
1167
1167
|
# @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
|
1168
1168
|
# @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
|
1169
|
-
# @param embedding_model [
|
1169
|
+
# @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
|
1170
1170
|
# @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
1171
1171
|
# @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
1172
1172
|
# @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
|
@@ -1176,7 +1176,7 @@ module Carbon
|
|
1176
1176
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1177
1177
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1178
1178
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1179
|
-
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model:
|
1179
|
+
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: 'TEXT', extra: {})
|
1180
1180
|
_body = {}
|
1181
1181
|
_body[:file] = file if file != SENTINEL
|
1182
1182
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1229,7 +1229,7 @@ module Carbon
|
|
1229
1229
|
# @param chunk_overlap [Integer] Chunk overlap in tiktoken tokens to be used when processing file.
|
1230
1230
|
# @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
|
1231
1231
|
# @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
|
1232
|
-
# @param embedding_model [
|
1232
|
+
# @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
|
1233
1233
|
# @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
1234
1234
|
# @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
1235
1235
|
# @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
|
@@ -1239,7 +1239,7 @@ module Carbon
|
|
1239
1239
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1240
1240
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1241
1241
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1242
|
-
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model:
|
1242
|
+
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: 'TEXT', extra: {})
|
1243
1243
|
_body = {}
|
1244
1244
|
_body[:file] = file if file != SENTINEL
|
1245
1245
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1267,7 +1267,7 @@ module Carbon
|
|
1267
1267
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
1268
1268
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
|
1269
1269
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
|
1270
|
-
# @option opts [
|
1270
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
|
1271
1271
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
|
1272
1272
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
|
1273
1273
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
|
@@ -1290,7 +1290,7 @@ module Carbon
|
|
1290
1290
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
1291
1291
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
|
1292
1292
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
|
1293
|
-
# @option opts [
|
1293
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
|
1294
1294
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
|
1295
1295
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
|
1296
1296
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
|
@@ -561,13 +561,13 @@ module Carbon
|
|
561
561
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
562
562
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
563
563
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
564
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
564
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
565
565
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
566
566
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
567
567
|
# @param file_sync_config [FileSyncConfigNullable]
|
568
568
|
# @param body [OAuthURLRequest]
|
569
569
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
570
|
-
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
570
|
+
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'ce1b1ec8-be64-491c-9159-c40f85fa0073', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
571
571
|
_body = {}
|
572
572
|
_body[:tags] = tags if tags != SENTINEL
|
573
573
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -629,13 +629,13 @@ module Carbon
|
|
629
629
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
630
630
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
631
631
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
632
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
632
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
633
633
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
634
634
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
635
635
|
# @param file_sync_config [FileSyncConfigNullable]
|
636
636
|
# @param body [OAuthURLRequest]
|
637
637
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
638
|
-
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
638
|
+
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'ce1b1ec8-be64-491c-9159-c40f85fa0073', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
639
639
|
_body = {}
|
640
640
|
_body[:tags] = tags if tags != SENTINEL
|
641
641
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -1431,7 +1431,7 @@ module Carbon
|
|
1431
1431
|
# @param file_sync_config [FileSyncConfigNullable]
|
1432
1432
|
# @param body [SyncFilesRequest]
|
1433
1433
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1434
|
-
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1434
|
+
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1435
1435
|
_body = {}
|
1436
1436
|
_body[:tags] = tags if tags != SENTINEL
|
1437
1437
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1479,7 +1479,7 @@ module Carbon
|
|
1479
1479
|
# @param file_sync_config [FileSyncConfigNullable]
|
1480
1480
|
# @param body [SyncFilesRequest]
|
1481
1481
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1482
|
-
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1482
|
+
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1483
1483
|
_body = {}
|
1484
1484
|
_body[:tags] = tags if tags != SENTINEL
|
1485
1485
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1687,7 +1687,7 @@ module Carbon
|
|
1687
1687
|
# @param file_sync_config [FileSyncConfigNullable]
|
1688
1688
|
# @param body [SyncFilesRequest]
|
1689
1689
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1690
|
-
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1690
|
+
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1691
1691
|
_body = {}
|
1692
1692
|
_body[:tags] = tags if tags != SENTINEL
|
1693
1693
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1735,7 +1735,7 @@ module Carbon
|
|
1735
1735
|
# @param file_sync_config [FileSyncConfigNullable]
|
1736
1736
|
# @param body [SyncFilesRequest]
|
1737
1737
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1738
|
-
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1738
|
+
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '9fe9190e-384f-4baa-a416-d51ed93d1be7', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1739
1739
|
_body = {}
|
1740
1740
|
_body[:tags] = tags if tags != SENTINEL
|
1741
1741
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -0,0 +1,227 @@
|
|
1
|
+
=begin
|
2
|
+
#Carbon
|
3
|
+
|
4
|
+
#Connect external data to LLMs, no matter the source.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 1.0.0
|
7
|
+
=end
|
8
|
+
|
9
|
+
require 'date'
|
10
|
+
require 'time'
|
11
|
+
|
12
|
+
module Carbon
|
13
|
+
# Embedding model that will be used to embed file chunks.
|
14
|
+
class EmbeddingModel
|
15
|
+
# Attribute mapping from ruby-style variable name to JSON key.
|
16
|
+
def self.attribute_map
|
17
|
+
{
|
18
|
+
}
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns all the JSON keys this model knows about
|
22
|
+
def self.acceptable_attributes
|
23
|
+
attribute_map.values
|
24
|
+
end
|
25
|
+
|
26
|
+
# Attribute type mapping.
|
27
|
+
def self.openapi_types
|
28
|
+
{
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
# List of attributes with nullable: true
|
33
|
+
def self.openapi_nullable
|
34
|
+
Set.new([
|
35
|
+
])
|
36
|
+
end
|
37
|
+
|
38
|
+
# List of class defined in anyOf (OpenAPI v3)
|
39
|
+
def self.openapi_any_of
|
40
|
+
[
|
41
|
+
:'String',
|
42
|
+
:'TextEmbeddingGenerators'
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Initializes the object
|
47
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
48
|
+
def initialize(attributes = {})
|
49
|
+
if (!attributes.is_a?(Hash))
|
50
|
+
fail ArgumentError, "The input argument (attributes) must be a hash in `Carbon::EmbeddingModel` initialize method"
|
51
|
+
end
|
52
|
+
|
53
|
+
# check to see if the attribute exists and convert string to symbol for hash key
|
54
|
+
attributes = attributes.each_with_object({}) { |(k, v), h|
|
55
|
+
if (!self.class.attribute_map.key?(k.to_sym))
|
56
|
+
fail ArgumentError, "`#{k}` is not a valid attribute in `Carbon::EmbeddingModel`. Please check the name to make sure it's valid. List of attributes: " + self.class.attribute_map.keys.inspect
|
57
|
+
end
|
58
|
+
h[k.to_sym] = v
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
62
|
+
# Show invalid properties with the reasons. Usually used together with valid?
|
63
|
+
# @return Array for valid properties with the reasons
|
64
|
+
def list_invalid_properties
|
65
|
+
invalid_properties = Array.new
|
66
|
+
invalid_properties
|
67
|
+
end
|
68
|
+
|
69
|
+
# Check to see if the all the properties in the model are valid
|
70
|
+
# @return true if the model is valid
|
71
|
+
def valid?
|
72
|
+
_any_of_found = false
|
73
|
+
self.class.openapi_any_of.each do |_class|
|
74
|
+
_any_of = Carbon.const_get(_class).build_from_hash(self.to_hash)
|
75
|
+
if _any_of.valid?
|
76
|
+
_any_of_found = true
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
if !_any_of_found
|
81
|
+
return false
|
82
|
+
end
|
83
|
+
|
84
|
+
true
|
85
|
+
end
|
86
|
+
|
87
|
+
# Checks equality by comparing each attribute.
|
88
|
+
# @param [Object] Object to be compared
|
89
|
+
def ==(o)
|
90
|
+
return true if self.equal?(o)
|
91
|
+
self.class == o.class
|
92
|
+
end
|
93
|
+
|
94
|
+
# @see the `==` method
|
95
|
+
# @param [Object] Object to be compared
|
96
|
+
def eql?(o)
|
97
|
+
self == o
|
98
|
+
end
|
99
|
+
|
100
|
+
# Calculates hash code according to all attributes.
|
101
|
+
# @return [Integer] Hash code
|
102
|
+
def hash
|
103
|
+
[].hash
|
104
|
+
end
|
105
|
+
|
106
|
+
# Builds the object from hash
|
107
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
108
|
+
# @return [Object] Returns the model itself
|
109
|
+
def self.build_from_hash(attributes)
|
110
|
+
new.build_from_hash(attributes)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Builds the object from hash
|
114
|
+
# @param [Hash] attributes Model attributes in the form of hash
|
115
|
+
# @return [Object] Returns the model itself
|
116
|
+
def build_from_hash(attributes)
|
117
|
+
return nil unless attributes.is_a?(Hash)
|
118
|
+
attributes = attributes.transform_keys(&:to_sym)
|
119
|
+
self.class.openapi_types.each_pair do |key, type|
|
120
|
+
if attributes[self.class.attribute_map[key]].nil? && self.class.openapi_nullable.include?(key)
|
121
|
+
self.send("#{key}=", nil)
|
122
|
+
elsif type =~ /\AArray<(.*)>/i
|
123
|
+
# check to ensure the input is an array given that the attribute
|
124
|
+
# is documented as an array but the input is not
|
125
|
+
if attributes[self.class.attribute_map[key]].is_a?(Array)
|
126
|
+
self.send("#{key}=", attributes[self.class.attribute_map[key]].map { |v| _deserialize($1, v) })
|
127
|
+
end
|
128
|
+
elsif !attributes[self.class.attribute_map[key]].nil?
|
129
|
+
self.send("#{key}=", _deserialize(type, attributes[self.class.attribute_map[key]]))
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
self
|
134
|
+
end
|
135
|
+
|
136
|
+
# Deserializes the data based on type
|
137
|
+
# @param string type Data type
|
138
|
+
# @param string value Value to be deserialized
|
139
|
+
# @return [Object] Deserialized data
|
140
|
+
def _deserialize(type, value)
|
141
|
+
case type.to_sym
|
142
|
+
when :Time
|
143
|
+
Time.parse(value)
|
144
|
+
when :Date
|
145
|
+
Date.parse(value)
|
146
|
+
when :String
|
147
|
+
value.to_s
|
148
|
+
when :Integer
|
149
|
+
value.to_i
|
150
|
+
when :Float
|
151
|
+
value.to_f
|
152
|
+
when :Boolean
|
153
|
+
if value.to_s =~ /\A(true|t|yes|y|1)\z/i
|
154
|
+
true
|
155
|
+
else
|
156
|
+
false
|
157
|
+
end
|
158
|
+
when :Object
|
159
|
+
# generic object (usually a Hash), return directly
|
160
|
+
value
|
161
|
+
when /\AArray<(?<inner_type>.+)>\z/
|
162
|
+
inner_type = Regexp.last_match[:inner_type]
|
163
|
+
value.map { |v| _deserialize(inner_type, v) }
|
164
|
+
when /\AHash<(?<k_type>.+?), (?<v_type>.+)>\z/
|
165
|
+
k_type = Regexp.last_match[:k_type]
|
166
|
+
v_type = Regexp.last_match[:v_type]
|
167
|
+
{}.tap do |hash|
|
168
|
+
value.each do |k, v|
|
169
|
+
hash[_deserialize(k_type, k)] = _deserialize(v_type, v)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
else # model
|
173
|
+
# models (e.g. Pet) or oneOf
|
174
|
+
klass = Carbon.const_get(type)
|
175
|
+
klass.respond_to?(:openapi_one_of) ? klass.build(value) : klass.build_from_hash(value)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
# Returns the string representation of the object
|
180
|
+
# @return [String] String presentation of the object
|
181
|
+
def to_s
|
182
|
+
to_hash.to_s
|
183
|
+
end
|
184
|
+
|
185
|
+
# to_body is an alias to to_hash (backward compatibility)
|
186
|
+
# @return [Hash] Returns the object in the form of hash
|
187
|
+
def to_body
|
188
|
+
to_hash
|
189
|
+
end
|
190
|
+
|
191
|
+
# Returns the object in the form of hash
|
192
|
+
# @return [Hash] Returns the object in the form of hash
|
193
|
+
def to_hash
|
194
|
+
hash = {}
|
195
|
+
self.class.attribute_map.each_pair do |attr, param|
|
196
|
+
value = self.send(attr)
|
197
|
+
if value.nil?
|
198
|
+
is_nullable = self.class.openapi_nullable.include?(attr)
|
199
|
+
next if !is_nullable || (is_nullable && !instance_variable_defined?(:"@#{attr}"))
|
200
|
+
end
|
201
|
+
|
202
|
+
hash[param] = _to_hash(value)
|
203
|
+
end
|
204
|
+
hash
|
205
|
+
end
|
206
|
+
|
207
|
+
# Outputs non-array value in the form of hash
|
208
|
+
# For object, use to_hash. Otherwise, just return the value
|
209
|
+
# @param [Object] value Any valid value
|
210
|
+
# @return [Hash] Returns the value in the form of hash
|
211
|
+
def _to_hash(value)
|
212
|
+
if value.is_a?(Array)
|
213
|
+
value.compact.map { |v| _to_hash(v) }
|
214
|
+
elsif value.is_a?(Hash)
|
215
|
+
{}.tap do |hash|
|
216
|
+
value.each { |k, v| hash[k] = _to_hash(v) }
|
217
|
+
end
|
218
|
+
elsif value.respond_to? :to_hash
|
219
|
+
value.to_hash
|
220
|
+
else
|
221
|
+
value
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
end
|
226
|
+
|
227
|
+
end
|
@@ -61,7 +61,7 @@ module Carbon
|
|
61
61
|
|
62
62
|
attr_accessor :parse_pdf_tables_with_ocr
|
63
63
|
|
64
|
-
# Enable integration's file picker for sources that support it. Supported sources:
|
64
|
+
# Enable integration's file picker for sources that support it. Supported sources: DROPBOX, SHAREPOINT, ONEDRIVE, BOX, GOOGLE_DRIVE
|
65
65
|
attr_accessor :enable_file_picker
|
66
66
|
|
67
67
|
# Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
@@ -279,7 +279,7 @@ module Carbon
|
|
279
279
|
if attributes.key?(:'request_id')
|
280
280
|
self.request_id = attributes[:'request_id']
|
281
281
|
else
|
282
|
-
self.request_id = '
|
282
|
+
self.request_id = 'ce1b1ec8-be64-491c-9159-c40f85fa0073'
|
283
283
|
end
|
284
284
|
|
285
285
|
if attributes.key?(:'use_ocr')
|
@@ -187,7 +187,7 @@ module Carbon
|
|
187
187
|
if attributes.key?(:'request_id')
|
188
188
|
self.request_id = attributes[:'request_id']
|
189
189
|
else
|
190
|
-
self.request_id = '
|
190
|
+
self.request_id = '9fe9190e-384f-4baa-a416-d51ed93d1be7'
|
191
191
|
end
|
192
192
|
|
193
193
|
if attributes.key?(:'use_ocr')
|
@@ -182,7 +182,7 @@ module Carbon
|
|
182
182
|
if attributes.key?(:'request_id')
|
183
183
|
self.request_id = attributes[:'request_id']
|
184
184
|
else
|
185
|
-
self.request_id = '
|
185
|
+
self.request_id = 'fceb0182-329c-4e45-953b-885c747cf4a3'
|
186
186
|
end
|
187
187
|
|
188
188
|
if attributes.key?(:'enable_file_picker')
|
data/lib/carbon_ruby_sdk.rb
CHANGED
@@ -40,6 +40,7 @@ require 'carbon_ruby_sdk/models/document_response_list'
|
|
40
40
|
require 'carbon_ruby_sdk/models/embedding_and_chunk'
|
41
41
|
require 'carbon_ruby_sdk/models/embedding_generators'
|
42
42
|
require 'carbon_ruby_sdk/models/embedding_generators_nullable'
|
43
|
+
require 'carbon_ruby_sdk/models/embedding_model'
|
43
44
|
require 'carbon_ruby_sdk/models/embedding_properties'
|
44
45
|
require 'carbon_ruby_sdk/models/embeddings_and_chunks_filters'
|
45
46
|
require 'carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns'
|
data/spec/api/files_api_spec.rb
CHANGED
@@ -151,7 +151,7 @@ describe 'FilesApi' do
|
|
151
151
|
# @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when processing file.
|
152
152
|
# @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file.
|
153
153
|
# @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See the route description for more information.
|
154
|
-
# @option opts [
|
154
|
+
# @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks.
|
155
155
|
# @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
|
156
156
|
# @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
|
157
157
|
# @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks.
|
@@ -0,0 +1,22 @@
|
|
1
|
+
=begin
|
2
|
+
#Carbon
|
3
|
+
|
4
|
+
#Connect external data to LLMs, no matter the source.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 1.0.0
|
7
|
+
=end
|
8
|
+
|
9
|
+
require 'spec_helper'
|
10
|
+
require 'json'
|
11
|
+
require 'date'
|
12
|
+
|
13
|
+
# Unit tests for Carbon::EmbeddingModel
|
14
|
+
describe Carbon::EmbeddingModel do
|
15
|
+
let(:instance) { Carbon::EmbeddingModel.new }
|
16
|
+
|
17
|
+
describe 'test an instance of EmbeddingModel' do
|
18
|
+
it 'should create an instance of EmbeddingModel' do
|
19
|
+
expect(instance).to be_instance_of(Carbon::EmbeddingModel)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carbon_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Konfig
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -123,6 +123,7 @@ files:
|
|
123
123
|
- lib/carbon_ruby_sdk/models/embedding_and_chunk.rb
|
124
124
|
- lib/carbon_ruby_sdk/models/embedding_generators.rb
|
125
125
|
- lib/carbon_ruby_sdk/models/embedding_generators_nullable.rb
|
126
|
+
- lib/carbon_ruby_sdk/models/embedding_model.rb
|
126
127
|
- lib/carbon_ruby_sdk/models/embedding_properties.rb
|
127
128
|
- lib/carbon_ruby_sdk/models/embeddings_and_chunks_filters.rb
|
128
129
|
- lib/carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns.rb
|
@@ -269,6 +270,7 @@ files:
|
|
269
270
|
- spec/models/embedding_and_chunk_spec.rb
|
270
271
|
- spec/models/embedding_generators_nullable_spec.rb
|
271
272
|
- spec/models/embedding_generators_spec.rb
|
273
|
+
- spec/models/embedding_model_spec.rb
|
272
274
|
- spec/models/embedding_properties_spec.rb
|
273
275
|
- spec/models/embeddings_and_chunks_filters_spec.rb
|
274
276
|
- spec/models/embeddings_and_chunks_order_by_columns_spec.rb
|
@@ -440,6 +442,7 @@ test_files:
|
|
440
442
|
- spec/models/update_organization_input_spec.rb
|
441
443
|
- spec/models/user_request_content_spec.rb
|
442
444
|
- spec/models/gmail_sync_input_spec.rb
|
445
|
+
- spec/models/embedding_model_spec.rb
|
443
446
|
- spec/models/delete_files_query_input_spec.rb
|
444
447
|
- spec/models/webhook_query_input_spec.rb
|
445
448
|
- spec/models/text_embedding_generators_spec.rb
|