carbon_ruby_sdk 0.2.24 → 0.2.26

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 4b61e2385085bb9887386f845b210ba35f30fcb7c4e318827914fdc2d4311a8d
- data.tar.gz: c939329f0ef41fcbdc4a10302d6c587665078d563d969660bae0d5af05e9514d
+ metadata.gz: c562b3f56aff93a861af81dd88f0793eec36d8d8a9fc82c3f636649795dd50e5
+ data.tar.gz: 2f13ed904f9cfac8e64a28e3334e6436c062b579b729abeea7c66bf983245d94
  SHA512:
- metadata.gz: e109bbf4cdf4b20423a509411a212b0e1b9ce246573a56cb24776f04642dae068ca3710c2748f798ddcc8aa976a18ebb92250ce4afe797efdd31efb128de6926
- data.tar.gz: d98be3101307c2805cbeb8a8b26454f57d32fa0b40cb580902fb23c58ee0c4973e332081d1ad499ea1f8d0858545784f8c2ab98d87a45fd2500c902f0164e800
+ metadata.gz: 343fc3b835634fb8b21c4673067e63ef7b7912a4ea5afea96bdbefd924e82281a0f70ef283bed37e0ab535b1b1b0a6292aa0da93a1a93bd22770a25bc775f741
+ data.tar.gz: 95ad01e04435a5513e248d35cdd560c2635c604d8e70aa46cd173853afd211b120722e3d9e12138f349a28e0572c7287f24a72fd14a25a7c9ad3560240f33702
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- carbon_ruby_sdk (0.2.24)
+ carbon_ruby_sdk (0.2.26)
  faraday (>= 1.0.1, < 3.0)
  faraday-multipart (~> 1.0, >= 1.0.4)
 
@@ -52,7 +52,7 @@ GEM
  rspec-mocks (~> 3.13.0)
  rspec-core (3.13.0)
  rspec-support (~> 3.13.0)
- rspec-expectations (3.13.1)
+ rspec-expectations (3.13.2)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.13.0)
  rspec-mocks (3.13.1)
data/README.md CHANGED
@@ -6,7 +6,7 @@
 
  Connect external data to LLMs, no matter the source.
 
- [![npm](https://img.shields.io/badge/gem-v0.2.24-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.24)
+ [![npm](https://img.shields.io/badge/gem-v0.2.26-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.26)
 
  </div>
 
@@ -93,7 +93,7 @@ Connect external data to LLMs, no matter the source.
  Add to Gemfile:
 
  ```ruby
- gem 'carbon_ruby_sdk', '~> 0.2.24'
+ gem 'carbon_ruby_sdk', '~> 0.2.26'
  ```
 
  ## Getting Started<a id="getting-started"></a>
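For context on the bump, a minimal setup against 0.2.26 might look like the sketch below. The `Carbon::Client` constructor and the `api_key`/`customer_id` accessors are assumptions based on the SDK's generated client and are not shown in this diff; check the gem's README for the exact initialization API.

```ruby
# Hypothetical setup sketch -- the constructor and auth accessors below are
# assumptions, not shown in this diff.
require 'carbon_ruby_sdk'

carbon = Carbon::Client.new                 # assumed constructor
carbon.api_key = ENV['CARBON_API_KEY']      # assumed auth accessor
carbon.customer_id = 'your-customer-id'     # assumed accessor

# The generated API groups (e.g. carbon.integrations, carbon.files) are then
# used as in the examples further down this diff.
```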
@@ -1114,7 +1114,7 @@ of all possible query parameters:
  - `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings
  - `set_page_as_boundary`: described above
  - `embedding_model`: the model used to generate embeddings for the document chunks
- - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently)
+ - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs
  - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.
  - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
 
@@ -1178,8 +1178,8 @@ description route description for more information.
  Embedding model that will be used to embed file chunks.
 
  ##### use_ocr: `Boolean`<a id="use_ocr-boolean"></a>
- Whether or not to use OCR when processing files. Only valid for PDFs. Useful for
- documents with tables, images, and/or scanned text.
+ Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and
+ PNGs. Useful for documents with tables, images, and/or scanned text.
 
  ##### generate_sparse_vectors: `Boolean`<a id="generate_sparse_vectors-boolean"></a>
  Whether or not to generate sparse vectors for the file. This is *required* for
@@ -1617,7 +1617,7 @@ success state.
 
  ```ruby
  result = carbon.integrations.get_oauth_url(
- service: "GOOGLE_CLOUD_STORAGE",
+ service: "BOX",
  tags: None,
  scope: "string_example",
  chunk_size: 1500,
@@ -1657,7 +1657,7 @@ p result
 
  #### ⚙️ Parameters<a id="⚙️-parameters"></a>
 
- ##### service: [`DataSourceType`](./lib/carbon_ruby_sdk/models/data_source_type.rb)<a id="service-datasourcetypelibcarbon_ruby_sdkmodelsdata_source_typerb"></a>
+ ##### service: [`ExternalDataSourceType`](./lib/carbon_ruby_sdk/models/external_data_source_type.rb)<a id="service-externaldatasourcetypelibcarbon_ruby_sdkmodelsexternal_data_source_typerb"></a>
  ##### tags: `Object`<a id="tags-object"></a>
  ##### scope: `String`<a id="scope-string"></a>
  ##### chunk_size: `Integer`<a id="chunk_size-integer"></a>
@@ -1695,7 +1695,7 @@ This request id will be added to all files that get synced using the generated
  OAuth URL
 
  ##### use_ocr: `Boolean`<a id="use_ocr-boolean"></a>
- Enable OCR for files that support it. Supported formats: png, jpg, pdf
+ Enable OCR for files that support it. Supported formats: pdf, png, jpg
 
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
  ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
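Taken together with the `service` type change above and the updated OCR note, a Box OAuth URL request that opts into OCR and the file picker might look like the sketch below; it mirrors the `get_oauth_url` example earlier in the README diff, and the parameter names come from the list above (an illustrative call, not part of the diff).

```ruby
# Sketch based on the README example and the parameters documented above.
result = carbon.integrations.get_oauth_url(
  service: "BOX",            # must be a valid ExternalDataSourceType value
  use_ocr: true,             # supported formats: pdf, png, jpg
  enable_file_picker: true,  # supported sources include BOX, DROPBOX, GOOGLE_DRIVE, ONEDRIVE, SHAREPOINT
  chunk_size: 1500
)
p result
```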
@@ -1341,7 +1341,7 @@ module Carbon
  # - `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings
  # - `set_page_as_boundary`: described above
  # - `embedding_model`: the model used to generate embeddings for the document chunks
- # - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently)
+ # - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs
  # - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.
  # - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
  #
@@ -1363,7 +1363,7 @@ module Carbon
  # @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
  # @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
  # @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
- # @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
+ # @param use_ocr [Boolean] Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text.
  # @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
  # @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
  # @param max_items_per_chunk [Integer] Number of objects per chunk. For csv, tsv, xlsx, and json files only.
@@ -1414,7 +1414,7 @@ module Carbon
  # - `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings
  # - `set_page_as_boundary`: described above
  # - `embedding_model`: the model used to generate embeddings for the document chunks
- # - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently)
+ # - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs
  # - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.
  # - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
  #
@@ -1436,7 +1436,7 @@ module Carbon
  # @param skip_embedding_generation [Boolean] Flag to control whether or not embeddings should be generated and stored when processing file.
  # @param set_page_as_boundary [Boolean] Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
  # @param embedding_model [EmbeddingModel] Embedding model that will be used to embed file chunks.
- # @param use_ocr [Boolean] Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
+ # @param use_ocr [Boolean] Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text.
  # @param generate_sparse_vectors [Boolean] Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
  # @param prepend_filename_to_chunks [Boolean] Whether or not to prepend the file's name to chunks.
  # @param max_items_per_chunk [Integer] Number of objects per chunk. For csv, tsv, xlsx, and json files only.
@@ -1475,7 +1475,7 @@ module Carbon
  end
 
  # Create Upload File
- # This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - `chunk_size`: the chunk size (in tokens) applied when splitting the document - `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document - `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings - `set_page_as_boundary`: described above - `embedding_model`: the model used to generate embeddings for the document chunks - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently) - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search. - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI's multimodal model; for text, we support OpenAI's `text-embedding-ada-002` and Cohere's embed-multilingual-v3.0. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, the `text-embedding-ada-002` is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
+ # This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - `chunk_size`: the chunk size (in tokens) applied when splitting the document - `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document - `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings - `set_page_as_boundary`: described above - `embedding_model`: the model used to generate embeddings for the document chunks - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search. - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI's multimodal model; for text, we support OpenAI's `text-embedding-ada-002` and Cohere's embed-multilingual-v3.0. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, the `text-embedding-ada-002` is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
  # @param file [File]
  # @param body_create_upload_file_uploadfile_post [BodyCreateUploadFileUploadfilePost]
  # @param [Hash] opts the optional parameters
@@ -1484,7 +1484,7 @@ module Carbon
  # @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
  # @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
  # @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
- # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
+ # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text. (default to false)
  # @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
  # @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
  # @option opts [Integer] :max_items_per_chunk Number of objects per chunk. For csv, tsv, xlsx, and json files only.
@@ -1503,7 +1503,7 @@ module Carbon
  end
 
  # Create Upload File
- # This endpoint is used to directly upload local files to Carbon. The &#x60;POST&#x60; request should be a multipart form request. Note that the &#x60;set_page_as_boundary&#x60; query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - &#x60;chunk_size&#x60;: the chunk size (in tokens) applied when splitting the document - &#x60;chunk_overlap&#x60;: the chunk overlap (in tokens) applied when splitting the document - &#x60;skip_embedding_generation&#x60;: whether or not to skip the generation of chunks and embeddings - &#x60;set_page_as_boundary&#x60;: described above - &#x60;embedding_model&#x60;: the model used to generate embeddings for the document chunks - &#x60;use_ocr&#x60;: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently) - &#x60;generate_sparse_vectors&#x60;: whether or not to generate sparse vectors for the file. Required for hybrid search. - &#x60;prepend_filename_to_chunks&#x60;: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI&#39;s multimodal model; for text, we support OpenAI&#39;s &#x60;text-embedding-ada-002&#x60; and Cohere&#39;s embed-multilingual-v3.0. The model can be specified via the &#x60;embedding_model&#x60; parameter (in the POST body for &#x60;/embeddings&#x60;, and a query parameter in &#x60;/uploadfile&#x60;). If no model is supplied, the &#x60;text-embedding-ada-002&#x60; is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with &#x60;OPENAI&#x60;, and files C and D have embeddings generated with &#x60;COHERE_MULTILINGUAL_V3&#x60;, then by default, queries will only consider files A and B. If &#x60;COHERE_MULTILINGUAL_V3&#x60; is specified as the &#x60;embedding_model&#x60; in &#x60;/embeddings&#x60;, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set &#x60;VERTEX_MULTIMODAL&#x60; as an &#x60;embedding_model&#x60;. This model is used automatically by Carbon when it detects an image file.
+ # This endpoint is used to directly upload local files to Carbon. The &#x60;POST&#x60; request should be a multipart form request. Note that the &#x60;set_page_as_boundary&#x60; query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - &#x60;chunk_size&#x60;: the chunk size (in tokens) applied when splitting the document - &#x60;chunk_overlap&#x60;: the chunk overlap (in tokens) applied when splitting the document - &#x60;skip_embedding_generation&#x60;: whether or not to skip the generation of chunks and embeddings - &#x60;set_page_as_boundary&#x60;: described above - &#x60;embedding_model&#x60;: the model used to generate embeddings for the document chunks - &#x60;use_ocr&#x60;: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs - &#x60;generate_sparse_vectors&#x60;: whether or not to generate sparse vectors for the file. Required for hybrid search. - &#x60;prepend_filename_to_chunks&#x60;: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI&#39;s multimodal model; for text, we support OpenAI&#39;s &#x60;text-embedding-ada-002&#x60; and Cohere&#39;s embed-multilingual-v3.0. The model can be specified via the &#x60;embedding_model&#x60; parameter (in the POST body for &#x60;/embeddings&#x60;, and a query parameter in &#x60;/uploadfile&#x60;). If no model is supplied, the &#x60;text-embedding-ada-002&#x60; is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with &#x60;OPENAI&#x60;, and files C and D have embeddings generated with &#x60;COHERE_MULTILINGUAL_V3&#x60;, then by default, queries will only consider files A and B. If &#x60;COHERE_MULTILINGUAL_V3&#x60; is specified as the &#x60;embedding_model&#x60; in &#x60;/embeddings&#x60;, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set &#x60;VERTEX_MULTIMODAL&#x60; as an &#x60;embedding_model&#x60;. This model is used automatically by Carbon when it detects an image file.
  # @param file [File]
  # @param body_create_upload_file_uploadfile_post [BodyCreateUploadFileUploadfilePost]
  # @param [Hash] opts the optional parameters
@@ -1512,7 +1512,7 @@ module Carbon
  # @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file. (default to false)
  # @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information. (default to false)
  # @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks. (default to 'OPENAI')
- # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text. (default to false)
+ # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text. (default to false)
  # @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search. (default to false)
  # @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's name to chunks. (default to false)
  # @option opts [Integer] :max_items_per_chunk Number of objects per chunk. For csv, tsv, xlsx, and json files only.
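The relaxed `use_ocr` wording is easiest to see in a call sketch. The snippet below assumes the generated `carbon.files.upload` helper with keyword-style options (the method name and argument style are inferred from the `FilesApi` spec later in this diff, so treat them as assumptions) and shows OCR being enabled for a PNG, which the old doc comment ruled out.

```ruby
# Hypothetical upload call -- `carbon.files.upload` and its keyword options are
# assumed from the generated FilesApi; verify against the gem's README.
result = carbon.files.upload(
  file: File.open('scanned_invoice.png', 'rb'),
  use_ocr: true,                    # now documented for PDFs, JPEGs, and PNGs
  generate_sparse_vectors: true,    # required for hybrid search
  prepend_filename_to_chunks: true
)
p result
```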
@@ -641,7 +641,7 @@ module Carbon
  # - A file syncing URL which skips the OAuth flow if the user already has a valid access token and takes them to the
  # success state.
  #
- # @param service [DataSourceType]
+ # @param service [ExternalDataSourceType]
  # @param tags [Object]
  # @param scope [String]
  # @param chunk_size [Integer]
@@ -661,7 +661,7 @@ module Carbon
  # @param data_source_id [Integer] Used to specify a data source to sync from if you have multiple connected. It can be skipped if you only have one data source of that type connected or are connecting a new account.
  # @param connecting_new_account [Boolean] Used to connect a new data source. If not specified, we will attempt to create a sync URL for an existing data source based on type and ID.
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
- # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: png, jpg, pdf
+ # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf, png, jpg
  # @param parse_pdf_tables_with_ocr [Boolean]
  # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, DROPBOX, GOOGLE_DRIVE, ONEDRIVE, SHAREPOINT
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -711,7 +711,7 @@ module Carbon
  # - A file syncing URL which skips the OAuth flow if the user already has a valid access token and takes them to the
  # success state.
  #
- # @param service [DataSourceType]
+ # @param service [ExternalDataSourceType]
  # @param tags [Object]
  # @param scope [String]
  # @param chunk_size [Integer]
@@ -731,7 +731,7 @@ module Carbon
  # @param data_source_id [Integer] Used to specify a data source to sync from if you have multiple connected. It can be skipped if you only have one data source of that type connected or are connecting a new account.
  # @param connecting_new_account [Boolean] Used to connect a new data source. If not specified, we will attempt to create a sync URL for an existing data source based on type and ID.
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
- # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: png, jpg, pdf
+ # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf, png, jpg
  # @param parse_pdf_tables_with_ocr [Boolean]
  # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, DROPBOX, GOOGLE_DRIVE, ONEDRIVE, SHAREPOINT
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -0,0 +1,49 @@
+ =begin
+ #Carbon
+
+ #Connect external data to LLMs, no matter the source.
+
+ The version of the OpenAPI document: 1.0.0
+ =end
+
+ require 'date'
+ require 'time'
+
+ module Carbon
+   class ExternalDataSourceType
+     BOX = "BOX".freeze
+     CONFLUENCE = "CONFLUENCE".freeze
+     DROPBOX = "DROPBOX".freeze
+     GMAIL = "GMAIL".freeze
+     GOOGLE_DRIVE = "GOOGLE_DRIVE".freeze
+     GOOGLE_CLOUD_STORAGE = "GOOGLE_CLOUD_STORAGE".freeze
+     INTERCOM = "INTERCOM".freeze
+     NOTION = "NOTION".freeze
+     ONEDRIVE = "ONEDRIVE".freeze
+     OUTLOOK = "OUTLOOK".freeze
+     SALESFORCE = "SALESFORCE".freeze
+     SHAREPOINT = "SHAREPOINT".freeze
+     SLACK = "SLACK".freeze
+     ZENDESK = "ZENDESK".freeze
+     ZOTERO = "ZOTERO".freeze
+
+     def self.all_vars
+       @all_vars ||= [BOX, CONFLUENCE, DROPBOX, GMAIL, GOOGLE_DRIVE, GOOGLE_CLOUD_STORAGE, INTERCOM, NOTION, ONEDRIVE, OUTLOOK, SALESFORCE, SHAREPOINT, SLACK, ZENDESK, ZOTERO].freeze
+     end
+
+     # Builds the enum from string
+     # @param [String] The enum value in the form of the string
+     # @return [String] The enum value
+     def self.build_from_hash(value)
+       new.build_from_hash(value)
+     end
+
+     # Builds the enum from string
+     # @param [String] The enum value in the form of the string
+     # @return [String] The enum value
+     def build_from_hash(value)
+       return value if ExternalDataSourceType.all_vars.include?(value)
+       raise "Invalid ENUM value #{value} for class #ExternalDataSourceType"
+     end
+   end
+ end
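Because `get_oauth_url` now types `service` as `ExternalDataSourceType`, the new enum can be used to validate a value client-side before making a request. This sketch relies only on the `all_vars` and `build_from_hash` methods defined above; the `carbon` client is assumed to be configured already.

```ruby
# Validate a service name against the new enum before requesting an OAuth URL.
service = "BOX"

# build_from_hash returns the value if it is known and raises
# "Invalid ENUM value ..." otherwise.
Carbon::ExternalDataSourceType.build_from_hash(service)

# Or check non-destructively:
if Carbon::ExternalDataSourceType.all_vars.include?(service)
  result = carbon.integrations.get_oauth_url(service: service)  # other arguments omitted for brevity
end
```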
@@ -56,7 +56,7 @@ module Carbon
  # This request id will be added to all files that get synced using the generated OAuth URL
  attr_accessor :request_id
 
- # Enable OCR for files that support it. Supported formats: png, jpg, pdf
+ # Enable OCR for files that support it. Supported formats: pdf, png, jpg
  attr_accessor :use_ocr
 
  attr_accessor :parse_pdf_tables_with_ocr
@@ -118,7 +118,7 @@ module Carbon
  {
  :'tags' => :'Object',
  :'scope' => :'String',
- :'service' => :'DataSourceType',
+ :'service' => :'ExternalDataSourceType',
  :'chunk_size' => :'Integer',
  :'chunk_overlap' => :'Integer',
  :'skip_embedding_generation' => :'Boolean',
@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
  =end
 
  module Carbon
- VERSION = '0.2.24'
+ VERSION = '0.2.26'
  end
@@ -50,6 +50,7 @@ require 'carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns'
  require 'carbon_ruby_sdk/models/embeddings_and_chunks_query_input'
  require 'carbon_ruby_sdk/models/embeddings_and_chunks_query_input_v2'
  require 'carbon_ruby_sdk/models/embeddings_and_chunks_response'
+ require 'carbon_ruby_sdk/models/external_data_source_type'
  require 'carbon_ruby_sdk/models/external_file_sync_statuses'
  require 'carbon_ruby_sdk/models/external_source_item'
  require 'carbon_ruby_sdk/models/external_source_items_order_by'
@@ -165,7 +165,7 @@ describe 'FilesApi' do
 
  # unit tests for upload
  # Create Upload File
- # This endpoint is used to directly upload local files to Carbon. The &#x60;POST&#x60; request should be a multipart form request. Note that the &#x60;set_page_as_boundary&#x60; query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - &#x60;chunk_size&#x60;: the chunk size (in tokens) applied when splitting the document - &#x60;chunk_overlap&#x60;: the chunk overlap (in tokens) applied when splitting the document - &#x60;skip_embedding_generation&#x60;: whether or not to skip the generation of chunks and embeddings - &#x60;set_page_as_boundary&#x60;: described above - &#x60;embedding_model&#x60;: the model used to generate embeddings for the document chunks - &#x60;use_ocr&#x60;: whether or not to use OCR as a preprocessing step prior to generating chunks (only valid for PDFs currently) - &#x60;generate_sparse_vectors&#x60;: whether or not to generate sparse vectors for the file. Required for hybrid search. - &#x60;prepend_filename_to_chunks&#x60;: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI&#39;s multimodal model; for text, we support OpenAI&#39;s &#x60;text-embedding-ada-002&#x60; and Cohere&#39;s embed-multilingual-v3.0. The model can be specified via the &#x60;embedding_model&#x60; parameter (in the POST body for &#x60;/embeddings&#x60;, and a query parameter in &#x60;/uploadfile&#x60;). If no model is supplied, the &#x60;text-embedding-ada-002&#x60; is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with &#x60;OPENAI&#x60;, and files C and D have embeddings generated with &#x60;COHERE_MULTILINGUAL_V3&#x60;, then by default, queries will only consider files A and B. If &#x60;COHERE_MULTILINGUAL_V3&#x60; is specified as the &#x60;embedding_model&#x60; in &#x60;/embeddings&#x60;, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set &#x60;VERTEX_MULTIMODAL&#x60; as an &#x60;embedding_model&#x60;. This model is used automatically by Carbon when it detects an image file.
+ # This endpoint is used to directly upload local files to Carbon. The &#x60;POST&#x60; request should be a multipart form request. Note that the &#x60;set_page_as_boundary&#x60; query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters: - &#x60;chunk_size&#x60;: the chunk size (in tokens) applied when splitting the document - &#x60;chunk_overlap&#x60;: the chunk overlap (in tokens) applied when splitting the document - &#x60;skip_embedding_generation&#x60;: whether or not to skip the generation of chunks and embeddings - &#x60;set_page_as_boundary&#x60;: described above - &#x60;embedding_model&#x60;: the model used to generate embeddings for the document chunks - &#x60;use_ocr&#x60;: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs - &#x60;generate_sparse_vectors&#x60;: whether or not to generate sparse vectors for the file. Required for hybrid search. - &#x60;prepend_filename_to_chunks&#x60;: whether or not to prepend the filename to the chunk text Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI&#39;s multimodal model; for text, we support OpenAI&#39;s &#x60;text-embedding-ada-002&#x60; and Cohere&#39;s embed-multilingual-v3.0. The model can be specified via the &#x60;embedding_model&#x60; parameter (in the POST body for &#x60;/embeddings&#x60;, and a query parameter in &#x60;/uploadfile&#x60;). If no model is supplied, the &#x60;text-embedding-ada-002&#x60; is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with &#x60;OPENAI&#x60;, and files C and D have embeddings generated with &#x60;COHERE_MULTILINGUAL_V3&#x60;, then by default, queries will only consider files A and B. If &#x60;COHERE_MULTILINGUAL_V3&#x60; is specified as the &#x60;embedding_model&#x60; in &#x60;/embeddings&#x60;, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set &#x60;VERTEX_MULTIMODAL&#x60; as an &#x60;embedding_model&#x60;. This model is used automatically by Carbon when it detects an image file.
  # @param file
  # @param body_create_upload_file_uploadfile_post
  # @param [Hash] opts the optional parameters
@@ -174,7 +174,7 @@ describe 'FilesApi' do
  # @option opts [Boolean] :skip_embedding_generation Flag to control whether or not embeddings should be generated and stored when processing file.
  # @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set the a page&#39;s worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See description route description for more information.
  # @option opts [EmbeddingModel] :embedding_model Embedding model that will be used to embed file chunks.
- # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.
+ # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text.
  # @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse vectors for the file. This is *required* for the file to be a candidate for hybrid search.
  # @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file&#39;s name to chunks.
  # @option opts [Integer] :max_items_per_chunk Number of objects per chunk. For csv, tsv, xlsx, and json files only.
@@ -0,0 +1,22 @@
+ =begin
+ #Carbon
+
+ #Connect external data to LLMs, no matter the source.
+
+ The version of the OpenAPI document: 1.0.0
+ =end
+
+ require 'spec_helper'
+ require 'json'
+ require 'date'
+
+ # Unit tests for Carbon::ExternalDataSourceType
+ describe Carbon::ExternalDataSourceType do
+   let(:instance) { Carbon::ExternalDataSourceType.new }
+
+   describe 'test an instance of ExternalDataSourceType' do
+     it 'should create an instance of ExternalDataSourceType' do
+       expect(instance).to be_instance_of(Carbon::ExternalDataSourceType)
+     end
+   end
+ end
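The generated spec only asserts that the class can be instantiated. If more coverage were wanted, a supplementary example along these lines (hypothetical, not part of this release) could exercise the `build_from_hash` validation defined in the model:

```ruby
# Hypothetical additional spec -- not included in the gem; it exercises the
# build_from_hash behaviour defined in external_data_source_type.rb.
require 'spec_helper'

describe Carbon::ExternalDataSourceType do
  it 'returns a known enum value unchanged' do
    expect(described_class.build_from_hash("NOTION")).to eq("NOTION")
  end

  it 'raises for an unknown value' do
    expect { described_class.build_from_hash("FTP") }.to raise_error(/Invalid ENUM value/)
  end
end
```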
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: carbon_ruby_sdk
  version: !ruby/object:Gem::Version
- version: 0.2.24
+ version: 0.2.26
  platform: ruby
  authors:
  - Konfig
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-08-20 00:00:00.000000000 Z
+ date: 2024-08-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: faraday
@@ -132,6 +132,7 @@ files:
  - lib/carbon_ruby_sdk/models/embeddings_and_chunks_query_input.rb
  - lib/carbon_ruby_sdk/models/embeddings_and_chunks_query_input_v2.rb
  - lib/carbon_ruby_sdk/models/embeddings_and_chunks_response.rb
+ - lib/carbon_ruby_sdk/models/external_data_source_type.rb
  - lib/carbon_ruby_sdk/models/external_file_sync_statuses.rb
  - lib/carbon_ruby_sdk/models/external_source_item.rb
  - lib/carbon_ruby_sdk/models/external_source_items_order_by.rb
@@ -305,6 +306,7 @@ files:
  - spec/models/embeddings_and_chunks_query_input_spec.rb
  - spec/models/embeddings_and_chunks_query_input_v2_spec.rb
  - spec/models/embeddings_and_chunks_response_spec.rb
+ - spec/models/external_data_source_type_spec.rb
  - spec/models/external_file_sync_statuses_spec.rb
  - spec/models/external_source_item_spec.rb
  - spec/models/external_source_items_order_by_spec.rb
@@ -600,6 +602,7 @@ test_files:
  - spec/models/list_users_request_spec.rb
  - spec/models/gitbook_sync_request_spec.rb
  - spec/models/http_validation_error_spec.rb
+ - spec/models/external_data_source_type_spec.rb
  - spec/models/embedding_and_chunk_spec.rb
  - spec/models/embedding_properties_spec.rb
  - spec/models/file_sync_config_nullable_spec.rb