orca-sdk 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +2 -5
- orca_sdk/_shared/__init__.py +1 -0
- orca_sdk/_shared/metrics.py +1 -1
- orca_sdk/_utils/analysis_ui.py +5 -5
- orca_sdk/_utils/auth.py +23 -33
- orca_sdk/_utils/pagination.py +126 -0
- orca_sdk/_utils/pagination_test.py +132 -0
- orca_sdk/classification_model.py +188 -126
- orca_sdk/classification_model_test.py +102 -0
- orca_sdk/client.py +3515 -0
- orca_sdk/conftest.py +10 -0
- orca_sdk/credentials.py +73 -21
- orca_sdk/credentials_test.py +20 -0
- orca_sdk/datasource.py +186 -81
- orca_sdk/datasource_test.py +194 -0
- orca_sdk/embedding_model.py +267 -75
- orca_sdk/embedding_model_test.py +32 -14
- orca_sdk/job.py +59 -54
- orca_sdk/job_test.py +50 -0
- orca_sdk/memoryset.py +372 -345
- orca_sdk/memoryset_test.py +7 -11
- orca_sdk/regression_model.py +120 -111
- orca_sdk/regression_model_test.py +15 -0
- orca_sdk/telemetry.py +229 -115
- {orca_sdk-0.0.96.dist-info → orca_sdk-0.0.98.dist-info}/METADATA +19 -5
- orca_sdk-0.0.98.dist-info/RECORD +40 -0
- orca_sdk/_generated_api_client/__init__.py +0 -3
- orca_sdk/_generated_api_client/api/__init__.py +0 -287
- orca_sdk/_generated_api_client/api/auth/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/auth/check_authentication_auth_get.py +0 -128
- orca_sdk/_generated_api_client/api/auth/create_api_key_auth_api_key_post.py +0 -170
- orca_sdk/_generated_api_client/api/auth/delete_api_key_auth_api_key_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/auth/delete_org_auth_org_delete.py +0 -130
- orca_sdk/_generated_api_client/api/auth/list_api_keys_auth_api_key_get.py +0 -127
- orca_sdk/_generated_api_client/api/classification_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/classification_model/create_classification_model_gpu_classification_model_post.py +0 -170
- orca_sdk/_generated_api_client/api/classification_model/delete_classification_model_classification_model_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/classification_model/delete_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py +0 -168
- orca_sdk/_generated_api_client/api/classification_model/evaluate_classification_model_classification_model_model_name_or_id_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/classification_model/get_classification_model_classification_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/classification_model/get_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py +0 -170
- orca_sdk/_generated_api_client/api/classification_model/list_classification_model_evaluations_classification_model_model_name_or_id_evaluation_get.py +0 -161
- orca_sdk/_generated_api_client/api/classification_model/list_classification_models_classification_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/classification_model/predict_label_gpu_classification_model_name_or_id_prediction_post.py +0 -190
- orca_sdk/_generated_api_client/api/classification_model/update_classification_model_classification_model_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/datasource/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/datasource/create_datasource_datasource_post.py +0 -167
- orca_sdk/_generated_api_client/api/datasource/create_embedding_evaluation_datasource_name_or_id_embedding_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/datasource/delete_datasource_datasource_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +0 -172
- orca_sdk/_generated_api_client/api/datasource/get_datasource_datasource_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/datasource/get_embedding_evaluation_datasource_name_or_id_embedding_evaluation_task_id_get.py +0 -169
- orca_sdk/_generated_api_client/api/datasource/list_datasources_datasource_get.py +0 -127
- orca_sdk/_generated_api_client/api/datasource/list_embedding_evaluations_datasource_name_or_id_embedding_evaluation_get.py +0 -235
- orca_sdk/_generated_api_client/api/default/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/default/healthcheck_get.py +0 -118
- orca_sdk/_generated_api_client/api/default/healthcheck_gpu_get.py +0 -118
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/create_finetuned_embedding_model_finetuned_embedding_model_post.py +0 -168
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/delete_finetuned_embedding_model_finetuned_embedding_model_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/embed_with_finetuned_model_gpu_finetuned_embedding_model_name_or_id_embedding_post.py +0 -189
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/get_finetuned_embedding_model_finetuned_embedding_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/list_finetuned_embedding_models_finetuned_embedding_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/memoryset/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/memoryset/analyze_memoryset_memoryset_name_or_id_analysis_post.py +0 -183
- orca_sdk/_generated_api_client/api/memoryset/batch_delete_memoryset_batch_delete_memoryset_post.py +0 -168
- orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +0 -181
- orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +0 -168
- orca_sdk/_generated_api_client/api/memoryset/delete_memories_memoryset_name_or_id_memories_delete_post.py +0 -181
- orca_sdk/_generated_api_client/api/memoryset/delete_memory_memoryset_name_or_id_memory_memory_id_delete.py +0 -167
- orca_sdk/_generated_api_client/api/memoryset/delete_memoryset_memoryset_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/memoryset/get_analysis_memoryset_name_or_id_analysis_analysis_task_id_get.py +0 -169
- orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +0 -210
- orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +0 -186
- orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +0 -188
- orca_sdk/_generated_api_client/api/memoryset/list_analyses_memoryset_name_or_id_analysis_get.py +0 -235
- orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +0 -180
- orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +0 -212
- orca_sdk/_generated_api_client/api/memoryset/potential_duplicate_groups_memoryset_name_or_id_potential_duplicate_groups_get.py +0 -195
- orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +0 -210
- orca_sdk/_generated_api_client/api/memoryset/suggest_cascading_edits_memoryset_name_or_id_memory_memory_id_cascading_edits_post.py +0 -233
- orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +0 -216
- orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +0 -205
- orca_sdk/_generated_api_client/api/memoryset/update_memoryset_memoryset_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/predictive_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/predictive_model/list_predictive_models_predictive_model_get.py +0 -150
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/embed_with_pretrained_model_gpu_pretrained_embedding_model_model_name_embedding_post.py +0 -192
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/get_pretrained_embedding_model_pretrained_embedding_model_model_name_get.py +0 -161
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/list_pretrained_embedding_models_pretrained_embedding_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/regression_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/regression_model/create_regression_model_gpu_regression_model_post.py +0 -170
- orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_delete.py +0 -168
- orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_regression_model_name_or_id_delete.py +0 -154
- orca_sdk/_generated_api_client/api/regression_model/evaluate_regression_model_regression_model_model_name_or_id_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_get.py +0 -170
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_regression_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/regression_model/list_regression_model_evaluations_regression_model_model_name_or_id_evaluation_get.py +0 -161
- orca_sdk/_generated_api_client/api/regression_model/list_regression_models_regression_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/regression_model/predict_score_gpu_regression_model_name_or_id_prediction_post.py +0 -190
- orca_sdk/_generated_api_client/api/regression_model/update_regression_model_regression_model_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/task/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/task/abort_task_task_task_id_abort_delete.py +0 -154
- orca_sdk/_generated_api_client/api/task/get_task_status_task_task_id_status_get.py +0 -156
- orca_sdk/_generated_api_client/api/task/get_task_task_task_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +0 -293
- orca_sdk/_generated_api_client/api/telemetry/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/telemetry/count_predictions_telemetry_prediction_count_post.py +0 -168
- orca_sdk/_generated_api_client/api/telemetry/drop_feedback_category_with_data_telemetry_feedback_category_name_or_id_delete.py +0 -162
- orca_sdk/_generated_api_client/api/telemetry/explain_prediction_telemetry_prediction_prediction_id_explanation_get.py +0 -182
- orca_sdk/_generated_api_client/api/telemetry/get_feedback_category_telemetry_feedback_category_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +0 -180
- orca_sdk/_generated_api_client/api/telemetry/list_feedback_categories_telemetry_feedback_category_get.py +0 -127
- orca_sdk/_generated_api_client/api/telemetry/list_memories_with_feedback_telemetry_memories_post.py +0 -198
- orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +0 -198
- orca_sdk/_generated_api_client/api/telemetry/record_prediction_feedback_telemetry_prediction_feedback_put.py +0 -171
- orca_sdk/_generated_api_client/api/telemetry/update_prediction_telemetry_prediction_prediction_id_patch.py +0 -181
- orca_sdk/_generated_api_client/client.py +0 -216
- orca_sdk/_generated_api_client/errors.py +0 -38
- orca_sdk/_generated_api_client/models/__init__.py +0 -295
- orca_sdk/_generated_api_client/models/analyze_neighbor_labels_result.py +0 -116
- orca_sdk/_generated_api_client/models/api_key_metadata.py +0 -137
- orca_sdk/_generated_api_client/models/api_key_metadata_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/base_label_prediction_result.py +0 -130
- orca_sdk/_generated_api_client/models/base_model.py +0 -55
- orca_sdk/_generated_api_client/models/base_score_prediction_result.py +0 -108
- orca_sdk/_generated_api_client/models/body_create_datasource_datasource_post.py +0 -207
- orca_sdk/_generated_api_client/models/cascade_edit_suggestions_request.py +0 -154
- orca_sdk/_generated_api_client/models/cascading_edit_suggestion.py +0 -92
- orca_sdk/_generated_api_client/models/classification_evaluation_request.py +0 -148
- orca_sdk/_generated_api_client/models/classification_metrics.py +0 -259
- orca_sdk/_generated_api_client/models/classification_model_metadata.py +0 -213
- orca_sdk/_generated_api_client/models/classification_prediction_request.py +0 -220
- orca_sdk/_generated_api_client/models/clone_memoryset_request.py +0 -170
- orca_sdk/_generated_api_client/models/cluster_metrics.py +0 -78
- orca_sdk/_generated_api_client/models/column_info.py +0 -145
- orca_sdk/_generated_api_client/models/column_type.py +0 -14
- orca_sdk/_generated_api_client/models/constraint_violation_error_response.py +0 -80
- orca_sdk/_generated_api_client/models/count_predictions_request.py +0 -195
- orca_sdk/_generated_api_client/models/create_api_key_request.py +0 -120
- orca_sdk/_generated_api_client/models/create_api_key_request_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/create_api_key_response.py +0 -145
- orca_sdk/_generated_api_client/models/create_api_key_response_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/create_classification_model_request.py +0 -197
- orca_sdk/_generated_api_client/models/create_memoryset_request.py +0 -325
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_params.py +0 -66
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_type.py +0 -13
- orca_sdk/_generated_api_client/models/create_regression_model_request.py +0 -137
- orca_sdk/_generated_api_client/models/datasource_metadata.py +0 -156
- orca_sdk/_generated_api_client/models/delete_memories_request.py +0 -70
- orca_sdk/_generated_api_client/models/delete_memorysets_request.py +0 -70
- orca_sdk/_generated_api_client/models/embed_request.py +0 -135
- orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +0 -187
- orca_sdk/_generated_api_client/models/embedding_evaluation_request.py +0 -179
- orca_sdk/_generated_api_client/models/embedding_evaluation_response.py +0 -158
- orca_sdk/_generated_api_client/models/embedding_evaluation_result.py +0 -86
- orca_sdk/_generated_api_client/models/embedding_finetuning_method.py +0 -9
- orca_sdk/_generated_api_client/models/embedding_model_result.py +0 -114
- orca_sdk/_generated_api_client/models/evaluation_response.py +0 -153
- orca_sdk/_generated_api_client/models/evaluation_response_classification_metrics.py +0 -140
- orca_sdk/_generated_api_client/models/evaluation_response_regression_metrics.py +0 -140
- orca_sdk/_generated_api_client/models/feedback_metrics.py +0 -85
- orca_sdk/_generated_api_client/models/feedback_type.py +0 -9
- orca_sdk/_generated_api_client/models/filter_item.py +0 -231
- orca_sdk/_generated_api_client/models/filter_item_field_type_0_item.py +0 -17
- orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_1.py +0 -20
- orca_sdk/_generated_api_client/models/filter_item_op.py +0 -16
- orca_sdk/_generated_api_client/models/finetune_embedding_model_request.py +0 -259
- orca_sdk/_generated_api_client/models/finetune_embedding_model_request_training_args.py +0 -66
- orca_sdk/_generated_api_client/models/finetuned_embedding_model_metadata.py +0 -166
- orca_sdk/_generated_api_client/models/get_memories_request.py +0 -70
- orca_sdk/_generated_api_client/models/http_validation_error.py +0 -86
- orca_sdk/_generated_api_client/models/internal_server_error_response.py +0 -80
- orca_sdk/_generated_api_client/models/label_class_metrics.py +0 -108
- orca_sdk/_generated_api_client/models/label_prediction_memory_lookup.py +0 -210
- orca_sdk/_generated_api_client/models/label_prediction_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/label_prediction_with_memories_and_feedback.py +0 -288
- orca_sdk/_generated_api_client/models/labeled_memory.py +0 -186
- orca_sdk/_generated_api_client/models/labeled_memory_insert.py +0 -128
- orca_sdk/_generated_api_client/models/labeled_memory_insert_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_lookup.py +0 -194
- orca_sdk/_generated_api_client/models/labeled_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_metrics.py +0 -246
- orca_sdk/_generated_api_client/models/labeled_memory_update.py +0 -171
- orca_sdk/_generated_api_client/models/labeled_memory_update_metadata_type_0.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics.py +0 -207
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics_feedback_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/list_memories_request.py +0 -104
- orca_sdk/_generated_api_client/models/list_predictions_request.py +0 -319
- orca_sdk/_generated_api_client/models/lookup_request.py +0 -81
- orca_sdk/_generated_api_client/models/lookup_score_metrics.py +0 -94
- orca_sdk/_generated_api_client/models/memory_metrics.py +0 -165
- orca_sdk/_generated_api_client/models/memory_type.py +0 -9
- orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +0 -212
- orca_sdk/_generated_api_client/models/memoryset_analysis_request.py +0 -105
- orca_sdk/_generated_api_client/models/memoryset_analysis_response.py +0 -182
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config.py +0 -202
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config_clustering_method.py +0 -9
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config_partitioning_method.py +0 -10
- orca_sdk/_generated_api_client/models/memoryset_cluster_metrics.py +0 -100
- orca_sdk/_generated_api_client/models/memoryset_duplicate_analysis_config.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_duplicate_metrics.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_label_analysis_config.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_label_metrics.py +0 -116
- orca_sdk/_generated_api_client/models/memoryset_metadata.py +0 -291
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_params.py +0 -55
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_type.py +0 -13
- orca_sdk/_generated_api_client/models/memoryset_metrics.py +0 -232
- orca_sdk/_generated_api_client/models/memoryset_neighbor_analysis_config.py +0 -83
- orca_sdk/_generated_api_client/models/memoryset_neighbor_metrics.py +0 -76
- orca_sdk/_generated_api_client/models/memoryset_neighbor_metrics_lookup_score_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/memoryset_projection_analysis_config.py +0 -79
- orca_sdk/_generated_api_client/models/memoryset_projection_metrics.py +0 -55
- orca_sdk/_generated_api_client/models/memoryset_update.py +0 -101
- orca_sdk/_generated_api_client/models/not_found_error_response.py +0 -100
- orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +0 -22
- orca_sdk/_generated_api_client/models/paginated_union_labeled_memory_with_feedback_metrics_scored_memory_with_feedback_metrics.py +0 -135
- orca_sdk/_generated_api_client/models/pr_curve.py +0 -86
- orca_sdk/_generated_api_client/models/prediction_feedback.py +0 -157
- orca_sdk/_generated_api_client/models/prediction_feedback_category.py +0 -115
- orca_sdk/_generated_api_client/models/prediction_feedback_request.py +0 -122
- orca_sdk/_generated_api_client/models/prediction_feedback_result.py +0 -102
- orca_sdk/_generated_api_client/models/prediction_sort_item_item_type_0.py +0 -10
- orca_sdk/_generated_api_client/models/prediction_sort_item_item_type_1.py +0 -9
- orca_sdk/_generated_api_client/models/predictive_model_update.py +0 -91
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +0 -107
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +0 -17
- orca_sdk/_generated_api_client/models/rac_head_type.py +0 -11
- orca_sdk/_generated_api_client/models/rar_head_type.py +0 -8
- orca_sdk/_generated_api_client/models/regression_evaluation_request.py +0 -148
- orca_sdk/_generated_api_client/models/regression_metrics.py +0 -172
- orca_sdk/_generated_api_client/models/regression_model_metadata.py +0 -177
- orca_sdk/_generated_api_client/models/regression_prediction_request.py +0 -195
- orca_sdk/_generated_api_client/models/roc_curve.py +0 -86
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup.py +0 -196
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/score_prediction_with_memories_and_feedback.py +0 -252
- orca_sdk/_generated_api_client/models/scored_memory.py +0 -172
- orca_sdk/_generated_api_client/models/scored_memory_insert.py +0 -128
- orca_sdk/_generated_api_client/models/scored_memory_insert_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_lookup.py +0 -180
- orca_sdk/_generated_api_client/models/scored_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_update.py +0 -171
- orca_sdk/_generated_api_client/models/scored_memory_update_metadata_type_0.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics.py +0 -193
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_feedback_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/service_unavailable_error_response.py +0 -80
- orca_sdk/_generated_api_client/models/task.py +0 -198
- orca_sdk/_generated_api_client/models/task_status.py +0 -14
- orca_sdk/_generated_api_client/models/task_status_info.py +0 -133
- orca_sdk/_generated_api_client/models/telemetry_field_type_0_item_type_2.py +0 -9
- orca_sdk/_generated_api_client/models/telemetry_filter_item.py +0 -205
- orca_sdk/_generated_api_client/models/telemetry_filter_item_op.py +0 -15
- orca_sdk/_generated_api_client/models/telemetry_memories_request.py +0 -181
- orca_sdk/_generated_api_client/models/telemetry_sort_options.py +0 -173
- orca_sdk/_generated_api_client/models/telemetry_sort_options_direction.py +0 -9
- orca_sdk/_generated_api_client/models/unauthenticated_error_response.py +0 -72
- orca_sdk/_generated_api_client/models/unauthorized_error_response.py +0 -80
- orca_sdk/_generated_api_client/models/update_prediction_request.py +0 -133
- orca_sdk/_generated_api_client/models/validation_error.py +0 -99
- orca_sdk/_generated_api_client/py.typed +0 -1
- orca_sdk/_generated_api_client/types.py +0 -56
- orca_sdk-0.0.96.dist-info/RECORD +0 -278
- {orca_sdk-0.0.96.dist-info → orca_sdk-0.0.98.dist-info}/WHEEL +0 -0
orca_sdk/conftest.py
CHANGED
|
@@ -8,6 +8,7 @@ from datasets import ClassLabel, Dataset, Features, Value
|
|
|
8
8
|
|
|
9
9
|
from ._utils.auth import _create_api_key, _delete_org
|
|
10
10
|
from .classification_model import ClassificationModel
|
|
11
|
+
from .client import orca_api
|
|
11
12
|
from .credentials import OrcaCredentials
|
|
12
13
|
from .datasource import Datasource
|
|
13
14
|
from .embedding_model import PretrainedEmbeddingModel
|
|
@@ -43,6 +44,13 @@ def _create_org_id():
|
|
|
43
44
|
return "10e50000-0000-4000-a000-" + str(uuid4())[24:]
|
|
44
45
|
|
|
45
46
|
|
|
47
|
+
@pytest.fixture()
|
|
48
|
+
def base_url_reset():
|
|
49
|
+
original_base_url = orca_api.base_url
|
|
50
|
+
yield
|
|
51
|
+
orca_api.base_url = original_base_url
|
|
52
|
+
|
|
53
|
+
|
|
46
54
|
@pytest.fixture(scope="session")
|
|
47
55
|
def org_id():
|
|
48
56
|
return _create_org_id()
|
|
@@ -192,6 +200,8 @@ def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[Labele
|
|
|
192
200
|
OrcaCredentials.set_api_key(api_key, check_validity=False)
|
|
193
201
|
|
|
194
202
|
if LabeledMemoryset.exists("test_writable_memoryset"):
|
|
203
|
+
memoryset.refresh()
|
|
204
|
+
|
|
195
205
|
memory_ids = [memoryset[i].memory_id for i in range(len(memoryset))]
|
|
196
206
|
|
|
197
207
|
if memory_ids:
|
orca_sdk/credentials.py
CHANGED
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from typing import Literal, NamedTuple
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
list_api_keys,
|
|
9
|
-
)
|
|
10
|
-
from ._generated_api_client.client import get_base_url, get_headers, set_headers
|
|
11
|
-
from ._generated_api_client.models import (
|
|
12
|
-
CreateApiKeyRequest,
|
|
13
|
-
CreateApiKeyRequestScopeItem,
|
|
14
|
-
)
|
|
4
|
+
import httpx
|
|
5
|
+
from httpx import ConnectError, Headers
|
|
6
|
+
|
|
7
|
+
from .client import orca_api
|
|
15
8
|
|
|
16
9
|
Scope = Literal["ADMINISTER", "PREDICT"]
|
|
17
10
|
"""
|
|
@@ -46,7 +39,7 @@ class OrcaCredentials:
|
|
|
46
39
|
"""
|
|
47
40
|
Get the Orca API base URL that is currently being used
|
|
48
41
|
"""
|
|
49
|
-
return
|
|
42
|
+
return str(orca_api.base_url)
|
|
50
43
|
|
|
51
44
|
@staticmethod
|
|
52
45
|
def list_api_keys() -> list[ApiKeyInfo]:
|
|
@@ -57,8 +50,12 @@ class OrcaCredentials:
|
|
|
57
50
|
A list of named tuples, with the name, creation date time, and scopes of each API key
|
|
58
51
|
"""
|
|
59
52
|
return [
|
|
60
|
-
ApiKeyInfo(
|
|
61
|
-
|
|
53
|
+
ApiKeyInfo(
|
|
54
|
+
name=api_key["name"],
|
|
55
|
+
created_at=datetime.fromisoformat(api_key["created_at"]),
|
|
56
|
+
scopes=set(api_key["scope"]),
|
|
57
|
+
)
|
|
58
|
+
for api_key in orca_api.GET("/auth/api_key")
|
|
62
59
|
]
|
|
63
60
|
|
|
64
61
|
@staticmethod
|
|
@@ -70,7 +67,7 @@ class OrcaCredentials:
|
|
|
70
67
|
True if you are authenticated, False otherwise
|
|
71
68
|
"""
|
|
72
69
|
try:
|
|
73
|
-
return
|
|
70
|
+
return orca_api.GET("/auth")
|
|
74
71
|
except ValueError as e:
|
|
75
72
|
if "Invalid API key" in str(e):
|
|
76
73
|
return False
|
|
@@ -88,10 +85,11 @@ class OrcaCredentials:
|
|
|
88
85
|
Returns:
|
|
89
86
|
The secret value of the API key. Make sure to save this value as it will not be shown again.
|
|
90
87
|
"""
|
|
91
|
-
res =
|
|
92
|
-
|
|
88
|
+
res = orca_api.POST(
|
|
89
|
+
"/auth/api_key",
|
|
90
|
+
json={"name": name, "scope": list(scopes)},
|
|
93
91
|
)
|
|
94
|
-
return res
|
|
92
|
+
return res["api_key"]
|
|
95
93
|
|
|
96
94
|
@staticmethod
|
|
97
95
|
def revoke_api_key(name: str) -> None:
|
|
@@ -104,7 +102,21 @@ class OrcaCredentials:
|
|
|
104
102
|
Raises:
|
|
105
103
|
ValueError: if the API key is not found
|
|
106
104
|
"""
|
|
107
|
-
|
|
105
|
+
orca_api.DELETE("/auth/api_key/{name_or_id}", params={"name_or_id": name})
|
|
106
|
+
|
|
107
|
+
@staticmethod
|
|
108
|
+
def set_headers(headers: dict[str, str]):
|
|
109
|
+
"""
|
|
110
|
+
Add or override default HTTP headers for all Orca API requests.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
headers: Header names with their string values
|
|
114
|
+
|
|
115
|
+
Notes:
|
|
116
|
+
New keys are merged into the existing headers; this will overwrite headers with the
|
|
117
|
+
same name, but leave other headers untouched.
|
|
118
|
+
"""
|
|
119
|
+
orca_api.headers.update(Headers(headers))
|
|
108
120
|
|
|
109
121
|
@staticmethod
|
|
110
122
|
def set_api_key(api_key: str, check_validity: bool = True):
|
|
@@ -121,6 +133,46 @@ class OrcaCredentials:
|
|
|
121
133
|
Raises:
|
|
122
134
|
ValueError: if the API key is invalid and `check_validity` is True
|
|
123
135
|
"""
|
|
124
|
-
set_headers(
|
|
136
|
+
OrcaCredentials.set_headers({"Api-Key": api_key})
|
|
125
137
|
if check_validity:
|
|
126
|
-
|
|
138
|
+
orca_api.GET("/auth")
|
|
139
|
+
|
|
140
|
+
@staticmethod
|
|
141
|
+
def set_base_url(base_url: str, check_validity: bool = True):
|
|
142
|
+
"""
|
|
143
|
+
Set the base URL for the Orca API
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
base_url: The base URL to set
|
|
147
|
+
check_validity: Whether to check if there is an API running at the given base URL
|
|
148
|
+
|
|
149
|
+
Raises:
|
|
150
|
+
ValueError: if there is no healthy API running at the given base URL and `check_validity` is True
|
|
151
|
+
"""
|
|
152
|
+
# check if the base url is reachable before setting it
|
|
153
|
+
if check_validity:
|
|
154
|
+
try:
|
|
155
|
+
httpx.get(base_url, timeout=1)
|
|
156
|
+
except ConnectError as e:
|
|
157
|
+
raise ValueError(f"No API found at {base_url}") from e
|
|
158
|
+
|
|
159
|
+
orca_api.base_url = base_url
|
|
160
|
+
|
|
161
|
+
# check if the api passes the health check
|
|
162
|
+
if check_validity:
|
|
163
|
+
orca_api.GET("/")
|
|
164
|
+
|
|
165
|
+
@staticmethod
|
|
166
|
+
def is_healthy() -> bool:
|
|
167
|
+
"""
|
|
168
|
+
Check whether the API is healthy
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
True if the API is healthy, False otherwise
|
|
172
|
+
"""
|
|
173
|
+
try:
|
|
174
|
+
orca_api.GET("/")
|
|
175
|
+
orca_api.GET("/gpu/")
|
|
176
|
+
except Exception:
|
|
177
|
+
return False
|
|
178
|
+
return True
|
orca_sdk/credentials_test.py
CHANGED
|
@@ -2,6 +2,7 @@ from uuid import uuid4
|
|
|
2
2
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
+
from .client import orca_api
|
|
5
6
|
from .credentials import OrcaCredentials
|
|
6
7
|
|
|
7
8
|
|
|
@@ -35,3 +36,22 @@ def test_set_invalid_api_key(api_key):
|
|
|
35
36
|
with pytest.raises(ValueError, match="Invalid API key"):
|
|
36
37
|
OrcaCredentials.set_api_key(str(uuid4()))
|
|
37
38
|
assert not OrcaCredentials.is_authenticated()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_set_base_url(base_url_reset):
|
|
42
|
+
OrcaCredentials.set_base_url("http://api.orcadb.ai")
|
|
43
|
+
assert str(orca_api.base_url) == "http://api.orcadb.ai"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_set_invalid_base_url():
|
|
47
|
+
with pytest.raises(ValueError, match="No API found at http://localhost:1582"):
|
|
48
|
+
OrcaCredentials.set_base_url("http://localhost:1582")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_is_healthy():
|
|
52
|
+
assert OrcaCredentials.is_healthy()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_is_healthy_false(base_url_reset):
|
|
56
|
+
OrcaCredentials.set_base_url("http://localhost:1582", check_validity=False)
|
|
57
|
+
assert not OrcaCredentials.is_healthy()
|
orca_sdk/datasource.py
CHANGED
|
@@ -4,30 +4,86 @@ import logging
|
|
|
4
4
|
import tempfile
|
|
5
5
|
import zipfile
|
|
6
6
|
from datetime import datetime
|
|
7
|
+
from io import BytesIO
|
|
7
8
|
from os import PathLike
|
|
8
9
|
from pathlib import Path
|
|
9
|
-
from typing import cast
|
|
10
|
+
from typing import Literal, Union, cast
|
|
10
11
|
|
|
11
12
|
import pandas as pd
|
|
12
13
|
import pyarrow as pa
|
|
13
|
-
from datasets import Dataset
|
|
14
|
+
from datasets import Dataset, DatasetDict
|
|
15
|
+
from httpx._types import FileTypes # type: ignore
|
|
16
|
+
from pyarrow import parquet
|
|
14
17
|
from torch.utils.data import DataLoader as TorchDataLoader
|
|
15
18
|
from torch.utils.data import Dataset as TorchDataset
|
|
16
19
|
from tqdm.auto import tqdm
|
|
17
20
|
|
|
18
|
-
from ._generated_api_client.api import (
|
|
19
|
-
delete_datasource,
|
|
20
|
-
get_datasource,
|
|
21
|
-
list_datasources,
|
|
22
|
-
)
|
|
23
|
-
from ._generated_api_client.api.datasource.create_datasource_datasource_post import (
|
|
24
|
-
_parse_response as parse_create_response,
|
|
25
|
-
)
|
|
26
|
-
from ._generated_api_client.client import get_client
|
|
27
|
-
from ._generated_api_client.models import ColumnType, DatasourceMetadata
|
|
28
21
|
from ._utils.common import CreateMode, DropMode
|
|
29
|
-
from ._utils.data_parsing import
|
|
22
|
+
from ._utils.data_parsing import hf_dataset_from_torch
|
|
30
23
|
from ._utils.tqdm_file_reader import TqdmFileReader
|
|
24
|
+
from .client import DatasourceMetadata, orca_api
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _upload_files_to_datasource(
    name: str,
    file_paths: list[Path],
    description: str | None = None,
) -> DatasourceMetadata:
    """
    Helper function to upload files to create a datasource using manual HTTP requests.

    This bypasses the generated client because it doesn't handle file uploads properly.

    Every file is opened for the duration of the request only and is closed again
    before this function returns, whether the upload succeeds or raises.

    Params:
        name: Name for the datasource
        file_paths: List of file paths to upload
        description: Optional description for the datasource

    Returns:
        Metadata for the created datasource
    """
    # Local import keeps this helper self-contained with respect to the module's import block
    from contextlib import ExitStack

    # Combined size of all files, used as the upper bound of the progress bar
    total_size = sum(file_path.stat().st_size for file_path in file_paths)

    # The stack owns the progress bar and every opened file handle, so all of them are
    # released on exit even if opening a later file or the request itself fails.
    with ExitStack() as stack:
        pbar = stack.enter_context(tqdm(total=total_size, unit="B", unit_scale=True, desc="Uploading"))

        files: list[tuple[Literal["files"], FileTypes]] = []
        for file_path in file_paths:
            buffered_reader = stack.enter_context(open(file_path, "rb"))
            # NOTE(review): TqdmFileReader presumably advances `pbar` as the request reads from it - confirm
            tqdm_reader = TqdmFileReader(buffered_reader, pbar)
            # The cast only satisfies the type checker; the reader object itself is passed through
            files.append(("files", (file_path.name, cast(bytes, tqdm_reader))))

        # Use manual HTTP request for file uploads; it must run while the files are still open
        return orca_api.POST(
            "/datasource/upload",
            files=files,
            data={"name": name, "description": description},
        )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _handle_existing_datasource(name: str, if_exists: CreateMode) -> Union["Datasource", None]:
    """
    Resolve a name clash before creating a datasource.

    Looks up whether a datasource called `name` is already present and applies the
    policy selected by `if_exists`.

    Params:
        name: Name of the datasource to check
        if_exists: What to do if a datasource with the same name already exists

    Returns:
        A handle to the existing datasource when it exists and `if_exists` is `"open"`,
        otherwise None to signal that the caller should go ahead and create it

    Raises:
        ValueError: If the datasource already exists and if_exists is "error"
    """
    # Nothing with that name yet: the caller is free to create it
    if not Datasource.exists(name):
        return None

    if if_exists == "error":
        raise ValueError(f"Dataset with name {name} already exists")

    if if_exists == "open":
        return Datasource.open(name)

    # Any other mode falls through to creation, same as the not-existing case
    return None
|
|
31
87
|
|
|
32
88
|
|
|
33
89
|
class Datasource:
|
|
@@ -56,19 +112,19 @@ class Datasource:
|
|
|
56
112
|
|
|
57
113
|
def __init__(self, metadata: DatasourceMetadata):
|
|
58
114
|
# for internal use only, do not document
|
|
59
|
-
self.id = metadata
|
|
60
|
-
self.name = metadata
|
|
61
|
-
self.length = metadata
|
|
62
|
-
self.created_at = metadata
|
|
63
|
-
self.updated_at = metadata
|
|
64
|
-
self.description = metadata
|
|
115
|
+
self.id = metadata["id"]
|
|
116
|
+
self.name = metadata["name"]
|
|
117
|
+
self.length = metadata["length"]
|
|
118
|
+
self.created_at = datetime.fromisoformat(metadata["created_at"])
|
|
119
|
+
self.updated_at = datetime.fromisoformat(metadata["updated_at"])
|
|
120
|
+
self.description = metadata["description"]
|
|
65
121
|
self.columns = {
|
|
66
|
-
column
|
|
67
|
-
f"enum({', '.join(f'{option!r}' for option in column
|
|
68
|
-
if column
|
|
69
|
-
else "str" if column
|
|
122
|
+
column["name"]: (
|
|
123
|
+
f"enum({', '.join(f'{option!r}' for option in column['enum_options'] or []) if 'enum_options' in column else ''})"
|
|
124
|
+
if column["type"] == "ENUM"
|
|
125
|
+
else "str" if column["type"] == "STRING" else column["type"].lower()
|
|
70
126
|
)
|
|
71
|
-
for column in metadata
|
|
127
|
+
for column in metadata["columns"]
|
|
72
128
|
}
|
|
73
129
|
|
|
74
130
|
def __eq__(self, other) -> bool:
|
|
@@ -94,27 +150,16 @@ class Datasource:
|
|
|
94
150
|
|
|
95
151
|
Returns:
|
|
96
152
|
None
|
|
97
|
-
|
|
98
|
-
Raises:
|
|
99
|
-
RuntimeError: If the download fails.
|
|
100
153
|
"""
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
client = get_client().get_httpx_client()
|
|
104
|
-
url = f"/datasource/{self.id}/download"
|
|
105
|
-
response = client.get(url)
|
|
106
|
-
if response.status_code == 404:
|
|
107
|
-
raise LookupError(f"Datasource {self.id} not found")
|
|
108
|
-
if response.status_code != 200:
|
|
109
|
-
raise RuntimeError(f"Failed to download datasource: {response.status_code} {response.text}")
|
|
110
|
-
|
|
154
|
+
# TODO: add progress bar to the download
|
|
155
|
+
response = orca_api.GET("/datasource/{name_or_id}/download", params={"name_or_id": self.id}, parse_as=None)
|
|
111
156
|
with tempfile.NamedTemporaryFile(suffix=".zip") as tmp_zip:
|
|
112
|
-
tmp_zip.write(response
|
|
157
|
+
tmp_zip.write(response)
|
|
113
158
|
tmp_zip.flush()
|
|
114
159
|
with zipfile.ZipFile(tmp_zip.name, "r") as zf:
|
|
115
|
-
output_path.mkdir(parents=True, exist_ok=True)
|
|
160
|
+
Path(output_path).mkdir(parents=True, exist_ok=True)
|
|
116
161
|
for file in zf.namelist():
|
|
117
|
-
out_file = output_path / Path(file).name
|
|
162
|
+
out_file = Path(output_path) / Path(file).name
|
|
118
163
|
with zf.open(file) as af:
|
|
119
164
|
out_file.write_bytes(af.read())
|
|
120
165
|
|
|
@@ -138,40 +183,54 @@ class Datasource:
|
|
|
138
183
|
Raises:
|
|
139
184
|
ValueError: If the datasource already exists and if_exists is `"error"`
|
|
140
185
|
"""
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
if
|
|
144
|
-
|
|
145
|
-
raise ValueError(f"Dataset with name {name} already exists")
|
|
146
|
-
elif if_exists == "open":
|
|
147
|
-
return cls.open(name)
|
|
186
|
+
# Check if datasource already exists and handle accordingly
|
|
187
|
+
existing = _handle_existing_datasource(name, if_exists)
|
|
188
|
+
if existing is not None:
|
|
189
|
+
return existing
|
|
148
190
|
|
|
149
191
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
|
150
192
|
dataset.save_to_disk(tmp_dir)
|
|
151
|
-
files = []
|
|
152
193
|
|
|
153
|
-
#
|
|
194
|
+
# Get all file paths in the directory
|
|
154
195
|
file_paths = list(Path(tmp_dir).iterdir())
|
|
155
|
-
total_size = sum(file_path.stat().st_size for file_path in file_paths)
|
|
156
|
-
|
|
157
|
-
with tqdm(total=total_size, unit="B", unit_scale=True, desc="Uploading") as pbar:
|
|
158
|
-
for file_path in file_paths:
|
|
159
|
-
buffered_reader = open(file_path, "rb")
|
|
160
|
-
tqdm_reader = TqdmFileReader(buffered_reader, pbar)
|
|
161
|
-
files.append(("files", (file_path.name, tqdm_reader)))
|
|
162
|
-
|
|
163
|
-
# Do not use Generated client for this endpoint b/c it does not handle files properly
|
|
164
|
-
metadata = parse_create_response(
|
|
165
|
-
response=client.get_httpx_client().request(
|
|
166
|
-
method="post",
|
|
167
|
-
url="/datasource/",
|
|
168
|
-
files=files,
|
|
169
|
-
data={"name": name, "description": description},
|
|
170
|
-
)
|
|
171
|
-
)
|
|
172
196
|
|
|
197
|
+
# Use the helper function to upload files
|
|
198
|
+
metadata = _upload_files_to_datasource(name, file_paths, description)
|
|
173
199
|
return cls(metadata=metadata)
|
|
174
200
|
|
|
201
|
+
@classmethod
def from_hf_dataset_dict(
    cls,
    name: str,
    dataset_dict: DatasetDict,
    if_exists: CreateMode = "error",
    description: dict[str, str | None] | str | None = None,
) -> dict[str, Datasource]:
    """
    Create datasources from a Hugging Face DatasetDict

    One datasource is created per entry of `dataset_dict` by delegating to
    `from_hf_dataset`.

    Params:
        name: Name prefix for the new datasources, will be suffixed with the dataset name
        dataset_dict: The Hugging Face DatasetDict to create the datasources from
        if_exists: What to do if a datasource with the same name already exists, defaults to
            `"error"`. Other option is `"open"` to open the existing datasource.
        description: Optional description for the datasources, can be a string or a dictionary of
            dataset names to descriptions. Datasets without an entry in the dictionary get no
            description.

    Returns:
        A dictionary of datasource handles, keyed by the dataset name

    Raises:
        ValueError: If a datasource already exists and if_exists is `"error"`
    """
    # Normalize to a per-dataset mapping without rebinding the parameter to a different type
    if description is None or isinstance(description, str):
        descriptions = {dataset_name: description for dataset_name in dataset_dict.keys()}
    else:
        descriptions = description

    return {
        dataset_name: cls.from_hf_dataset(
            f"{name}_{dataset_name}",
            dataset,
            if_exists=if_exists,
            # .get() so a partial mapping cannot raise KeyError after earlier
            # datasources have already been created
            description=descriptions.get(dataset_name),
        )
        for dataset_name, dataset in dataset_dict.items()
    }
|
|
233
|
+
|
|
175
234
|
@classmethod
|
|
176
235
|
def from_pytorch(
|
|
177
236
|
cls,
|
|
@@ -225,8 +284,16 @@ class Datasource:
|
|
|
225
284
|
Examples:
|
|
226
285
|
>>> Datasource.from_list("my_datasource", [{"text": "Hello, world!", "label": 1}, {"text": "Goodbye", "label": 0}])
|
|
227
286
|
"""
|
|
228
|
-
|
|
229
|
-
|
|
287
|
+
# Check if datasource already exists and handle accordingly
|
|
288
|
+
existing = _handle_existing_datasource(name, if_exists)
|
|
289
|
+
if existing is not None:
|
|
290
|
+
return existing
|
|
291
|
+
|
|
292
|
+
metadata = orca_api.POST(
|
|
293
|
+
"/datasource",
|
|
294
|
+
json={"name": name, "description": description, "content": data},
|
|
295
|
+
)
|
|
296
|
+
return cls(metadata=metadata)
|
|
230
297
|
|
|
231
298
|
@classmethod
|
|
232
299
|
def from_dict(
|
|
@@ -251,8 +318,16 @@ class Datasource:
|
|
|
251
318
|
Examples:
|
|
252
319
|
>>> Datasource.from_dict("my_datasource", {"text": ["Hello, world!", "Goodbye"], "label": [1, 0]})
|
|
253
320
|
"""
|
|
254
|
-
|
|
255
|
-
|
|
321
|
+
# Check if datasource already exists and handle accordingly
|
|
322
|
+
existing = _handle_existing_datasource(name, if_exists)
|
|
323
|
+
if existing is not None:
|
|
324
|
+
return existing
|
|
325
|
+
|
|
326
|
+
metadata = orca_api.POST(
|
|
327
|
+
"/datasource",
|
|
328
|
+
json={"name": name, "description": description, "content": data},
|
|
329
|
+
)
|
|
330
|
+
return cls(metadata=metadata)
|
|
256
331
|
|
|
257
332
|
@classmethod
|
|
258
333
|
def from_pandas(
|
|
@@ -274,8 +349,8 @@ class Datasource:
|
|
|
274
349
|
Raises:
|
|
275
350
|
ValueError: If the datasource already exists and if_exists is `"error"`
|
|
276
351
|
"""
|
|
277
|
-
|
|
278
|
-
return cls.from_hf_dataset(name,
|
|
352
|
+
dataset = Dataset.from_pandas(dataframe)
|
|
353
|
+
return cls.from_hf_dataset(name, dataset, if_exists=if_exists, description=description)
|
|
279
354
|
|
|
280
355
|
@classmethod
|
|
281
356
|
def from_arrow(
|
|
@@ -297,8 +372,23 @@ class Datasource:
|
|
|
297
372
|
Raises:
|
|
298
373
|
ValueError: If the datasource already exists and if_exists is `"error"`
|
|
299
374
|
"""
|
|
300
|
-
|
|
301
|
-
|
|
375
|
+
# Check if datasource already exists and handle accordingly
|
|
376
|
+
existing = _handle_existing_datasource(name, if_exists)
|
|
377
|
+
if existing is not None:
|
|
378
|
+
return existing
|
|
379
|
+
|
|
380
|
+
# Write to bytes buffer
|
|
381
|
+
buffer = BytesIO()
|
|
382
|
+
parquet.write_table(pyarrow_table, buffer)
|
|
383
|
+
parquet_bytes = buffer.getvalue()
|
|
384
|
+
|
|
385
|
+
metadata = orca_api.POST(
|
|
386
|
+
"/datasource/upload",
|
|
387
|
+
files=[("files", ("data.parquet", parquet_bytes))],
|
|
388
|
+
data={"name": name, "description": description},
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
return cls(metadata=metadata)
|
|
302
392
|
|
|
303
393
|
@classmethod
|
|
304
394
|
def from_disk(
|
|
@@ -328,16 +418,31 @@ class Datasource:
|
|
|
328
418
|
Raises:
|
|
329
419
|
ValueError: If the datasource already exists and if_exists is `"error"`
|
|
330
420
|
"""
|
|
331
|
-
|
|
332
|
-
|
|
421
|
+
# Check if datasource already exists and handle accordingly
|
|
422
|
+
existing = _handle_existing_datasource(name, if_exists)
|
|
423
|
+
if existing is not None:
|
|
424
|
+
return existing
|
|
425
|
+
|
|
426
|
+
file_path = Path(file_path)
|
|
427
|
+
|
|
428
|
+
# For dataset directories, use the upload endpoint with multiple files
|
|
429
|
+
if file_path.is_dir():
|
|
430
|
+
return cls.from_hf_dataset(
|
|
431
|
+
name, Dataset.load_from_disk(file_path), if_exists=if_exists, description=description
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
# For single files, use the helper function to upload files
|
|
435
|
+
metadata = _upload_files_to_datasource(name, [file_path], description)
|
|
436
|
+
|
|
437
|
+
return cls(metadata=metadata)
|
|
333
438
|
|
|
334
439
|
@classmethod
|
|
335
|
-
def open(cls,
|
|
440
|
+
def open(cls, name_or_id: str) -> Datasource:
|
|
336
441
|
"""
|
|
337
442
|
Get a handle to a datasource by name or id in the OrcaCloud
|
|
338
443
|
|
|
339
444
|
Params:
|
|
340
|
-
|
|
445
|
+
name_or_id: The name or unique identifier of the datasource to get
|
|
341
446
|
|
|
342
447
|
Returns:
|
|
343
448
|
A handle to the existing datasource in the OrcaCloud
|
|
@@ -345,7 +450,7 @@ class Datasource:
|
|
|
345
450
|
Raises:
|
|
346
451
|
LookupError: If the datasource does not exist
|
|
347
452
|
"""
|
|
348
|
-
return cls(
|
|
453
|
+
return cls(orca_api.GET("/datasource/{name_or_id}", params={"name_or_id": name_or_id}))
|
|
349
454
|
|
|
350
455
|
@classmethod
|
|
351
456
|
def exists(cls, name_or_id: str) -> bool:
|
|
@@ -372,7 +477,7 @@ class Datasource:
|
|
|
372
477
|
Returns:
|
|
373
478
|
A list of all datasource handles in the OrcaCloud
|
|
374
479
|
"""
|
|
375
|
-
return [cls(metadata) for metadata in
|
|
480
|
+
return [cls(metadata) for metadata in orca_api.GET("/datasource")]
|
|
376
481
|
|
|
377
482
|
@classmethod
|
|
378
483
|
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error") -> None:
|
|
@@ -388,7 +493,7 @@ class Datasource:
|
|
|
388
493
|
LookupError: If the datasource does not exist and if_not_exists is `"error"`
|
|
389
494
|
"""
|
|
390
495
|
try:
|
|
391
|
-
|
|
496
|
+
orca_api.DELETE("/datasource/{name_or_id}", params={"name_or_id": name_or_id})
|
|
392
497
|
logging.info(f"Deleted datasource {name_or_id}")
|
|
393
498
|
except LookupError:
|
|
394
499
|
if if_not_exists == "error":
|