orca-sdk 0.0.97__py3-none-any.whl → 0.0.100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +1 -0
- orca_sdk/_shared/__init__.py +1 -0
- orca_sdk/_utils/analysis_ui.py +5 -5
- orca_sdk/_utils/auth.py +23 -33
- orca_sdk/_utils/pagination.py +126 -0
- orca_sdk/_utils/pagination_test.py +132 -0
- orca_sdk/classification_model.py +188 -126
- orca_sdk/classification_model_test.py +57 -8
- orca_sdk/client.py +3563 -0
- orca_sdk/conftest.py +10 -0
- orca_sdk/credentials.py +59 -21
- orca_sdk/credentials_test.py +20 -0
- orca_sdk/datasource.py +80 -93
- orca_sdk/datasource_test.py +41 -7
- orca_sdk/embedding_model.py +225 -71
- orca_sdk/embedding_model_test.py +27 -36
- orca_sdk/job.py +49 -45
- orca_sdk/job_test.py +16 -0
- orca_sdk/memoryset.py +340 -353
- orca_sdk/memoryset_test.py +7 -11
- orca_sdk/regression_model.py +120 -111
- orca_sdk/regression_model_test.py +15 -0
- orca_sdk/telemetry.py +162 -139
- {orca_sdk-0.0.97.dist-info → orca_sdk-0.0.100.dist-info}/METADATA +2 -5
- orca_sdk-0.0.100.dist-info/RECORD +40 -0
- orca_sdk/_generated_api_client/__init__.py +0 -3
- orca_sdk/_generated_api_client/api/__init__.py +0 -307
- orca_sdk/_generated_api_client/api/auth/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/auth/check_authentication_auth_get.py +0 -128
- orca_sdk/_generated_api_client/api/auth/create_api_key_auth_api_key_post.py +0 -170
- orca_sdk/_generated_api_client/api/auth/create_org_plan_auth_org_plan_post.py +0 -168
- orca_sdk/_generated_api_client/api/auth/delete_api_key_auth_api_key_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/auth/delete_org_auth_org_delete.py +0 -130
- orca_sdk/_generated_api_client/api/auth/get_org_plan_auth_org_plan_get.py +0 -122
- orca_sdk/_generated_api_client/api/auth/list_api_keys_auth_api_key_get.py +0 -127
- orca_sdk/_generated_api_client/api/auth/update_org_plan_auth_org_plan_put.py +0 -168
- orca_sdk/_generated_api_client/api/classification_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/classification_model/create_classification_model_classification_model_post.py +0 -170
- orca_sdk/_generated_api_client/api/classification_model/delete_classification_model_classification_model_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/classification_model/delete_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py +0 -168
- orca_sdk/_generated_api_client/api/classification_model/evaluate_classification_model_classification_model_model_name_or_id_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/classification_model/get_classification_model_classification_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/classification_model/get_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py +0 -170
- orca_sdk/_generated_api_client/api/classification_model/list_classification_model_evaluations_classification_model_model_name_or_id_evaluation_get.py +0 -161
- orca_sdk/_generated_api_client/api/classification_model/list_classification_models_classification_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/classification_model/predict_label_gpu_classification_model_name_or_id_prediction_post.py +0 -190
- orca_sdk/_generated_api_client/api/classification_model/update_classification_model_classification_model_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/datasource/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/datasource/create_datasource_from_content_datasource_post.py +0 -224
- orca_sdk/_generated_api_client/api/datasource/create_datasource_from_files_datasource_upload_post.py +0 -229
- orca_sdk/_generated_api_client/api/datasource/create_embedding_evaluation_datasource_name_or_id_embedding_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/datasource/delete_datasource_datasource_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +0 -172
- orca_sdk/_generated_api_client/api/datasource/get_datasource_datasource_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/datasource/get_embedding_evaluation_datasource_name_or_id_embedding_evaluation_task_id_get.py +0 -169
- orca_sdk/_generated_api_client/api/datasource/list_datasources_datasource_get.py +0 -127
- orca_sdk/_generated_api_client/api/datasource/list_embedding_evaluations_datasource_name_or_id_embedding_evaluation_get.py +0 -235
- orca_sdk/_generated_api_client/api/default/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/default/healthcheck_get.py +0 -118
- orca_sdk/_generated_api_client/api/default/healthcheck_gpu_get.py +0 -118
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/create_finetuned_embedding_model_finetuned_embedding_model_post.py +0 -168
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/delete_finetuned_embedding_model_finetuned_embedding_model_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/embed_with_finetuned_model_gpu_finetuned_embedding_model_name_or_id_embedding_post.py +0 -189
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/get_finetuned_embedding_model_finetuned_embedding_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/finetuned_embedding_model/list_finetuned_embedding_models_finetuned_embedding_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/memoryset/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/memoryset/analyze_memoryset_memoryset_name_or_id_analysis_post.py +0 -183
- orca_sdk/_generated_api_client/api/memoryset/batch_delete_memoryset_batch_delete_memoryset_post.py +0 -168
- orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +0 -181
- orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +0 -168
- orca_sdk/_generated_api_client/api/memoryset/delete_memories_memoryset_name_or_id_memories_delete_post.py +0 -181
- orca_sdk/_generated_api_client/api/memoryset/delete_memory_memoryset_name_or_id_memory_memory_id_delete.py +0 -167
- orca_sdk/_generated_api_client/api/memoryset/delete_memoryset_memoryset_name_or_id_delete.py +0 -156
- orca_sdk/_generated_api_client/api/memoryset/get_analysis_memoryset_name_or_id_analysis_analysis_task_id_get.py +0 -169
- orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +0 -210
- orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +0 -186
- orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +0 -188
- orca_sdk/_generated_api_client/api/memoryset/list_analyses_memoryset_name_or_id_analysis_get.py +0 -235
- orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +0 -180
- orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +0 -212
- orca_sdk/_generated_api_client/api/memoryset/potential_duplicate_groups_memoryset_name_or_id_potential_duplicate_groups_get.py +0 -195
- orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +0 -210
- orca_sdk/_generated_api_client/api/memoryset/suggest_cascading_edits_memoryset_name_or_id_memory_memory_id_cascading_edits_post.py +0 -233
- orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +0 -216
- orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +0 -205
- orca_sdk/_generated_api_client/api/memoryset/update_memoryset_memoryset_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/predictive_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/predictive_model/list_predictive_models_predictive_model_get.py +0 -150
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/embed_with_pretrained_model_gpu_pretrained_embedding_model_model_name_embedding_post.py +0 -192
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/get_pretrained_embedding_model_pretrained_embedding_model_model_name_get.py +0 -161
- orca_sdk/_generated_api_client/api/pretrained_embedding_model/list_pretrained_embedding_models_pretrained_embedding_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/regression_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/regression_model/create_regression_model_regression_model_post.py +0 -170
- orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_delete.py +0 -168
- orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_regression_model_name_or_id_delete.py +0 -154
- orca_sdk/_generated_api_client/api/regression_model/evaluate_regression_model_regression_model_model_name_or_id_evaluation_post.py +0 -183
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_get.py +0 -170
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_regression_model_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/regression_model/list_regression_model_evaluations_regression_model_model_name_or_id_evaluation_get.py +0 -161
- orca_sdk/_generated_api_client/api/regression_model/list_regression_models_regression_model_get.py +0 -127
- orca_sdk/_generated_api_client/api/regression_model/predict_score_gpu_regression_model_name_or_id_prediction_post.py +0 -190
- orca_sdk/_generated_api_client/api/regression_model/update_regression_model_regression_model_name_or_id_patch.py +0 -183
- orca_sdk/_generated_api_client/api/task/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/task/abort_task_task_task_id_abort_delete.py +0 -154
- orca_sdk/_generated_api_client/api/task/get_task_status_task_task_id_status_get.py +0 -156
- orca_sdk/_generated_api_client/api/task/get_task_task_task_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +0 -288
- orca_sdk/_generated_api_client/api/telemetry/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/telemetry/count_predictions_telemetry_prediction_count_post.py +0 -168
- orca_sdk/_generated_api_client/api/telemetry/drop_feedback_category_with_data_telemetry_feedback_category_name_or_id_delete.py +0 -162
- orca_sdk/_generated_api_client/api/telemetry/explain_prediction_telemetry_prediction_prediction_id_explanation_get.py +0 -182
- orca_sdk/_generated_api_client/api/telemetry/generate_memory_suggestions_telemetry_prediction_prediction_id_memory_suggestions_post.py +0 -239
- orca_sdk/_generated_api_client/api/telemetry/get_action_recommendation_telemetry_prediction_prediction_id_action_get.py +0 -192
- orca_sdk/_generated_api_client/api/telemetry/get_feedback_category_telemetry_feedback_category_name_or_id_get.py +0 -156
- orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +0 -180
- orca_sdk/_generated_api_client/api/telemetry/list_feedback_categories_telemetry_feedback_category_get.py +0 -127
- orca_sdk/_generated_api_client/api/telemetry/list_memories_with_feedback_telemetry_memories_post.py +0 -198
- orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +0 -198
- orca_sdk/_generated_api_client/api/telemetry/record_prediction_feedback_telemetry_prediction_feedback_put.py +0 -171
- orca_sdk/_generated_api_client/api/telemetry/update_prediction_telemetry_prediction_prediction_id_patch.py +0 -181
- orca_sdk/_generated_api_client/client.py +0 -216
- orca_sdk/_generated_api_client/errors.py +0 -38
- orca_sdk/_generated_api_client/models/__init__.py +0 -345
- orca_sdk/_generated_api_client/models/action_recommendation.py +0 -82
- orca_sdk/_generated_api_client/models/action_recommendation_action.py +0 -11
- orca_sdk/_generated_api_client/models/add_memory_recommendations.py +0 -85
- orca_sdk/_generated_api_client/models/add_memory_suggestion.py +0 -79
- orca_sdk/_generated_api_client/models/analyze_neighbor_labels_result.py +0 -116
- orca_sdk/_generated_api_client/models/api_key_metadata.py +0 -137
- orca_sdk/_generated_api_client/models/api_key_metadata_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/base_label_prediction_result.py +0 -130
- orca_sdk/_generated_api_client/models/base_model.py +0 -55
- orca_sdk/_generated_api_client/models/base_score_prediction_result.py +0 -108
- orca_sdk/_generated_api_client/models/body_create_datasource_from_files_datasource_upload_post.py +0 -145
- orca_sdk/_generated_api_client/models/cascade_edit_suggestions_request.py +0 -154
- orca_sdk/_generated_api_client/models/cascading_edit_suggestion.py +0 -92
- orca_sdk/_generated_api_client/models/class_representatives.py +0 -92
- orca_sdk/_generated_api_client/models/classification_evaluation_request.py +0 -148
- orca_sdk/_generated_api_client/models/classification_metrics.py +0 -259
- orca_sdk/_generated_api_client/models/classification_model_metadata.py +0 -227
- orca_sdk/_generated_api_client/models/classification_prediction_request.py +0 -220
- orca_sdk/_generated_api_client/models/clone_memoryset_request.py +0 -210
- orca_sdk/_generated_api_client/models/cluster_metrics.py +0 -78
- orca_sdk/_generated_api_client/models/column_info.py +0 -145
- orca_sdk/_generated_api_client/models/column_type.py +0 -14
- orca_sdk/_generated_api_client/models/constraint_violation_error_response.py +0 -81
- orca_sdk/_generated_api_client/models/constraint_violation_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/count_predictions_request.py +0 -195
- orca_sdk/_generated_api_client/models/create_api_key_request.py +0 -120
- orca_sdk/_generated_api_client/models/create_api_key_request_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/create_api_key_response.py +0 -145
- orca_sdk/_generated_api_client/models/create_api_key_response_scope_item.py +0 -9
- orca_sdk/_generated_api_client/models/create_classification_model_request.py +0 -237
- orca_sdk/_generated_api_client/models/create_datasource_from_content_request.py +0 -101
- orca_sdk/_generated_api_client/models/create_memoryset_request.py +0 -365
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_params.py +0 -66
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_type.py +0 -13
- orca_sdk/_generated_api_client/models/create_org_plan_request.py +0 -73
- orca_sdk/_generated_api_client/models/create_org_plan_request_tier.py +0 -11
- orca_sdk/_generated_api_client/models/create_regression_model_request.py +0 -157
- orca_sdk/_generated_api_client/models/datasource_metadata.py +0 -156
- orca_sdk/_generated_api_client/models/delete_memories_request.py +0 -70
- orca_sdk/_generated_api_client/models/delete_memorysets_request.py +0 -70
- orca_sdk/_generated_api_client/models/embed_request.py +0 -155
- orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +0 -205
- orca_sdk/_generated_api_client/models/embedding_evaluation_request.py +0 -197
- orca_sdk/_generated_api_client/models/embedding_evaluation_response.py +0 -158
- orca_sdk/_generated_api_client/models/embedding_evaluation_result.py +0 -86
- orca_sdk/_generated_api_client/models/embedding_finetuning_method.py +0 -9
- orca_sdk/_generated_api_client/models/embedding_model_result.py +0 -123
- orca_sdk/_generated_api_client/models/evaluation_response.py +0 -153
- orca_sdk/_generated_api_client/models/evaluation_response_classification_metrics.py +0 -140
- orca_sdk/_generated_api_client/models/evaluation_response_regression_metrics.py +0 -140
- orca_sdk/_generated_api_client/models/feedback_metrics.py +0 -85
- orca_sdk/_generated_api_client/models/feedback_type.py +0 -9
- orca_sdk/_generated_api_client/models/filter_item.py +0 -239
- orca_sdk/_generated_api_client/models/filter_item_field_type_0_item.py +0 -17
- orca_sdk/_generated_api_client/models/filter_item_field_type_1_item_type_0.py +0 -8
- orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_0.py +0 -8
- orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_1.py +0 -22
- orca_sdk/_generated_api_client/models/filter_item_op.py +0 -16
- orca_sdk/_generated_api_client/models/finetune_embedding_model_request.py +0 -259
- orca_sdk/_generated_api_client/models/finetune_embedding_model_request_training_args.py +0 -66
- orca_sdk/_generated_api_client/models/finetuned_embedding_model_metadata.py +0 -166
- orca_sdk/_generated_api_client/models/get_memories_request.py +0 -70
- orca_sdk/_generated_api_client/models/http_validation_error.py +0 -86
- orca_sdk/_generated_api_client/models/internal_server_error_response.py +0 -81
- orca_sdk/_generated_api_client/models/internal_server_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/label_class_metrics.py +0 -108
- orca_sdk/_generated_api_client/models/label_prediction_memory_lookup.py +0 -210
- orca_sdk/_generated_api_client/models/label_prediction_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/label_prediction_with_memories_and_feedback.py +0 -288
- orca_sdk/_generated_api_client/models/labeled_memory.py +0 -186
- orca_sdk/_generated_api_client/models/labeled_memory_insert.py +0 -128
- orca_sdk/_generated_api_client/models/labeled_memory_insert_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_lookup.py +0 -194
- orca_sdk/_generated_api_client/models/labeled_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_update.py +0 -171
- orca_sdk/_generated_api_client/models/labeled_memory_update_metadata_type_0.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics.py +0 -207
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics_feedback_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/list_memories_request.py +0 -104
- orca_sdk/_generated_api_client/models/list_predictions_request.py +0 -319
- orca_sdk/_generated_api_client/models/lookup_request.py +0 -101
- orca_sdk/_generated_api_client/models/lookup_score_metrics.py +0 -94
- orca_sdk/_generated_api_client/models/memory_metrics.py +0 -263
- orca_sdk/_generated_api_client/models/memory_type.py +0 -9
- orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +0 -245
- orca_sdk/_generated_api_client/models/memoryset_analysis_request.py +0 -105
- orca_sdk/_generated_api_client/models/memoryset_analysis_response.py +0 -182
- orca_sdk/_generated_api_client/models/memoryset_class_patterns_analysis_config.py +0 -79
- orca_sdk/_generated_api_client/models/memoryset_class_patterns_metrics.py +0 -138
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config.py +0 -202
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config_clustering_method.py +0 -9
- orca_sdk/_generated_api_client/models/memoryset_cluster_analysis_config_partitioning_method.py +0 -10
- orca_sdk/_generated_api_client/models/memoryset_cluster_metrics.py +0 -100
- orca_sdk/_generated_api_client/models/memoryset_duplicate_analysis_config.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_duplicate_metrics.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_label_analysis_config.py +0 -70
- orca_sdk/_generated_api_client/models/memoryset_label_metrics.py +0 -116
- orca_sdk/_generated_api_client/models/memoryset_metadata.py +0 -333
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_params.py +0 -55
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_type.py +0 -13
- orca_sdk/_generated_api_client/models/memoryset_metrics.py +0 -265
- orca_sdk/_generated_api_client/models/memoryset_neighbor_analysis_config.py +0 -83
- orca_sdk/_generated_api_client/models/memoryset_neighbor_metrics.py +0 -76
- orca_sdk/_generated_api_client/models/memoryset_neighbor_metrics_lookup_score_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/memoryset_projection_analysis_config.py +0 -79
- orca_sdk/_generated_api_client/models/memoryset_projection_metrics.py +0 -55
- orca_sdk/_generated_api_client/models/memoryset_update.py +0 -121
- orca_sdk/_generated_api_client/models/not_found_error_response.py +0 -99
- orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +0 -23
- orca_sdk/_generated_api_client/models/not_found_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/org_plan.py +0 -99
- orca_sdk/_generated_api_client/models/org_plan_tier.py +0 -11
- orca_sdk/_generated_api_client/models/paginated_task.py +0 -108
- orca_sdk/_generated_api_client/models/paginated_union_labeled_memory_with_feedback_metrics_scored_memory_with_feedback_metrics.py +0 -135
- orca_sdk/_generated_api_client/models/pr_curve.py +0 -86
- orca_sdk/_generated_api_client/models/prediction_feedback.py +0 -157
- orca_sdk/_generated_api_client/models/prediction_feedback_category.py +0 -115
- orca_sdk/_generated_api_client/models/prediction_feedback_request.py +0 -122
- orca_sdk/_generated_api_client/models/prediction_feedback_result.py +0 -102
- orca_sdk/_generated_api_client/models/prediction_sort_item_item_type_0.py +0 -10
- orca_sdk/_generated_api_client/models/prediction_sort_item_item_type_1.py +0 -9
- orca_sdk/_generated_api_client/models/predictive_model_update.py +0 -111
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +0 -115
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +0 -17
- orca_sdk/_generated_api_client/models/rac_head_type.py +0 -11
- orca_sdk/_generated_api_client/models/rar_head_type.py +0 -8
- orca_sdk/_generated_api_client/models/regression_evaluation_request.py +0 -148
- orca_sdk/_generated_api_client/models/regression_metrics.py +0 -172
- orca_sdk/_generated_api_client/models/regression_model_metadata.py +0 -191
- orca_sdk/_generated_api_client/models/regression_prediction_request.py +0 -195
- orca_sdk/_generated_api_client/models/roc_curve.py +0 -86
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup.py +0 -196
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/score_prediction_with_memories_and_feedback.py +0 -252
- orca_sdk/_generated_api_client/models/scored_memory.py +0 -172
- orca_sdk/_generated_api_client/models/scored_memory_insert.py +0 -128
- orca_sdk/_generated_api_client/models/scored_memory_insert_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_lookup.py +0 -180
- orca_sdk/_generated_api_client/models/scored_memory_lookup_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_update.py +0 -171
- orca_sdk/_generated_api_client/models/scored_memory_update_metadata_type_0.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics.py +0 -193
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_feedback_metrics.py +0 -68
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_metadata.py +0 -68
- orca_sdk/_generated_api_client/models/service_unavailable_error_response.py +0 -81
- orca_sdk/_generated_api_client/models/service_unavailable_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/task.py +0 -198
- orca_sdk/_generated_api_client/models/task_status.py +0 -14
- orca_sdk/_generated_api_client/models/task_status_info.py +0 -133
- orca_sdk/_generated_api_client/models/telemetry_field_type_0_item_type_0.py +0 -8
- orca_sdk/_generated_api_client/models/telemetry_field_type_0_item_type_2.py +0 -9
- orca_sdk/_generated_api_client/models/telemetry_field_type_1_item_type_0.py +0 -8
- orca_sdk/_generated_api_client/models/telemetry_field_type_1_item_type_1.py +0 -8
- orca_sdk/_generated_api_client/models/telemetry_filter_item.py +0 -217
- orca_sdk/_generated_api_client/models/telemetry_filter_item_op.py +0 -15
- orca_sdk/_generated_api_client/models/telemetry_memories_request.py +0 -181
- orca_sdk/_generated_api_client/models/telemetry_sort_options.py +0 -185
- orca_sdk/_generated_api_client/models/telemetry_sort_options_direction.py +0 -9
- orca_sdk/_generated_api_client/models/unauthenticated_error_response.py +0 -73
- orca_sdk/_generated_api_client/models/unauthenticated_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/unauthorized_error_response.py +0 -81
- orca_sdk/_generated_api_client/models/unauthorized_error_response_status_code.py +0 -8
- orca_sdk/_generated_api_client/models/update_org_plan_request.py +0 -73
- orca_sdk/_generated_api_client/models/update_org_plan_request_tier.py +0 -11
- orca_sdk/_generated_api_client/models/update_prediction_request.py +0 -133
- orca_sdk/_generated_api_client/models/validation_error.py +0 -99
- orca_sdk/_generated_api_client/py.typed +0 -1
- orca_sdk/_generated_api_client/types.py +0 -56
- orca_sdk-0.0.97.dist-info/RECORD +0 -309
- {orca_sdk-0.0.97.dist-info → orca_sdk-0.0.100.dist-info}/WHEEL +0 -0
orca_sdk/conftest.py
CHANGED
|
@@ -8,6 +8,7 @@ from datasets import ClassLabel, Dataset, Features, Value
|
|
|
8
8
|
|
|
9
9
|
from ._utils.auth import _create_api_key, _delete_org
|
|
10
10
|
from .classification_model import ClassificationModel
|
|
11
|
+
from .client import orca_api
|
|
11
12
|
from .credentials import OrcaCredentials
|
|
12
13
|
from .datasource import Datasource
|
|
13
14
|
from .embedding_model import PretrainedEmbeddingModel
|
|
@@ -43,6 +44,13 @@ def _create_org_id():
|
|
|
43
44
|
return "10e50000-0000-4000-a000-" + str(uuid4())[24:]
|
|
44
45
|
|
|
45
46
|
|
|
47
|
+
@pytest.fixture()
|
|
48
|
+
def base_url_reset():
|
|
49
|
+
original_base_url = orca_api.base_url
|
|
50
|
+
yield
|
|
51
|
+
orca_api.base_url = original_base_url
|
|
52
|
+
|
|
53
|
+
|
|
46
54
|
@pytest.fixture(scope="session")
|
|
47
55
|
def org_id():
|
|
48
56
|
return _create_org_id()
|
|
@@ -192,6 +200,8 @@ def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[Labele
|
|
|
192
200
|
OrcaCredentials.set_api_key(api_key, check_validity=False)
|
|
193
201
|
|
|
194
202
|
if LabeledMemoryset.exists("test_writable_memoryset"):
|
|
203
|
+
memoryset.refresh()
|
|
204
|
+
|
|
195
205
|
memory_ids = [memoryset[i].memory_id for i in range(len(memoryset))]
|
|
196
206
|
|
|
197
207
|
if memory_ids:
|
orca_sdk/credentials.py
CHANGED
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from typing import Literal, NamedTuple
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
list_api_keys,
|
|
9
|
-
)
|
|
10
|
-
from ._generated_api_client.client import get_base_url, get_headers, set_headers
|
|
11
|
-
from ._generated_api_client.models import (
|
|
12
|
-
CreateApiKeyRequest,
|
|
13
|
-
CreateApiKeyRequestScopeItem,
|
|
14
|
-
)
|
|
4
|
+
import httpx
|
|
5
|
+
from httpx import ConnectError, Headers
|
|
6
|
+
|
|
7
|
+
from .client import orca_api
|
|
15
8
|
|
|
16
9
|
Scope = Literal["ADMINISTER", "PREDICT"]
|
|
17
10
|
"""
|
|
@@ -46,7 +39,7 @@ class OrcaCredentials:
|
|
|
46
39
|
"""
|
|
47
40
|
Get the Orca API base URL that is currently being used
|
|
48
41
|
"""
|
|
49
|
-
return
|
|
42
|
+
return str(orca_api.base_url)
|
|
50
43
|
|
|
51
44
|
@staticmethod
|
|
52
45
|
def list_api_keys() -> list[ApiKeyInfo]:
|
|
@@ -57,8 +50,12 @@ class OrcaCredentials:
|
|
|
57
50
|
A list of named tuples, with the name and creation date time of the API key
|
|
58
51
|
"""
|
|
59
52
|
return [
|
|
60
|
-
ApiKeyInfo(
|
|
61
|
-
|
|
53
|
+
ApiKeyInfo(
|
|
54
|
+
name=api_key["name"],
|
|
55
|
+
created_at=datetime.fromisoformat(api_key["created_at"]),
|
|
56
|
+
scopes=set(api_key["scope"]),
|
|
57
|
+
)
|
|
58
|
+
for api_key in orca_api.GET("/auth/api_key")
|
|
62
59
|
]
|
|
63
60
|
|
|
64
61
|
@staticmethod
|
|
@@ -70,7 +67,7 @@ class OrcaCredentials:
|
|
|
70
67
|
True if you are authenticated, False otherwise
|
|
71
68
|
"""
|
|
72
69
|
try:
|
|
73
|
-
return
|
|
70
|
+
return orca_api.GET("/auth")
|
|
74
71
|
except ValueError as e:
|
|
75
72
|
if "Invalid API key" in str(e):
|
|
76
73
|
return False
|
|
@@ -88,10 +85,11 @@ class OrcaCredentials:
|
|
|
88
85
|
Returns:
|
|
89
86
|
The secret value of the API key. Make sure to save this value as it will not be shown again.
|
|
90
87
|
"""
|
|
91
|
-
res =
|
|
92
|
-
|
|
88
|
+
res = orca_api.POST(
|
|
89
|
+
"/auth/api_key",
|
|
90
|
+
json={"name": name, "scope": list(scopes)},
|
|
93
91
|
)
|
|
94
|
-
return res
|
|
92
|
+
return res["api_key"]
|
|
95
93
|
|
|
96
94
|
@staticmethod
|
|
97
95
|
def revoke_api_key(name: str) -> None:
|
|
@@ -104,7 +102,7 @@ class OrcaCredentials:
|
|
|
104
102
|
Raises:
|
|
105
103
|
ValueError: if the API key is not found
|
|
106
104
|
"""
|
|
107
|
-
|
|
105
|
+
orca_api.DELETE("/auth/api_key/{name_or_id}", params={"name_or_id": name})
|
|
108
106
|
|
|
109
107
|
@staticmethod
|
|
110
108
|
def set_headers(headers: dict[str, str]):
|
|
@@ -118,7 +116,7 @@ class OrcaCredentials:
|
|
|
118
116
|
New keys are merged into the existing headers, this will overwrite headers with the
|
|
119
117
|
same name, but leave other headers untouched.
|
|
120
118
|
"""
|
|
121
|
-
|
|
119
|
+
orca_api.headers.update(Headers(headers))
|
|
122
120
|
|
|
123
121
|
@staticmethod
|
|
124
122
|
def set_api_key(api_key: str, check_validity: bool = True):
|
|
@@ -137,4 +135,44 @@ class OrcaCredentials:
|
|
|
137
135
|
"""
|
|
138
136
|
OrcaCredentials.set_headers({"Api-Key": api_key})
|
|
139
137
|
if check_validity:
|
|
140
|
-
|
|
138
|
+
orca_api.GET("/auth")
|
|
139
|
+
|
|
140
|
+
@staticmethod
|
|
141
|
+
def set_base_url(base_url: str, check_validity: bool = True):
|
|
142
|
+
"""
|
|
143
|
+
Set the base URL for the Orca API
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
base_url: The base URL to set
|
|
147
|
+
check_validity: Whether to check if there is an API running at the given base URL
|
|
148
|
+
|
|
149
|
+
Raises:
|
|
150
|
+
ValueError: if there is no healthy API running at the given base URL and `check_validity` is True
|
|
151
|
+
"""
|
|
152
|
+
# check if the base url is reachable before setting it
|
|
153
|
+
if check_validity:
|
|
154
|
+
try:
|
|
155
|
+
httpx.get(base_url, timeout=1)
|
|
156
|
+
except ConnectError as e:
|
|
157
|
+
raise ValueError(f"No API found at {base_url}") from e
|
|
158
|
+
|
|
159
|
+
orca_api.base_url = base_url
|
|
160
|
+
|
|
161
|
+
# check if the api passes the health check
|
|
162
|
+
if check_validity:
|
|
163
|
+
orca_api.GET("/")
|
|
164
|
+
|
|
165
|
+
@staticmethod
|
|
166
|
+
def is_healthy() -> bool:
|
|
167
|
+
"""
|
|
168
|
+
Check whether the API is healthy
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
True if the API is healthy, False otherwise
|
|
172
|
+
"""
|
|
173
|
+
try:
|
|
174
|
+
orca_api.GET("/")
|
|
175
|
+
orca_api.GET("/gpu/")
|
|
176
|
+
except Exception:
|
|
177
|
+
return False
|
|
178
|
+
return True
|
orca_sdk/credentials_test.py
CHANGED
|
@@ -2,6 +2,7 @@ from uuid import uuid4
|
|
|
2
2
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
+
from .client import orca_api
|
|
5
6
|
from .credentials import OrcaCredentials
|
|
6
7
|
|
|
7
8
|
|
|
@@ -35,3 +36,22 @@ def test_set_invalid_api_key(api_key):
|
|
|
35
36
|
with pytest.raises(ValueError, match="Invalid API key"):
|
|
36
37
|
OrcaCredentials.set_api_key(str(uuid4()))
|
|
37
38
|
assert not OrcaCredentials.is_authenticated()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_set_base_url(base_url_reset):
    # setting a base url must be reflected on the shared client
    target_url = "http://api.orcadb.ai"
    OrcaCredentials.set_base_url(target_url)
    assert str(orca_api.base_url) == target_url
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_set_invalid_base_url():
    # nothing is expected to listen on this port, so validation has to reject the url
    unreachable_url = "http://localhost:1582"
    expected_failure = pytest.raises(ValueError, match="No API found at http://localhost:1582")
    with expected_failure:
        OrcaCredentials.set_base_url(unreachable_url)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_is_healthy():
    # the API the test suite is configured against is expected to report healthy
    healthy = OrcaCredentials.is_healthy()
    assert healthy
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_is_healthy_false(base_url_reset):
    # skip validation so the client can be pointed at an address without an API
    OrcaCredentials.set_base_url("http://localhost:1582", check_validity=False)
    healthy = OrcaCredentials.is_healthy()
    assert not healthy
|
orca_sdk/datasource.py
CHANGED
|
@@ -1,40 +1,28 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
import os
|
|
4
5
|
import tempfile
|
|
5
6
|
import zipfile
|
|
6
7
|
from datetime import datetime
|
|
7
8
|
from io import BytesIO
|
|
8
9
|
from os import PathLike
|
|
9
10
|
from pathlib import Path
|
|
10
|
-
from typing import Union
|
|
11
|
+
from typing import Literal, Union, cast
|
|
11
12
|
|
|
12
13
|
import pandas as pd
|
|
13
14
|
import pyarrow as pa
|
|
14
15
|
from datasets import Dataset, DatasetDict
|
|
16
|
+
from httpx._types import FileTypes # type: ignore
|
|
15
17
|
from pyarrow import parquet
|
|
16
18
|
from torch.utils.data import DataLoader as TorchDataLoader
|
|
17
19
|
from torch.utils.data import Dataset as TorchDataset
|
|
18
20
|
from tqdm.auto import tqdm
|
|
19
21
|
|
|
20
|
-
from ._generated_api_client.api import (
|
|
21
|
-
create_datasource_from_content,
|
|
22
|
-
delete_datasource,
|
|
23
|
-
get_datasource,
|
|
24
|
-
list_datasources,
|
|
25
|
-
)
|
|
26
|
-
from ._generated_api_client.api.datasource.create_datasource_from_files_datasource_upload_post import (
|
|
27
|
-
_parse_response as parse_create_response,
|
|
28
|
-
)
|
|
29
|
-
from ._generated_api_client.client import get_client
|
|
30
|
-
from ._generated_api_client.models import (
|
|
31
|
-
ColumnType,
|
|
32
|
-
CreateDatasourceFromContentRequest,
|
|
33
|
-
DatasourceMetadata,
|
|
34
|
-
)
|
|
35
22
|
from ._utils.common import CreateMode, DropMode
|
|
36
23
|
from ._utils.data_parsing import hf_dataset_from_torch
|
|
37
24
|
from ._utils.tqdm_file_reader import TqdmFileReader
|
|
25
|
+
from .client import DatasourceMetadata, orca_api
|
|
38
26
|
|
|
39
27
|
|
|
40
28
|
def _upload_files_to_datasource(
|
|
@@ -55,8 +43,7 @@ def _upload_files_to_datasource(
|
|
|
55
43
|
Returns:
|
|
56
44
|
Metadata for the created datasource
|
|
57
45
|
"""
|
|
58
|
-
|
|
59
|
-
files = []
|
|
46
|
+
files: list[tuple[Literal["files"], FileTypes]] = []
|
|
60
47
|
|
|
61
48
|
# Calculate total size for all files
|
|
62
49
|
total_size = sum(file_path.stat().st_size for file_path in file_paths)
|
|
@@ -65,16 +52,13 @@ def _upload_files_to_datasource(
|
|
|
65
52
|
for file_path in file_paths:
|
|
66
53
|
buffered_reader = open(file_path, "rb")
|
|
67
54
|
tqdm_reader = TqdmFileReader(buffered_reader, pbar)
|
|
68
|
-
files.append(("files", (file_path.name, tqdm_reader)))
|
|
55
|
+
files.append(("files", (file_path.name, cast(bytes, tqdm_reader))))
|
|
69
56
|
|
|
70
57
|
# Use manual HTTP request for file uploads
|
|
71
|
-
metadata =
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
files=files,
|
|
76
|
-
data={"name": name, "description": description},
|
|
77
|
-
)
|
|
58
|
+
metadata = orca_api.POST(
|
|
59
|
+
"/datasource/upload",
|
|
60
|
+
files=files,
|
|
61
|
+
data={"name": name, "description": description},
|
|
78
62
|
)
|
|
79
63
|
|
|
80
64
|
return metadata
|
|
@@ -129,19 +113,19 @@ class Datasource:
|
|
|
129
113
|
|
|
130
114
|
def __init__(self, metadata: DatasourceMetadata):
    # for internal use only, do not document

    def describe_column(column) -> str:
        # human readable type label for a single column entry of the metadata
        column_type = column["type"]
        if column_type == "ENUM":
            # a missing or empty options entry renders as "enum()"
            options = (column["enum_options"] or []) if "enum_options" in column else []
            return "enum(" + ", ".join(repr(option) for option in options) + ")"
        if column_type == "STRING":
            return "str"
        return column_type.lower()

    self.id = metadata["id"]
    self.name = metadata["name"]
    self.length = metadata["length"]
    # timestamps arrive as ISO formatted strings and are exposed as datetime objects
    self.created_at = datetime.fromisoformat(metadata["created_at"])
    self.updated_at = datetime.fromisoformat(metadata["updated_at"])
    self.description = metadata["description"]

    column_types = {}
    for column in metadata["columns"]:
        column_types[column["name"]] = describe_column(column)
    self.columns = column_types
|
|
146
130
|
|
|
147
131
|
def __eq__(self, other) -> bool:
|
|
@@ -158,39 +142,6 @@ class Datasource:
|
|
|
158
142
|
+ "})"
|
|
159
143
|
)
|
|
160
144
|
|
|
161
|
-
def download(self, output_path: str | PathLike) -> None:
|
|
162
|
-
"""
|
|
163
|
-
Download the datasource as a ZIP and extract them to a specified path.
|
|
164
|
-
|
|
165
|
-
Params:
|
|
166
|
-
output_path: The local file path or directory where the downloaded files will be saved.
|
|
167
|
-
|
|
168
|
-
Returns:
|
|
169
|
-
None
|
|
170
|
-
|
|
171
|
-
Raises:
|
|
172
|
-
RuntimeError: If the download fails.
|
|
173
|
-
"""
|
|
174
|
-
|
|
175
|
-
output_path = Path(output_path)
|
|
176
|
-
client = get_client().get_httpx_client()
|
|
177
|
-
url = f"/datasource/{self.id}/download"
|
|
178
|
-
response = client.get(url)
|
|
179
|
-
if response.status_code == 404:
|
|
180
|
-
raise LookupError(f"Datasource {self.id} not found")
|
|
181
|
-
if response.status_code != 200:
|
|
182
|
-
raise RuntimeError(f"Failed to download datasource: {response.status_code} {response.text}")
|
|
183
|
-
|
|
184
|
-
with tempfile.NamedTemporaryFile(suffix=".zip") as tmp_zip:
|
|
185
|
-
tmp_zip.write(response.content)
|
|
186
|
-
tmp_zip.flush()
|
|
187
|
-
with zipfile.ZipFile(tmp_zip.name, "r") as zf:
|
|
188
|
-
output_path.mkdir(parents=True, exist_ok=True)
|
|
189
|
-
for file in zf.namelist():
|
|
190
|
-
out_file = output_path / Path(file).name
|
|
191
|
-
with zf.open(file) as af:
|
|
192
|
-
out_file.write_bytes(af.read())
|
|
193
|
-
|
|
194
145
|
@classmethod
|
|
195
146
|
def from_hf_dataset(
|
|
196
147
|
cls, name: str, dataset: Dataset, if_exists: CreateMode = "error", description: str | None = None
|
|
@@ -317,10 +268,10 @@ class Datasource:
|
|
|
317
268
|
if existing is not None:
|
|
318
269
|
return existing
|
|
319
270
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
271
|
+
metadata = orca_api.POST(
|
|
272
|
+
"/datasource",
|
|
273
|
+
json={"name": name, "description": description, "content": data},
|
|
274
|
+
)
|
|
324
275
|
return cls(metadata=metadata)
|
|
325
276
|
|
|
326
277
|
@classmethod
|
|
@@ -351,10 +302,10 @@ class Datasource:
|
|
|
351
302
|
if existing is not None:
|
|
352
303
|
return existing
|
|
353
304
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
305
|
+
metadata = orca_api.POST(
|
|
306
|
+
"/datasource",
|
|
307
|
+
json={"name": name, "description": description, "content": data},
|
|
308
|
+
)
|
|
358
309
|
return cls(metadata=metadata)
|
|
359
310
|
|
|
360
311
|
@classmethod
|
|
@@ -410,16 +361,10 @@ class Datasource:
|
|
|
410
361
|
parquet.write_table(pyarrow_table, buffer)
|
|
411
362
|
parquet_bytes = buffer.getvalue()
|
|
412
363
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
response=client.get_httpx_client().request(
|
|
418
|
-
method="post",
|
|
419
|
-
url="/datasource/upload",
|
|
420
|
-
files=[("files", ("data.parquet", parquet_bytes))],
|
|
421
|
-
data={"name": name, "description": description},
|
|
422
|
-
)
|
|
364
|
+
metadata = orca_api.POST(
|
|
365
|
+
"/datasource/upload",
|
|
366
|
+
files=[("files", ("data.parquet", parquet_bytes))],
|
|
367
|
+
data={"name": name, "description": description},
|
|
423
368
|
)
|
|
424
369
|
|
|
425
370
|
return cls(metadata=metadata)
|
|
@@ -471,12 +416,12 @@ class Datasource:
|
|
|
471
416
|
return cls(metadata=metadata)
|
|
472
417
|
|
|
473
418
|
@classmethod
|
|
474
|
-
def open(cls,
|
|
419
|
+
def open(cls, name_or_id: str) -> Datasource:
|
|
475
420
|
"""
|
|
476
421
|
Get a handle to a datasource by name or id in the OrcaCloud
|
|
477
422
|
|
|
478
423
|
Params:
|
|
479
|
-
|
|
424
|
+
name_or_id: The name or unique identifier of the datasource to get
|
|
480
425
|
|
|
481
426
|
Returns:
|
|
482
427
|
A handle to the existing datasource in the OrcaCloud
|
|
@@ -484,7 +429,7 @@ class Datasource:
|
|
|
484
429
|
Raises:
|
|
485
430
|
LookupError: If the datasource does not exist
|
|
486
431
|
"""
|
|
487
|
-
return cls(
|
|
432
|
+
return cls(orca_api.GET("/datasource/{name_or_id}", params={"name_or_id": name_or_id}))
|
|
488
433
|
|
|
489
434
|
@classmethod
|
|
490
435
|
def exists(cls, name_or_id: str) -> bool:
|
|
@@ -511,7 +456,7 @@ class Datasource:
|
|
|
511
456
|
Returns:
|
|
512
457
|
A list of all datasource handles in the OrcaCloud
|
|
513
458
|
"""
|
|
514
|
-
return [cls(metadata) for metadata in
|
|
459
|
+
return [cls(metadata) for metadata in orca_api.GET("/datasource")]
|
|
515
460
|
|
|
516
461
|
@classmethod
|
|
517
462
|
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error") -> None:
|
|
@@ -527,7 +472,7 @@ class Datasource:
|
|
|
527
472
|
LookupError: If the datasource does not exist and if_not_exists is `"error"`
|
|
528
473
|
"""
|
|
529
474
|
try:
|
|
530
|
-
|
|
475
|
+
orca_api.DELETE("/datasource/{name_or_id}", params={"name_or_id": name_or_id})
|
|
531
476
|
logging.info(f"Deleted datasource {name_or_id}")
|
|
532
477
|
except LookupError:
|
|
533
478
|
if if_not_exists == "error":
|
|
@@ -535,3 +480,45 @@ class Datasource:
|
|
|
535
480
|
|
|
536
481
|
def __len__(self) -> int:
    """Return the value of the `length` attribute of this datasource handle"""
    return self.length
|
|
483
|
+
|
|
484
|
+
def download(
    self, output_dir: str | PathLike, file_type: Literal["hf_dataset", "json", "csv"] = "hf_dataset"
) -> None:
    """
    Download the datasource to a specified path in the specified format type

    Params:
        output_dir: The local directory where the downloaded file will be saved. The directory
            is created if it does not exist yet.
        file_type: The type of file to download. An `"hf_dataset"` download is received as a zip
            archive that is extracted to `<output_dir>/<name>` and then removed; the other
            types are saved as `<output_dir>/<name>.<file_type>`.

    Returns:
        None
    """
    extension = "zip" if file_type == "hf_dataset" else file_type
    target_dir = Path(output_dir)
    # make sure the destination exists instead of failing when the file is opened
    target_dir.mkdir(parents=True, exist_ok=True)
    output_path = target_dir / f"{self.name}.{extension}"

    # start the request before creating the file so that a request which fails before
    # yielding a response does not leave an empty file behind
    with orca_api.stream("GET", f"/datasource/{self.id}/download", params={"file_type": file_type}) as response:
        total_chunks = int(response.headers["X-Total-Chunks"]) if "X-Total-Chunks" in response.headers else None
        with open(output_path, "wb") as download_file:
            # the progress bar is only shown when the response announces a chunk count
            with tqdm(desc="Downloading", total=total_chunks, disable=total_chunks is None) as progress:
                for chunk in response.iter_bytes():
                    download_file.write(chunk)
                    progress.update(1)

    # extract the zip file
    if extension == "zip":
        extract_dir = target_dir / self.name
        with zipfile.ZipFile(output_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
        output_path.unlink()  # Remove the zip file after extraction
        logging.info(f"Downloaded {extract_dir}")
    else:
        logging.info(f"Downloaded {output_path}")
|
|
516
|
+
|
|
517
|
+
def to_list(self) -> list[dict]:
    """
    Convert the datasource to a list of dictionaries.

    Returns:
        A list of dictionaries representation of the datasource.
    """
    # requests the JSON flavour of the download endpoint for this datasource's id;
    # the `name_or_id` entry presumably fills the placeholder in the path
    rows = orca_api.GET(
        "/datasource/{name_or_id}/download",
        params={"name_or_id": self.id, "file_type": "json"},
    )
    return rows
|
orca_sdk/datasource_test.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
3
|
import tempfile
|
|
4
|
+
from typing import cast
|
|
3
5
|
from uuid import uuid4
|
|
4
6
|
|
|
7
|
+
import numpy as np
|
|
5
8
|
import pandas as pd
|
|
6
9
|
import pyarrow as pa
|
|
7
10
|
import pytest
|
|
11
|
+
from datasets import Dataset
|
|
8
12
|
|
|
9
13
|
from .datasource import Datasource
|
|
10
14
|
|
|
@@ -99,13 +103,6 @@ def test_drop_datasource_invalid_input():
|
|
|
99
103
|
Datasource.drop("not valid id")
|
|
100
104
|
|
|
101
105
|
|
|
102
|
-
def test_download_datasource(datasource):
|
|
103
|
-
with tempfile.TemporaryDirectory() as temp_dir:
|
|
104
|
-
output_path = os.path.join(temp_dir, "datasource.zip")
|
|
105
|
-
datasource.download(output_path)
|
|
106
|
-
assert os.path.exists(output_path)
|
|
107
|
-
|
|
108
|
-
|
|
109
106
|
def test_from_list():
|
|
110
107
|
# Test creating datasource from list of dictionaries
|
|
111
108
|
data = [
|
|
@@ -296,3 +293,40 @@ def test_from_disk_already_exists():
|
|
|
296
293
|
assert datasource2.name == datasource1.name
|
|
297
294
|
finally:
|
|
298
295
|
os.unlink(f.name)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def test_to_list(hf_dataset, datasource):
    # rows returned by the datasource must equal the rows of the dataset fixture
    expected_rows = hf_dataset.to_list()
    actual_rows = datasource.to_list()
    assert actual_rows == expected_rows
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def test_download_datasource(hf_dataset, datasource):
    with tempfile.TemporaryDirectory() as temp_dir:
        # Default download: the archive is extracted into a directory and the zip is removed
        datasource.download(temp_dir)
        extracted_dir = f"{temp_dir}/{datasource.name}"
        assert os.path.exists(extracted_dir)
        assert os.path.isdir(extracted_dir)
        assert not os.path.exists(f"{extracted_dir}.zip")
        restored_dataset = Dataset.load_from_disk(extracted_dir)
        assert restored_dataset.column_names == hf_dataset.column_names
        assert restored_dataset.to_dict() == hf_dataset.to_dict()

        # JSON download: the file content equals the row list of the dataset
        datasource.download(temp_dir, file_type="json")
        json_path = f"{temp_dir}/{datasource.name}.json"
        assert os.path.exists(json_path)
        with open(json_path, "r") as f:
            content = json.load(f)
        assert content == hf_dataset.to_list()

        # CSV download: the score column is compared with a tolerance, all others exactly
        datasource.download(temp_dir, file_type="csv")
        csv_path = f"{temp_dir}/{datasource.name}.csv"
        assert os.path.exists(csv_path)
        csv_dataset = cast(Dataset, Dataset.from_csv(csv_path))
        assert csv_dataset.column_names == hf_dataset.column_names
        expected_without_score = hf_dataset.remove_columns("score").to_dict()
        actual_without_score = csv_dataset.remove_columns("score").to_dict()
        assert actual_without_score == expected_without_score
        assert np.allclose(csv_dataset["score"], hf_dataset["score"])
|