easy_ml 0.2.0.pre.rc85 → 0.2.0.pre.rc89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/datasets_controller.rb +18 -2
  3. data/app/controllers/easy_ml/predictions_controller.rb +9 -1
  4. data/app/frontend/components/dataset/PreprocessingConfig.tsx +523 -150
  5. data/app/frontend/pages/DatasetsPage.tsx +0 -1
  6. data/app/frontend/types/dataset.ts +5 -2
  7. data/app/models/easy_ml/column/imputers/base.rb +23 -2
  8. data/app/models/easy_ml/column/imputers/embedding_encoder.rb +18 -0
  9. data/app/models/easy_ml/column/imputers/imputer.rb +1 -0
  10. data/app/models/easy_ml/column/imputers/most_frequent.rb +1 -1
  11. data/app/models/easy_ml/column/imputers/one_hot_encoder.rb +1 -1
  12. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -1
  13. data/app/models/easy_ml/column/imputers.rb +47 -41
  14. data/app/models/easy_ml/column/selector.rb +2 -2
  15. data/app/models/easy_ml/column.rb +260 -56
  16. data/app/models/easy_ml/column_history.rb +6 -0
  17. data/app/models/easy_ml/column_list.rb +30 -1
  18. data/app/models/easy_ml/dataset/learner/lazy/embedding.rb +10 -0
  19. data/app/models/easy_ml/dataset/learner/lazy/query.rb +2 -0
  20. data/app/models/easy_ml/dataset/learner.rb +11 -0
  21. data/app/models/easy_ml/dataset.rb +6 -19
  22. data/app/models/easy_ml/lineage_history.rb +17 -0
  23. data/app/models/easy_ml/model.rb +11 -1
  24. data/app/models/easy_ml/models/xgboost.rb +37 -7
  25. data/app/models/easy_ml/pca_model.rb +21 -0
  26. data/app/models/easy_ml/prediction.rb +2 -1
  27. data/app/serializers/easy_ml/column_serializer.rb +13 -1
  28. data/config/initializers/inflections.rb +1 -0
  29. data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +6 -8
  30. data/lib/easy_ml/data/dataset_manager/writer/base.rb +15 -2
  31. data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +0 -1
  32. data/lib/easy_ml/data/dataset_manager/writer.rb +2 -0
  33. data/lib/easy_ml/data/embeddings/compressor.rb +179 -0
  34. data/lib/easy_ml/data/embeddings/embedder.rb +226 -0
  35. data/lib/easy_ml/data/embeddings.rb +61 -0
  36. data/lib/easy_ml/data/polars_column.rb +3 -0
  37. data/lib/easy_ml/data/polars_reader.rb +54 -23
  38. data/lib/easy_ml/data/polars_schema.rb +28 -2
  39. data/lib/easy_ml/data/splits/file_split.rb +7 -2
  40. data/lib/easy_ml/data.rb +1 -0
  41. data/lib/easy_ml/embedding_store.rb +92 -0
  42. data/lib/easy_ml/engine.rb +4 -2
  43. data/lib/easy_ml/predict.rb +42 -20
  44. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +5 -0
  45. data/lib/easy_ml/railtie/templates/migration/add_is_primary_key_to_easy_ml_columns.rb.tt +9 -0
  46. data/lib/easy_ml/railtie/templates/migration/add_metadata_to_easy_ml_predictions.rb.tt +6 -0
  47. data/lib/easy_ml/railtie/templates/migration/add_pca_model_id_to_easy_ml_columns.rb.tt +9 -0
  48. data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_dataset_histories.rb.tt +13 -0
  49. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_pca_models.rb.tt +14 -0
  50. data/lib/easy_ml/version.rb +1 -1
  51. data/lib/easy_ml.rb +1 -0
  52. data/public/easy_ml/assets/.vite/manifest.json +2 -2
  53. data/public/easy_ml/assets/assets/Application-DfPoyRr8.css +1 -0
  54. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-KENNRQpC.js +533 -0
  55. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-KENNRQpC.js.map +1 -0
  56. metadata +59 -6
  57. data/lib/tasks/profile.rake +0 -40
  58. data/public/easy_ml/assets/assets/Application-nnn_XLuL.css +0 -1
  59. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CD8voxfL.js +0 -522
  60. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CD8voxfL.js.map +0 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53104f4ab5d52062d772983a09a1d31aecd524a7780b7104538327a346faab5e
4
- data.tar.gz: 023a0bf486a12c23e6a8724ae329e49849e8168ad647a4557d3d3ef2adec1a47
3
+ metadata.gz: a0eb5ce84bdd93da3ea53e97f1b1ceab81a529a9bb076596f4edf7e49349eadf
4
+ data.tar.gz: 5262f39ff5a1236729d28a8fa6715d1b12e6dd5b4319225a6df512493872bba0
5
5
  SHA512:
6
- metadata.gz: fb9e184aea0eff595d296285e5a66a9ad4778a8e48b6a64f39b5628ff92cdd440d06953d6a5dbf63b6b0ca312db6edef717e57fcd2f12fcbea63caaaa586eb67
7
- data.tar.gz: b379bca69fc1817ec29da1f56df6a0c6e121b443849b1d4f6032b14ec86805dd50f629a84989bd0c80bdba691c0d9f51cdf4d80dc69e5bba71502281a840d96c
6
+ metadata.gz: 8e8094ed3309b80e0ee70543667e7af982e0805ba6fe81a62ad2f46297eae4487440a4e6ffdca75f32ef045ef8b25440a8396032cf00f9fd6c191d63fc8c0386
7
+ data.tar.gz: 97d057eb4ffa2acdb319a52de427cc3f3e7c42db8de349d063b36cd91479c68eca10ec150689d11070f0d20f24b620c0ce7a77f3e335420dd8c73612d706d1a0
data/app/controllers/easy_ml/datasets_controller.rb CHANGED
@@ -73,7 +73,23 @@ module EasyML
73
73
 
74
74
  # Iterate over columns to check and update preprocessing_steps
75
75
  dataset_params[:columns_attributes]&.each do |_, column_attrs|
76
- column_attrs[:preprocessing_steps] = nil if column_attrs.dig(:preprocessing_steps, :training, :method) == "none"
76
+ if column_attrs.dig(:preprocessing_steps, :training, :method) == "none"
77
+ column_attrs[:preprocessing_steps] = nil
78
+ elsif column_attrs.dig(:preprocessing_steps, :training)
79
+ # Ensure encoding is properly set for categorical columns
80
+ training_config = column_attrs.dig(:preprocessing_steps, :training)
81
+ if training_config[:params]
82
+ # Remove old encoding params as they're now part of the encoding field
83
+ training_config[:params].delete(:one_hot)
84
+ training_config[:params].delete(:ordinal_encoding)
85
+ end
86
+
87
+ # Ensure embedding params are present when encoding is embedding
88
+ if training_config[:encoding] == "embedding" && training_config[:params]
89
+ training_config[:params][:llm] ||= "openai"
90
+ training_config[:params][:model] ||= "text-embedding-3-small"
91
+ end
92
+ end
77
93
  end
78
94
 
79
95
  # Handle feature ID assignment for existing features
@@ -165,7 +181,7 @@ module EasyML
165
181
  private
166
182
 
167
183
  def preprocessing_params
168
- [:method, { params: [:constant, :categorical_min, :one_hot, :ordinal_encoding, { clip: %i[min max] }] }]
184
+ [:method, :encoding, { params: [:constant, :categorical_min, :llm, :model, :preset, :dimensions, { clip: %i[min max] }] }]
169
185
  end
170
186
 
171
187
  def dataset_params
data/app/controllers/easy_ml/predictions_controller.rb CHANGED
@@ -22,8 +22,16 @@ module EasyML
22
22
  return render json: { error: "Missing required fields: #{fields}" }, status: :not_found
23
23
  end
24
24
 
25
- prediction = EasyML::Predict.predict(slug, input)
25
+ type = (params[:type] || :predict).to_sym
26
+ allowed_types = [:predict, :predict_proba]
27
+ unless allowed_types.include?(type)
28
+ return render json: { error: "Invalid type: #{type}" }, status: :not_found
29
+ end
30
+
31
+ prediction = EasyML::Predict.send(type, slug, input)
26
32
 
33
+ render json: { prediction: EasyML::PredictionSerializer.new(prediction).serializable_hash.dig(:data, :attributes) }, status: :ok
34
+ rescue ActiveRecord::RecordNotFound
27
35
  render json: { prediction: EasyML::PredictionSerializer.new(prediction).serializable_hash.dig(:data, :attributes) }, status: :ok
28
36
  rescue ActiveRecord::RecordNotFound
29
37
  render json: { error: "Model not found" }, status: :not_found