easy_ml 0.2.0.pre.rc76 → 0.2.0.pre.rc78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/models_controller.rb +3 -2
  3. data/app/frontend/components/ModelForm.tsx +16 -0
  4. data/app/frontend/components/ScheduleModal.tsx +0 -2
  5. data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
  6. data/app/jobs/easy_ml/application_job.rb +1 -0
  7. data/app/jobs/easy_ml/batch_job.rb +47 -6
  8. data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
  9. data/app/jobs/easy_ml/reaper.rb +14 -10
  10. data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
  11. data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
  12. data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
  13. data/app/models/easy_ml/column/imputers/base.rb +1 -1
  14. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -5
  15. data/app/models/easy_ml/column/imputers/today.rb +1 -1
  16. data/app/models/easy_ml/column/selector.rb +0 -8
  17. data/app/models/easy_ml/column.rb +1 -1
  18. data/app/models/easy_ml/dataset/learner/base.rb +2 -2
  19. data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
  20. data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
  21. data/app/models/easy_ml/dataset/refresh_reasons.rb +12 -0
  22. data/app/models/easy_ml/dataset.rb +29 -76
  23. data/app/models/easy_ml/datasource.rb +0 -6
  24. data/app/models/easy_ml/feature.rb +27 -38
  25. data/app/models/easy_ml/model.rb +20 -2
  26. data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
  27. data/app/models/easy_ml/models/xgboost.rb +52 -36
  28. data/app/models/easy_ml/retraining_run.rb +1 -1
  29. data/app/serializers/easy_ml/dataset_serializer.rb +1 -1
  30. data/app/serializers/easy_ml/model_serializer.rb +1 -0
  31. data/lib/easy_ml/core/tuner.rb +7 -4
  32. data/lib/easy_ml/data/dataset_manager/normalizer.rb +0 -0
  33. data/lib/easy_ml/data/dataset_manager/reader/base.rb +80 -0
  34. data/lib/easy_ml/data/dataset_manager/reader/batch.rb +106 -0
  35. data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +23 -0
  36. data/lib/easy_ml/data/dataset_manager/reader/file.rb +75 -0
  37. data/lib/easy_ml/data/dataset_manager/reader.rb +58 -0
  38. data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +67 -0
  39. data/lib/easy_ml/data/dataset_manager/writer/base.rb +139 -0
  40. data/lib/easy_ml/data/dataset_manager/writer/named.rb +14 -0
  41. data/lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb +15 -0
  42. data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +150 -0
  43. data/lib/easy_ml/data/dataset_manager/writer.rb +80 -0
  44. data/lib/easy_ml/data/dataset_manager.rb +140 -0
  45. data/lib/easy_ml/data/partition/boundaries.rb +60 -0
  46. data/lib/easy_ml/data/partition.rb +7 -0
  47. data/lib/easy_ml/data/polars_column.rb +19 -5
  48. data/lib/easy_ml/data/synced_directory.rb +1 -2
  49. data/lib/easy_ml/data.rb +2 -0
  50. data/lib/easy_ml/engine.rb +16 -14
  51. data/lib/easy_ml/feature_store.rb +21 -188
  52. data/lib/easy_ml/reasons.rb +41 -0
  53. data/lib/easy_ml/support/lockable.rb +1 -5
  54. data/lib/easy_ml/version.rb +1 -1
  55. data/lib/easy_ml.rb +1 -1
  56. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  57. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
  58. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
  59. metadata +24 -9
  60. data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
  61. data/lib/easy_ml/data/filter_extensions.rb +0 -31
  62. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
  63. /data/app/models/{lineage_history.rb → easy_ml/lineage_history.rb} +0 -0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc76
4
+ version: 0.2.0.pre.rc78
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-02-13 00:00:00.000000000 Z
11
+ date: 2025-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -198,14 +198,14 @@ dependencies:
198
198
  requirements:
199
199
  - - "~>"
200
200
  - !ruby/object:Gem::Version
201
- version: 0.16.0
201
+ version: 0.17.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
- version: 0.16.0
208
+ version: 0.17.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: pycall
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -604,12 +604,12 @@ files:
604
604
  - app/models/easy_ml/dataset/learner/lazy/query.rb
605
605
  - app/models/easy_ml/dataset/learner/lazy/string.rb
606
606
  - app/models/easy_ml/dataset/learner/query.rb
607
+ - app/models/easy_ml/dataset/refresh_reasons.rb
607
608
  - app/models/easy_ml/dataset_history.rb
608
609
  - app/models/easy_ml/datasource.rb
609
610
  - app/models/easy_ml/datasource_history.rb
610
611
  - app/models/easy_ml/datasources/base_datasource.rb
611
612
  - app/models/easy_ml/datasources/file_datasource.rb
612
- - app/models/easy_ml/datasources/polars_datasource.rb
613
613
  - app/models/easy_ml/datasources/s3_datasource.rb
614
614
  - app/models/easy_ml/deploy.rb
615
615
  - app/models/easy_ml/event.rb
@@ -631,6 +631,7 @@ files:
631
631
  - app/models/easy_ml/import/retraining_job.rb
632
632
  - app/models/easy_ml/import/splitter.rb
633
633
  - app/models/easy_ml/lineage.rb
634
+ - app/models/easy_ml/lineage_history.rb
634
635
  - app/models/easy_ml/model.rb
635
636
  - app/models/easy_ml/model_file.rb
636
637
  - app/models/easy_ml/model_file_history.rb
@@ -657,7 +658,6 @@ files:
657
658
  - app/models/easy_ml/splitters/random_splitter.rb
658
659
  - app/models/easy_ml/tuner_job.rb
659
660
  - app/models/easy_ml/tuner_run.rb
660
- - app/models/lineage_history.rb
661
661
  - app/serializers/easy_ml/column_serializer.rb
662
662
  - app/serializers/easy_ml/dataset_serializer.rb
663
663
  - app/serializers/easy_ml/datasource_serializer.rb
@@ -705,8 +705,22 @@ files:
705
705
  - lib/easy_ml/core_ext/hash.rb
706
706
  - lib/easy_ml/core_ext/pathname.rb
707
707
  - lib/easy_ml/data.rb
708
+ - lib/easy_ml/data/dataset_manager.rb
709
+ - lib/easy_ml/data/dataset_manager/normalizer.rb
710
+ - lib/easy_ml/data/dataset_manager/reader.rb
711
+ - lib/easy_ml/data/dataset_manager/reader/base.rb
712
+ - lib/easy_ml/data/dataset_manager/reader/batch.rb
713
+ - lib/easy_ml/data/dataset_manager/reader/data_frame.rb
714
+ - lib/easy_ml/data/dataset_manager/reader/file.rb
715
+ - lib/easy_ml/data/dataset_manager/writer.rb
716
+ - lib/easy_ml/data/dataset_manager/writer/append_only.rb
717
+ - lib/easy_ml/data/dataset_manager/writer/base.rb
718
+ - lib/easy_ml/data/dataset_manager/writer/named.rb
719
+ - lib/easy_ml/data/dataset_manager/writer/partitioned.rb
720
+ - lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb
708
721
  - lib/easy_ml/data/date_converter.rb
709
- - lib/easy_ml/data/filter_extensions.rb
722
+ - lib/easy_ml/data/partition.rb
723
+ - lib/easy_ml/data/partition/boundaries.rb
710
724
  - lib/easy_ml/data/polars_column.rb
711
725
  - lib/easy_ml/data/polars_in_memory.rb
712
726
  - lib/easy_ml/data/polars_reader.rb
@@ -765,6 +779,7 @@ files:
765
779
  - lib/easy_ml/railtie/templates/migration/remove_evaluator_from_retraining_jobs.rb.tt
766
780
  - lib/easy_ml/railtie/templates/migration/remove_preprocessor_statistics_from_easy_ml_datasets.rb.tt
767
781
  - lib/easy_ml/railtie/templates/migration/update_preprocessing_steps_to_jsonb.rb.tt
782
+ - lib/easy_ml/reasons.rb
768
783
  - lib/easy_ml/support.rb
769
784
  - lib/easy_ml/support/age.rb
770
785
  - lib/easy_ml/support/est.rb
@@ -784,8 +799,8 @@ files:
784
799
  - public/easy_ml/assets/.vite/manifest-assets.json
785
800
  - public/easy_ml/assets/.vite/manifest.json
786
801
  - public/easy_ml/assets/assets/Application-nnn_XLuL.css
787
- - public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js
788
- - public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js.map
802
+ - public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js
803
+ - public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js.map
789
804
  homepage: https://github.com/brettshollenberger/easy_ml
790
805
  licenses:
791
806
  - MIT
@@ -1,69 +0,0 @@
1
- module EasyML
2
- module Datasources
3
- class PolarsDatasource < BaseDatasource
4
- include EasyML::DataframeSerialization
5
-
6
- validates :df, presence: true
7
- add_configuration_attributes :df
8
-
9
- def query(**kwargs)
10
- EasyML::Data::PolarsInMemory.query(df, **kwargs)
11
- end
12
-
13
- def in_batches(of: 10_000)
14
- total_rows = df.shape[0]
15
- (0...total_rows).step(of) do |start|
16
- end_index = [start + of, total_rows].min
17
- yield df.slice(start, end_index - start)
18
- end
19
- end
20
-
21
- def all_files
22
- []
23
- end
24
-
25
- def files
26
- []
27
- end
28
-
29
- def last_updated_at
30
- datasource.updated_at
31
- end
32
-
33
- def sha
34
- nil
35
- end
36
-
37
- def data
38
- df
39
- end
40
-
41
- def df
42
- datasource.df
43
- end
44
-
45
- def exists?
46
- df.present?
47
- end
48
-
49
- def error_not_exists
50
- "Must have a dataframe"
51
- end
52
-
53
- def store_in_configuration
54
- return unless df
55
-
56
- datasource.configuration = (datasource.configuration || {}).merge(
57
- "df" => serialize_dataframe(df),
58
- )
59
- end
60
-
61
- def read_from_configuration
62
- return unless datasource.configuration&.key?("df")
63
-
64
- df_data = datasource.configuration["df"]
65
- datasource.df = deserialize_dataframe(df_data)
66
- end
67
- end
68
- end
69
- end
@@ -1,31 +0,0 @@
1
- module EasyML
2
- module Data
3
- module FilterExtensions
4
- def is_primary_key_filter?(primary_key)
5
- return false unless primary_key
6
- primary_key = [primary_key] unless primary_key.is_a?(Array)
7
- # Filter expressions in Polars are represented as strings like:
8
- # [([(col("LOAN_APP_ID")) > (dyn int: 4)]) & ([(col("LOAN_APP_ID")) < (dyn int: 16)])]
9
- expr_str = to_s
10
- return false unless expr_str.include?(primary_key.first)
11
-
12
- # Check for common primary key operations
13
- primary_key_ops = [">", "<", ">=", "<=", "=", "eq", "gt", "lt", "ge", "le"]
14
- primary_key_ops.any? { |op| expr_str.include?(op) }
15
- end
16
-
17
- def extract_primary_key_values
18
- expr_str = to_s
19
- # Extract numeric values from the expression
20
- # This will match both integers and floats
21
- values = expr_str.scan(/(?:dyn int|float): (-?\d+(?:\.\d+)?)/).flatten.map(&:to_f)
22
- values.uniq
23
- end
24
- end
25
- end
26
- end
27
-
28
- # Extend Polars classes with our filter functionality
29
- [Polars::Expr].each do |klass|
30
- klass.include(EasyML::Data::FilterExtensions)
31
- end