easy_ml 0.2.0.pre.rc76 → 0.2.0.pre.rc78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/easy_ml/models_controller.rb +3 -2
- data/app/frontend/components/ModelForm.tsx +16 -0
- data/app/frontend/components/ScheduleModal.tsx +0 -2
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
- data/app/jobs/easy_ml/application_job.rb +1 -0
- data/app/jobs/easy_ml/batch_job.rb +47 -6
- data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
- data/app/jobs/easy_ml/reaper.rb +14 -10
- data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
- data/app/models/easy_ml/column/imputers/base.rb +1 -1
- data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -5
- data/app/models/easy_ml/column/imputers/today.rb +1 -1
- data/app/models/easy_ml/column/selector.rb +0 -8
- data/app/models/easy_ml/column.rb +1 -1
- data/app/models/easy_ml/dataset/learner/base.rb +2 -2
- data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
- data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
- data/app/models/easy_ml/dataset/refresh_reasons.rb +12 -0
- data/app/models/easy_ml/dataset.rb +29 -76
- data/app/models/easy_ml/datasource.rb +0 -6
- data/app/models/easy_ml/feature.rb +27 -38
- data/app/models/easy_ml/model.rb +20 -2
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
- data/app/models/easy_ml/models/xgboost.rb +52 -36
- data/app/models/easy_ml/retraining_run.rb +1 -1
- data/app/serializers/easy_ml/dataset_serializer.rb +1 -1
- data/app/serializers/easy_ml/model_serializer.rb +1 -0
- data/lib/easy_ml/core/tuner.rb +7 -4
- data/lib/easy_ml/data/dataset_manager/normalizer.rb +0 -0
- data/lib/easy_ml/data/dataset_manager/reader/base.rb +80 -0
- data/lib/easy_ml/data/dataset_manager/reader/batch.rb +106 -0
- data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +23 -0
- data/lib/easy_ml/data/dataset_manager/reader/file.rb +75 -0
- data/lib/easy_ml/data/dataset_manager/reader.rb +58 -0
- data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +67 -0
- data/lib/easy_ml/data/dataset_manager/writer/base.rb +139 -0
- data/lib/easy_ml/data/dataset_manager/writer/named.rb +14 -0
- data/lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb +15 -0
- data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +150 -0
- data/lib/easy_ml/data/dataset_manager/writer.rb +80 -0
- data/lib/easy_ml/data/dataset_manager.rb +140 -0
- data/lib/easy_ml/data/partition/boundaries.rb +60 -0
- data/lib/easy_ml/data/partition.rb +7 -0
- data/lib/easy_ml/data/polars_column.rb +19 -5
- data/lib/easy_ml/data/synced_directory.rb +1 -2
- data/lib/easy_ml/data.rb +2 -0
- data/lib/easy_ml/engine.rb +16 -14
- data/lib/easy_ml/feature_store.rb +21 -188
- data/lib/easy_ml/reasons.rb +41 -0
- data/lib/easy_ml/support/lockable.rb +1 -5
- data/lib/easy_ml/version.rb +1 -1
- data/lib/easy_ml.rb +1 -1
- data/public/easy_ml/assets/.vite/manifest.json +1 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
- metadata +24 -9
- data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
- data/lib/easy_ml/data/filter_extensions.rb +0 -31
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
- /data/app/models/{lineage_history.rb → easy_ml/lineage_history.rb} +0 -0
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc76
|
4
|
+
version: 0.2.0.pre.rc78
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -198,14 +198,14 @@ dependencies:
|
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 0.
|
201
|
+
version: 0.17.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 0.
|
208
|
+
version: 0.17.0
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: pycall
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -604,12 +604,12 @@ files:
|
|
604
604
|
- app/models/easy_ml/dataset/learner/lazy/query.rb
|
605
605
|
- app/models/easy_ml/dataset/learner/lazy/string.rb
|
606
606
|
- app/models/easy_ml/dataset/learner/query.rb
|
607
|
+
- app/models/easy_ml/dataset/refresh_reasons.rb
|
607
608
|
- app/models/easy_ml/dataset_history.rb
|
608
609
|
- app/models/easy_ml/datasource.rb
|
609
610
|
- app/models/easy_ml/datasource_history.rb
|
610
611
|
- app/models/easy_ml/datasources/base_datasource.rb
|
611
612
|
- app/models/easy_ml/datasources/file_datasource.rb
|
612
|
-
- app/models/easy_ml/datasources/polars_datasource.rb
|
613
613
|
- app/models/easy_ml/datasources/s3_datasource.rb
|
614
614
|
- app/models/easy_ml/deploy.rb
|
615
615
|
- app/models/easy_ml/event.rb
|
@@ -631,6 +631,7 @@ files:
|
|
631
631
|
- app/models/easy_ml/import/retraining_job.rb
|
632
632
|
- app/models/easy_ml/import/splitter.rb
|
633
633
|
- app/models/easy_ml/lineage.rb
|
634
|
+
- app/models/easy_ml/lineage_history.rb
|
634
635
|
- app/models/easy_ml/model.rb
|
635
636
|
- app/models/easy_ml/model_file.rb
|
636
637
|
- app/models/easy_ml/model_file_history.rb
|
@@ -657,7 +658,6 @@ files:
|
|
657
658
|
- app/models/easy_ml/splitters/random_splitter.rb
|
658
659
|
- app/models/easy_ml/tuner_job.rb
|
659
660
|
- app/models/easy_ml/tuner_run.rb
|
660
|
-
- app/models/lineage_history.rb
|
661
661
|
- app/serializers/easy_ml/column_serializer.rb
|
662
662
|
- app/serializers/easy_ml/dataset_serializer.rb
|
663
663
|
- app/serializers/easy_ml/datasource_serializer.rb
|
@@ -705,8 +705,22 @@ files:
|
|
705
705
|
- lib/easy_ml/core_ext/hash.rb
|
706
706
|
- lib/easy_ml/core_ext/pathname.rb
|
707
707
|
- lib/easy_ml/data.rb
|
708
|
+
- lib/easy_ml/data/dataset_manager.rb
|
709
|
+
- lib/easy_ml/data/dataset_manager/normalizer.rb
|
710
|
+
- lib/easy_ml/data/dataset_manager/reader.rb
|
711
|
+
- lib/easy_ml/data/dataset_manager/reader/base.rb
|
712
|
+
- lib/easy_ml/data/dataset_manager/reader/batch.rb
|
713
|
+
- lib/easy_ml/data/dataset_manager/reader/data_frame.rb
|
714
|
+
- lib/easy_ml/data/dataset_manager/reader/file.rb
|
715
|
+
- lib/easy_ml/data/dataset_manager/writer.rb
|
716
|
+
- lib/easy_ml/data/dataset_manager/writer/append_only.rb
|
717
|
+
- lib/easy_ml/data/dataset_manager/writer/base.rb
|
718
|
+
- lib/easy_ml/data/dataset_manager/writer/named.rb
|
719
|
+
- lib/easy_ml/data/dataset_manager/writer/partitioned.rb
|
720
|
+
- lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb
|
708
721
|
- lib/easy_ml/data/date_converter.rb
|
709
|
-
- lib/easy_ml/data/filter_extensions.rb
|
722
|
+
- lib/easy_ml/data/partition.rb
|
723
|
+
- lib/easy_ml/data/partition/boundaries.rb
|
710
724
|
- lib/easy_ml/data/polars_column.rb
|
711
725
|
- lib/easy_ml/data/polars_in_memory.rb
|
712
726
|
- lib/easy_ml/data/polars_reader.rb
|
@@ -765,6 +779,7 @@ files:
|
|
765
779
|
- lib/easy_ml/railtie/templates/migration/remove_evaluator_from_retraining_jobs.rb.tt
|
766
780
|
- lib/easy_ml/railtie/templates/migration/remove_preprocessor_statistics_from_easy_ml_datasets.rb.tt
|
767
781
|
- lib/easy_ml/railtie/templates/migration/update_preprocessing_steps_to_jsonb.rb.tt
|
782
|
+
- lib/easy_ml/reasons.rb
|
768
783
|
- lib/easy_ml/support.rb
|
769
784
|
- lib/easy_ml/support/age.rb
|
770
785
|
- lib/easy_ml/support/est.rb
|
@@ -784,8 +799,8 @@ files:
|
|
784
799
|
- public/easy_ml/assets/.vite/manifest-assets.json
|
785
800
|
- public/easy_ml/assets/.vite/manifest.json
|
786
801
|
- public/easy_ml/assets/assets/Application-nnn_XLuL.css
|
787
|
-
- public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js
|
788
|
-
- public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js.map
|
802
|
+
- public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js
|
803
|
+
- public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js.map
|
789
804
|
homepage: https://github.com/brettshollenberger/easy_ml
|
790
805
|
licenses:
|
791
806
|
- MIT
|
@@ -1,69 +0,0 @@
|
|
1
|
-
module EasyML
|
2
|
-
module Datasources
|
3
|
-
class PolarsDatasource < BaseDatasource
|
4
|
-
include EasyML::DataframeSerialization
|
5
|
-
|
6
|
-
validates :df, presence: true
|
7
|
-
add_configuration_attributes :df
|
8
|
-
|
9
|
-
def query(**kwargs)
|
10
|
-
EasyML::Data::PolarsInMemory.query(df, **kwargs)
|
11
|
-
end
|
12
|
-
|
13
|
-
def in_batches(of: 10_000)
|
14
|
-
total_rows = df.shape[0]
|
15
|
-
(0...total_rows).step(of) do |start|
|
16
|
-
end_index = [start + of, total_rows].min
|
17
|
-
yield df.slice(start, end_index - start)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def all_files
|
22
|
-
[]
|
23
|
-
end
|
24
|
-
|
25
|
-
def files
|
26
|
-
[]
|
27
|
-
end
|
28
|
-
|
29
|
-
def last_updated_at
|
30
|
-
datasource.updated_at
|
31
|
-
end
|
32
|
-
|
33
|
-
def sha
|
34
|
-
nil
|
35
|
-
end
|
36
|
-
|
37
|
-
def data
|
38
|
-
df
|
39
|
-
end
|
40
|
-
|
41
|
-
def df
|
42
|
-
datasource.df
|
43
|
-
end
|
44
|
-
|
45
|
-
def exists?
|
46
|
-
df.present?
|
47
|
-
end
|
48
|
-
|
49
|
-
def error_not_exists
|
50
|
-
"Must have a dataframe"
|
51
|
-
end
|
52
|
-
|
53
|
-
def store_in_configuration
|
54
|
-
return unless df
|
55
|
-
|
56
|
-
datasource.configuration = (datasource.configuration || {}).merge(
|
57
|
-
"df" => serialize_dataframe(df),
|
58
|
-
)
|
59
|
-
end
|
60
|
-
|
61
|
-
def read_from_configuration
|
62
|
-
return unless datasource.configuration&.key?("df")
|
63
|
-
|
64
|
-
df_data = datasource.configuration["df"]
|
65
|
-
datasource.df = deserialize_dataframe(df_data)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
module EasyML
|
2
|
-
module Data
|
3
|
-
module FilterExtensions
|
4
|
-
def is_primary_key_filter?(primary_key)
|
5
|
-
return false unless primary_key
|
6
|
-
primary_key = [primary_key] unless primary_key.is_a?(Array)
|
7
|
-
# Filter expressions in Polars are represented as strings like:
|
8
|
-
# [([(col("LOAN_APP_ID")) > (dyn int: 4)]) & ([(col("LOAN_APP_ID")) < (dyn int: 16)])]
|
9
|
-
expr_str = to_s
|
10
|
-
return false unless expr_str.include?(primary_key.first)
|
11
|
-
|
12
|
-
# Check for common primary key operations
|
13
|
-
primary_key_ops = [">", "<", ">=", "<=", "=", "eq", "gt", "lt", "ge", "le"]
|
14
|
-
primary_key_ops.any? { |op| expr_str.include?(op) }
|
15
|
-
end
|
16
|
-
|
17
|
-
def extract_primary_key_values
|
18
|
-
expr_str = to_s
|
19
|
-
# Extract numeric values from the expression
|
20
|
-
# This will match both integers and floats
|
21
|
-
values = expr_str.scan(/(?:dyn int|float): (-?\d+(?:\.\d+)?)/).flatten.map(&:to_f)
|
22
|
-
values.uniq
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# Extend Polars classes with our filter functionality
|
29
|
-
[Polars::Expr].each do |klass|
|
30
|
-
klass.include(EasyML::Data::FilterExtensions)
|
31
|
-
end
|