easy_ml 0.2.0.pre.rc9 → 0.2.0.pre.rc10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 27e223058d1e119b3f70e39fee2425e50e3d94780fbf9a54ea496e0d713f2492
4
- data.tar.gz: 42b8ffa08fe474412946628bfb7de71d83bccc4195a779db4fafd87002799c42
3
+ metadata.gz: 798cbe6a945c8321a2289622602ff4a0c71b6f3558eff1523a0352107cd41473
4
+ data.tar.gz: 4795d77060af37384555cea4e2a3c2c6c0f4ce9577d3fdc91b73b6aca0f864d3
5
5
  SHA512:
6
- metadata.gz: e448d3ed98f5a3b96893115865d37a3890684de543e3a7746b11c43c184d568d775c3a38c0b06fae34b00aeaa1eb76c012f3becc91e7637aeacf85ec902ec64f
7
- data.tar.gz: 83f5d575fe8ff06d3d57ac287faacf820905313ac16d6e36bf8d54f672363a62043c078ca9121cd8e7d10b2eab08125c53e005474ca17f543e429a6feb96f79e
6
+ metadata.gz: 921c16d1d885c441eee2b5249d4eb4eb8fa988d7ef5768579bc76c1784cef55f4cc40b0ad2c2c81fd28fb66f246a8f67b89637e48a05b4abcf6454ede516383a
7
+ data.tar.gz: '0039a45b69bfe5c7e306305e980784b509e4721e04a2775209a6200155fa407e58fb2efb058fb3dffde725a74c51555493f4337df67c6793a356dba31b286128'
@@ -0,0 +1 @@
1
+ {}
@@ -0,0 +1,17 @@
1
+ {
2
+ "entrypoints/Application.tsx": {
3
+ "file": "assets/Application-GDgZ4vVt.js",
4
+ "name": "entrypoints/Application.tsx",
5
+ "src": "entrypoints/Application.tsx",
6
+ "isEntry": true,
7
+ "css": [
8
+ "assets/Application-tsa3Id3n.css"
9
+ ]
10
+ },
11
+ "entrypoints/application.js": {
12
+ "file": "assets/application-DBfCPIOZ.js",
13
+ "name": "entrypoints/application.js",
14
+ "src": "entrypoints/application.js",
15
+ "isEntry": true
16
+ }
17
+ }
@@ -0,0 +1 @@
1
+ {}
@@ -0,0 +1,11 @@
1
+ {
2
+ "entrypoints/Application.tsx": {
3
+ "file": "assets/Application-GDgZ4vVt.js",
4
+ "name": "entrypoints/Application.tsx",
5
+ "src": "entrypoints/Application.tsx",
6
+ "isEntry": true,
7
+ "css": [
8
+ "assets/Application-tsa3Id3n.css"
9
+ ]
10
+ }
11
+ }
data/bin/build ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ bin/build_vite
5
+ gem build easy_ml.gemspec
data/bin/build_vite ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ echo "Building production assets for EasyML gem..."
5
+ # Run the Vite production build
6
+ bundle exec vite build
7
+ echo "Production assets built successfully."
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "easy_ml"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require "irb"
11
+ IRB.start(__FILE__)
data/bin/rspec ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
12
+ ENV['SPRING_APPLICATION_ROOT'] = './spec/internal'
13
+
14
+ bundle_binstub = File.expand_path("bundle", __dir__)
15
+
16
+ if File.file?(bundle_binstub)
17
+ if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
18
+ load(bundle_binstub)
19
+ else
20
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
21
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
22
+ end
23
+ end
24
+
25
+ require "rubygems"
26
+ require "bundler/setup"
27
+
28
+ load Gem.bin_path("rspec-core", "rspec")
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/bin/vite ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'vite' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
12
+
13
+ bundle_binstub = File.expand_path("bundle", __dir__)
14
+
15
+ if File.file?(bundle_binstub)
16
+ if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
17
+ load(bundle_binstub)
18
+ else
19
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
20
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
21
+ end
22
+ end
23
+
24
+ require "rubygems"
25
+ require "bundler/setup"
26
+
27
+ load Gem.bin_path("vite_ruby", "vite")
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc9"
4
+ VERSION = "0.2.0-rc10"
5
5
 
6
6
  module Version
7
7
  end
@@ -0,0 +1,11 @@
1
+ {
2
+ "entrypoints/Application.tsx": {
3
+ "file": "assets/entrypoints/Application.tsx-GDgZ4vVt.js",
4
+ "name": "entrypoints/Application.tsx",
5
+ "src": "entrypoints/Application.tsx",
6
+ "isEntry": true,
7
+ "css": [
8
+ "assets/Application-tsa3Id3n.css"
9
+ ]
10
+ }
11
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc9
4
+ version: 0.2.0.pre.rc10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-01-07 00:00:00.000000000 Z
11
+ date: 2025-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -437,8 +437,8 @@ executables: []
437
437
  extensions: []
438
438
  extra_rdoc_files: []
439
439
  files:
440
- - README.md
441
- - Rakefile
440
+ - app/.vite/manifest-assets.json
441
+ - app/.vite/manifest.json
442
442
  - app/controllers/easy_ml/application_controller.rb
443
443
  - app/controllers/easy_ml/columns_controller.rb
444
444
  - app/controllers/easy_ml/datasets_controller.rb
@@ -447,6 +447,8 @@ files:
447
447
  - app/controllers/easy_ml/models_controller.rb
448
448
  - app/controllers/easy_ml/retraining_runs_controller.rb
449
449
  - app/controllers/easy_ml/settings_controller.rb
450
+ - app/easy_ml/.vite/manifest-assets.json
451
+ - app/easy_ml/.vite/manifest.json
450
452
  - app/easy_ml/assets/Application-GDgZ4vVt.js
451
453
  - app/easy_ml/assets/Application-GDgZ4vVt.js.map
452
454
  - app/easy_ml/assets/Application-tsa3Id3n.css
@@ -578,6 +580,12 @@ files:
578
580
  - app/serializers/easy_ml/retraining_run_serializer.rb
579
581
  - app/serializers/easy_ml/settings_serializer.rb
580
582
  - app/views/layouts/easy_ml/application.html.erb
583
+ - bin/build
584
+ - bin/build_vite
585
+ - bin/console
586
+ - bin/rspec
587
+ - bin/setup
588
+ - bin/vite
581
589
  - config/initializers/resque.rb
582
590
  - config/resque-pool.yml
583
591
  - config/routes.rb
@@ -651,6 +659,8 @@ files:
651
659
  - lib/easy_ml/support/utc.rb
652
660
  - lib/easy_ml/version.rb
653
661
  - lib/tasks/vite.rake
662
+ - public/easy_ml/assets/.vite/manifest-assets.json
663
+ - public/easy_ml/assets/.vite/manifest.json
654
664
  - public/easy_ml/assets/assets/Application-tsa3Id3n.css
655
665
  - public/easy_ml/assets/assets/entrypoints/Application.tsx-GDgZ4vVt.js
656
666
  homepage: https://github.com/brettshollenberger/easy_ml
data/README.md DELETED
@@ -1,497 +0,0 @@
1
- <img src="easy_ml.svg" alt="EasyML Logo" style="width: 310px; height: 300px;">
2
-
3
- # EasyML
4
-
5
- ~~You can't do machine learning in Ruby.~~
6
-
7
- Deploy models in minutes.
8
-
9
- ## What is EasyML?
10
-
11
- EasyML is a **low code/no code**, end-to-end machine learning framework for Ruby on Rails.
12
-
13
- **Get productionized models in minutes.** It takes the guesswork out of:
14
-
15
- - Preprocessing data
16
- - Storing and batch computing features
17
- - Training models
18
- - Metric visualization
19
- - Deployment and versioning
20
- - Evaluating model performance
21
-
22
- With a dead-simple point-and-click interface, EasyML makes it stupid easy to train and deploy.
23
-
24
- Oh yeah, and it's open source!
25
-
26
- ## Features
27
-
28
- - **No Code (if you want)**: EasyML ships as a Rails engine. Just mount it in your app and get started.
29
- - **Opinionated Framework**: Provides a structured approach to data and model management, ensuring best practices are followed.
30
- - **Model Lifecycle On Rails**: Want predictions directly from your Rails app? You can do that.
31
- - **Easily Extensible**: Want a model that's not supported? Send a pull request!
32
-
33
- ## Current and Planned Features
34
-
35
- ### Models Available
36
-
37
- | XGBoost | LightGBM | TensorFlow | PyTorch |
38
- | ------- | -------- | ---------- | ------- |
39
- | ✅ | ❌ | ❌ | ❌ |
40
-
41
- ### Datasources Available
42
-
43
- | S3 | File | Polars | SQL Databases | REST APIs |
44
- | --- | ---- | ------ | ------------- | --------- |
45
- | ✅ | ✅ | ✅ | ❌ | ❌ |
46
-
47
- _Note: Features marked with ❌ are part of the roadmap and are not yet implemented._
48
-
49
- ## Quick Start:
50
-
51
- Building a Production pipeline is as easy as 1,2,3!
52
-
53
- ### 1. Create Your Dataset
54
-
55
- ```ruby
56
- class MyDataset < EasyML::Data::Dataset
57
- datasource :s3, s3_bucket: "my-bucket" # Every time the data changes, we'll pull new data
58
- target "revenue" # What are we trying to predict?
59
- splitter :date, date_column: "created_at" # How should we partition data into training, test, and validation datasets?
60
- transforms DataPipeline # Class that manages data transformation, adding new columns, etc.
61
- preprocessing_steps({
62
- training: {
63
- annual_revenue: { median: true, clip: { min: 0, max: 500_000 } }
64
- }
65
- }) # If annual revenue is missing, use the median value, after clipping the values into the approved list
66
- end
67
- ```
68
-
69
- ### 2. Create a Model
70
-
71
- ```ruby
72
- class MyModel < EasyML::Models::XGBoost
73
- dataset MyDataset
74
- task :regression # Or classification
75
- hyperparameters({
76
- max_depth: 5,
77
- learning_rate: 0.1,
78
- objective: "reg:squarederror"
79
- })
80
- end
81
- ```
82
-
83
- ### 3. Create a Trainer
84
-
85
- ```ruby
86
- class MyTrainer < EasyML::Trainer
87
- model MyModel
88
- evaluator MyMetrics
89
- end
90
-
91
- class MyMetrics
92
- def metric_we_make_money(y_pred, y_true)
93
- return true if model_makes_money?
94
- return false if model_lose_money?
95
- end
96
-
97
- def metric_sales_team_has_enough_leads(y_pred, y_true)
98
- return false if sales_will_be_sitting_on_their_hands?
99
- end
100
- end
101
- ```
102
-
103
- Now you're ready to predict in production!
104
-
105
- ```ruby
106
- MyTrainer.train # Yay, we did it!
107
- MyTrainer.deploy # Let the production hosts know it's live!
108
- MyTrainer.predict(customer_data: "I am worth a lot of money")
109
- # prediction: true!
110
- ```
111
-
112
- ## Mount The Engine
113
-
114
- ```ruby
115
- Rails.application.routes.draw do
116
- mount EasyML::Engine, at: "easy_ml"
117
- end
118
- ```
119
-
120
- ## Data Management
121
-
122
- EasyML provides a comprehensive data management system that handles all preprocessing tasks, including splitting data into train, test, and validation sets, and avoiding data leakage. The primary abstraction for data handling is the `Dataset` class, which ensures data is properly managed and prepared for machine learning tasks.
123
-
124
- ### Preprocessing Features
125
-
126
- EasyML offers a variety of preprocessing features to prepare your data for machine learning models. Here's a complete list of available preprocessing steps and examples of when to use them:
127
-
128
- - **Mean Imputation**: Replace missing values with the mean of the feature. Use this when you want to maintain the average value of the data.
129
-
130
- ```ruby
131
- annual_revenue: {
132
- mean: true
133
- }
134
- ```
135
-
136
- - **Median Imputation**: Replace missing values with the median of the feature. This is useful when you want to maintain the central tendency of the data without being affected by outliers.
137
-
138
- ```ruby
139
- annual_revenue: {
140
- median: true
141
- }
142
- ```
143
-
144
- - **Forward Fill (ffill)**: Fill missing values with the last observed value. Use this for time series data where the last known value is a reasonable estimate for missing values.
145
-
146
- ```ruby
147
- created_date: {
148
- ffill: true
149
- }
150
- ```
151
-
152
- - **Most Frequent Imputation**: Replace missing values with the most frequently occurring value. This is useful for categorical data where the mode is a reasonable estimate for missing values.
153
-
154
- ```ruby
155
- loan_purpose: {
156
- most_frequent: true
157
- }
158
- ```
159
-
160
- - **Constant Imputation**: Replace missing values with a constant value. Use this when you have a specific value that should be used for missing data.
161
-
162
- ```ruby
163
- loan_purpose: {
164
- constant: { fill_value: 'unknown' }
165
- }
166
- ```
167
-
168
- - **Today Imputation**: Fill missing date values with the current date. Use this for features that should default to the current date.
169
-
170
- ```ruby
171
- created_date: {
172
- today: true
173
- }
174
- ```
175
-
176
- - **One-Hot Encoding**: Convert categorical variables into a set of binary variables. Use this when you have categorical data that needs to be converted into a numerical format for model training.
177
-
178
- ```ruby
179
- loan_purpose: {
180
- one_hot: true
181
- }
182
- ```
183
-
184
- - **Ordinal Encoding**: Convert categorical variables into integer labels. Use this when you have categorical data that can be ordinally encoded.
185
-
186
- ```ruby
187
- loan_purpose: {
188
- categorical: {
189
- ordinal_encoding: true
190
- }
191
- }
192
- ```
193
-
194
- ### Other Dataset Features
195
-
196
- - **Data Splitting**: Automatically split data into train, test, and validation sets using various strategies, such as date-based splitting.
197
- - **Data Synchronization**: Ensure data is synced from its source, such as S3 or local files.
198
- - **Batch Processing**: Process data in batches to handle large datasets efficiently.
199
- - **Null Handling**: Alert and handle null values in datasets to ensure data quality.
200
-
201
- ## Feature Store
202
-
203
- The Feature Store is a powerful component of EasyML that helps you manage, compute, and serve features for your machine learning models. Here's how to use it effectively:
204
-
205
- ### Setting Up Features
206
-
207
- 1. Create a `features` directory in your application:
208
-
209
- ```bash
210
- mkdir app/features
211
- ```
212
-
213
- 2. Create feature classes in this directory. Each feature should include the `EasyML::Features` module:
214
-
215
- ```ruby
216
- class MyFeature
217
- include EasyML::Features
218
-
219
- def transform(df, feature)
220
- # Your feature transformation logic here
221
- end
222
-
223
- feature name: "My Feature",
224
- description: "Description of what this feature does"
225
- end
226
- ```
227
-
228
- ### Feature Types and Configurations
229
-
230
- #### Simple Transform-Only Features
231
-
232
- For features that can be computed using only the input columns:
233
-
234
- ```ruby
235
- class DidConvert
236
- include EasyML::Features
237
-
238
- def transform(df, feature)
239
- df.with_column(
240
- (Polars.col("rev") > 0).alias("did_convert")
241
- )
242
- end
243
-
244
- feature name: "did_convert",
245
- description: "Boolean indicating if conversion occurred"
246
- end
247
- ```
248
-
249
- #### Batch Processing Features
250
-
251
- For features that require processing large datasets in chunks:
252
-
253
- ```ruby
254
- class LastConversionTimeFeature
255
- include EasyML::Features
256
-
257
- def batch(reader, feature)
258
- # Efficiently query only the company_id column for batching
259
- # This will create batches of batch_size records (default 1000)
260
- reader.query(select: ["company_id"], unique: true)["company_id"]
261
- end
262
-
263
- def fit(reader, feature, options = {})
264
- batch_start = options.dig(:batch_start)
265
- batch_end = options.dig(:batch_end)
266
-
267
- # More efficient than is_in for continuous ranges
268
- df = reader.query(
269
- filter: Polars.col("company_id").is_between(batch_start, batch_end),
270
- select: ["id", "company_id", "converted_at", "created_at"],
271
- sort: ["company_id", "created_at"]
272
- )
273
-
274
- # For each company, find the last time they converted before each application
275
- #
276
- # This value will be cached in the feature store for fast inference retrieval
277
- df.with_columns([
278
- Polars.col("converted_at")
279
- .shift(1)
280
- .filter(Polars.col("converted_at").is_not_null())
281
- .over("company_id")
282
- .alias("last_conversion_time"),
283
-
284
- # Also compute days since last conversion
285
- (Polars.col("created_at") - Polars.col("last_conversion_time"))
286
- .dt.days()
287
- .alias("days_since_last_conversion")
288
- ])[["id", "last_conversion_time", "days_since_last_conversion"]]
289
- end
290
-
291
- def transform(df, feature)
292
- # Pull the pre-computed values from the feature store
293
- stored_df = feature.query(filter: Polars.col("id").is_in(df["id"]))
294
- return df if stored_df.empty?
295
-
296
- df.join(stored_df, on: "id", how: "left")
297
- end
298
-
299
- feature name: "Last Conversion Time",
300
- description: "Computes the last time a company converted before each application",
301
- batch_size: 1000, # Process 1000 companies at a time
302
- primary_key: "id",
303
- cache_for: 24.hours # Cache feature values for 24 hours after running fit
304
- end
305
- ```
306
-
307
- This example demonstrates several key concepts:
308
-
309
- 1. **Efficient Batching**: The `batch` method uses the reader to lazily query only the necessary column for batching
310
- 1. **Batches Keep Groups Together**: All records with the same `company_id` need to be in the same batch to properly compute the feature, so we create a custom batch (instead of using the primary key `id` column, which would split up companies into different batches)
311
- 1. **Column Selection**: Only selects required columns in the reader query
312
- 1. **Feature Computation**: Computes multiple related features (last conversion time and days since) in a single pass.
313
- 1. **Automatic Feature Store Caching**: The feature store automatically caches feature values returned from the `fit` method
314
-
315
- ### Performance Optimization
316
-
317
- #### Caching During Development
318
-
319
- Use `cache_for` to save processing time during development:
320
-
321
- ```ruby
322
- feature name: "My Feature",
323
- cache_for: 24.hours # After running fit, this feature will be cached for 24 hours (unless new data is read from datasource, like S3)
324
- ```
325
-
326
- #### Early Returns
327
-
328
- Always implement early returns in your transform method to avoid unnecessary reprocessing:
329
-
330
- ```ruby
331
- def transform(df, feature)
332
- return df if df["required_column"].nil?
333
- # Feature computation logic
334
- end
335
- ```
336
-
337
- #### Using Reader vs DataFrame
338
-
339
- - The Polars `reader` is a lazy reader that allows you to query data incrementally.
340
- - If your feature includes a `batch` method or uses the `batch_size` variable, you will receive a reader instead of a dataframe in the `fit` method
341
-
342
- ```ruby
343
- def fit(reader, feature)
344
- df = reader.query(select: ["column1", "column2"])
345
- # Process only needed columns
346
- end
347
- ```
348
-
349
- - If you don't have a `batch` method or don't use the `batch_size` variable, you will receive a dataframe in the `fit` method
350
-
351
- ````ruby
352
- def fit(df, feature)
353
- # process directly on dataframe
354
- end
355
-
356
- - To ensure you get a reader instead of a dataframe, include the `batch` method
357
-
358
- ```ruby
359
- def batch(reader, feature)
360
- reader.query(select: ["column1"])["column1"]
361
- end
362
-
363
- feature name: "My Feature", batch_size: 1_000
364
- ````
365
-
366
- ### Production Considerations
367
-
368
- #### Handling Missing Data
369
-
370
- When processing historical data:
371
-
372
- 1. Check for missing dates:
373
-
374
- ```ruby
375
- def transform(df, feature)
376
- missing_dates = feature.store.missing_dates(start_date, end_date)
377
- return df if missing_dates.empty?
378
-
379
- # Process only missing dates
380
- process_dates(df, missing_dates)
381
- end
382
- ```
383
-
384
- ### Best Practices
385
-
386
- 1. Always specify a `primary_key` to allow the feature store to partition your data
387
- 1. Use `batch/fit` to process large datasets in batches
388
- 1. Use `batch/fit` to allow faster inference feature computation
389
- 1. Use transform-only features when all required columns will be available on the inference dataset
390
- 1. Use `cache_for` to save processing time during development
391
- 1. Only query necessary columns using the reader
392
-
393
- ## Installation
394
-
395
- Install necessary Python dependencies
396
-
397
- 1. **Install Python dependencies (don't worry, all code is in Ruby, we just call through to Python)**
398
-
399
- ```bash
400
- pip install wandb optuna
401
- ```
402
-
403
- 1. **Install the gem**:
404
-
405
- ```bash
406
- gem install easy_ml
407
- ```
408
-
409
- 2. **Run the generator to store model versions**:
410
-
411
- ```bash
412
- rails generate easy_ml:migration
413
- rails db:create # If this is a new app
414
- rails db:migrate
415
- ```
416
-
417
- 3. Add the `easy_ml` dir to your `.gitignore` — This is where datasets and model files will be downloaded
418
-
419
- ```
420
- # .gitignore
421
- easy_ml/
422
- ```
423
-
424
- ## Usage
425
-
426
- To use EasyML in your Rails application, follow these steps:
427
-
428
- 1. **Define your preprocessing steps** in a configuration hash. For example:
429
-
430
- ```ruby
431
- preprocessing_steps = {
432
- training: {
433
- annual_revenue: {
434
- median: true,
435
- clip: { min: 0, max: 1_000_000 }
436
- },
437
- loan_purpose: {
438
- categorical: {
439
- categorical_min: 2,
440
- one_hot: true
441
- }
442
- }
443
- }
444
- }
445
- ```
446
-
447
- 2. **Create a dataset** using the `EasyML::Data::Dataset` class, providing necessary configurations such as data source, target, and preprocessing steps.
448
-
449
- 3. **Train a model** using the `EasyML::Models` module, specifying the model class and configuration.
450
-
451
- 4. **Deploy the model** by marking it as live and storing it in the configured S3 bucket.
452
-
453
- ## Development
454
-
455
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
456
-
457
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
458
-
459
- ## Contributing
460
-
461
- 1. Install Appraisals gemfiles:
462
-
463
- ```bash
464
- bundle exec appraisal install
465
- ```
466
-
467
- 1. Creating a test app:
468
-
469
- a. Follow the typical steps
470
- b. Declare an environment variable: `EASY_ML_DEV=true`, using Figaro, dotenv, or similar to load development assets
471
- c. Run `yarn vite dev` in both the `easy_ml` gem and test app directories
472
-
473
- 1. Building production assets
474
-
475
- ```bash
476
- bin/vite_build
477
- ```
478
-
479
- 1. Ensure you run tests against all supported Rails versions
480
-
481
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/easy_ml. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/easy_ml/blob/main/CODE_OF_CONDUCT.md).
482
-
483
- ## License
484
-
485
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
486
-
487
- ## Code of Conduct
488
-
489
- Everyone interacting in the EasyML project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/easy_ml/blob/main/CODE_OF_CONDUCT.md).
490
-
491
- ## Expected Future Enhancements
492
-
493
- - **Support for Additional Models**: Integration with LightGBM, TensorFlow, and PyTorch.
494
- - **Expanded Data Source Support**: Ability to pull data from SQL databases and REST APIs.
495
- - **Enhanced Deployment Options**: More flexible deployment strategies and integration with CI/CD pipelines.
496
- - **Advanced Monitoring and Logging**: Improved tools for monitoring model performance and logging.
497
- - **User Interface Improvements**: Enhanced UI components for managing models and datasets.
data/Rakefile DELETED
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sprockets/railtie"
4
- require "bundler/gem_tasks"
5
- require "rspec/core/rake_task"
6
-
7
- RSpec::Core::RakeTask.new(:spec)
8
-
9
- require "rubocop/rake_task"
10
-
11
- RuboCop::RakeTask.new
12
-
13
- task default: %i[spec rubocop]
14
-
15
- Bundler.require(:default)
16
-
17
- # Load your gem's code
18
- require_relative "lib/easy_ml"
19
-
20
- # Load the annotate tasks
21
- require "annotate/annotate_models"
22
-
23
- task :environment do
24
- require "combustion"
25
- require "sprockets"
26
- Combustion.path = "spec/internal"
27
- Combustion.initialize! :active_record do |config|
28
- config.assets = ActiveSupport::OrderedOptions.new # Stub to avoid errors
29
- config.assets.enabled = false # Set false since assets are handled by Vite
30
- end
31
- EasyML::Engine.eager_load!
32
- end
33
-
34
- namespace :easy_ml do
35
- task annotate_models: :environment do
36
- model_dir = File.expand_path("app/models", EasyML::Engine.root)
37
- $LOAD_PATH.unshift(model_dir) unless $LOAD_PATH.include?(model_dir)
38
-
39
- AnnotateModels.do_annotations(
40
- is_rake: true,
41
- model_dir: [EasyML::Engine.root.join("app/models/easy_ml").to_s],
42
- root_dir: [EasyML::Engine.root.join("app/models/easy_ml").to_s],
43
- include_modules: true, # Include modules/namespaces in the annotation
44
- )
45
- end
46
-
47
- task :create_test_migrations do
48
- require "combustion"
49
- require "rails/generators"
50
- require_relative "lib/easy_ml/railtie/generators/migration/migration_generator"
51
-
52
- db_files = Dir.glob(EasyML::Engine.root.join("spec/internal/db/migrate/**/*"))
53
-
54
- FileUtils.rm(db_files)
55
- Rails::Generators.invoke("easy_ml:migration", [], { destination_root: EasyML::Engine.root.join("spec/internal") })
56
- end
57
- end