easy_ml 0.2.0.pre.rc83 → 0.2.0.pre.rc84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2d5a13f7e4d772c20491b6c8c1ffd52674e0e29972ead72409904118fdc33035
4
- data.tar.gz: 56f66ab6a77dbadb6512e92fe1cf4651419ad46c7b2be42c12ea219e70f7991b
3
+ metadata.gz: ff472f1d00b1629047b7b414ae6d8104bc34529602808d919e5117ba4fd99077
4
+ data.tar.gz: 676ba30c99251193848aa2f02957e18d1abc9fdfef112ab0699720e8edc96d1a
5
5
  SHA512:
6
- metadata.gz: 53fdbb0198863035ba8ac69b36936ce20a5d7a3fe057f13f8cb95e67e437f31ff5606e8f976a71f9d7772a788ce986c6e0afb61c5a76ef2eac5163e736144d9d
7
- data.tar.gz: cbfc250d20481840fe45342df010e9f2a1adb8fe10d963a068d127d784e6b20e72c4ae7b99d8ece5b71c7e32a1ace81bb6d71c10f1a3ad019b7ec987ef891dd0
6
+ metadata.gz: a69606a0cf1a10939c5fa29ac8425e2cfbec08dc99e4c0dda510b7cc227171654a89f11ac9cba987636e5cf2fa7ada7d98e179dc04f880514fbf6013f2f31bda
7
+ data.tar.gz: 5bf4737b1d4fa4dcefa999be84e76152c3a665826a5e55d72a9a120fbf5dd79cf53a44a4f38dc78fa6abc40f38e1c13779c0ab7b98f219fe29c29005fd1e5134
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc83"
4
+ VERSION = "0.2.0-rc84"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc83
4
+ version: 0.2.0.pre.rc84
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-02-20 00:00:00.000000000 Z
11
+ date: 2025-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -719,9 +719,6 @@ files:
719
719
  - lib/easy_ml/data/dataset_manager/writer/partitioned.rb
720
720
  - lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb
721
721
  - lib/easy_ml/data/date_converter.rb
722
- - lib/easy_ml/data/embeddings.rb
723
- - lib/easy_ml/data/embeddings/adapters.rb
724
- - lib/easy_ml/data/embeddings/compression.rb
725
722
  - lib/easy_ml/data/partition.rb
726
723
  - lib/easy_ml/data/partition/boundaries.rb
727
724
  - lib/easy_ml/data/polars_column.rb
@@ -1,56 +0,0 @@
1
- module EasyML
2
- module Data
3
- class Embeddings
4
- class Adapters
5
- attr_accessor :model, :config
6
-
7
- ADAPTERS = {
8
- anthropic: Langchain::LLM::Anthropic,
9
- gemini: Langchain::LLM::GoogleGemini,
10
- openai: Langchain::LLM::OpenAI,
11
- ollama: Langchain::LLM::Ollama,
12
- }
13
-
14
- DEFAULTS = {
15
- api_key: {
16
- anthropic: ENV["ANTHROPIC_API_KEY"],
17
- gemini: ENV["GEMINI_API_KEY"],
18
- openai: ENV["OPENAI_API_KEY"],
19
- ollama: ENV["OLLAMA_API_KEY"],
20
- },
21
- }
22
-
23
- def initialize(model, config = {})
24
- @model = model.to_sym
25
- @config = config.symbolize_keys
26
- apply_defaults
27
- end
28
-
29
- def embed(df, col)
30
- pick
31
- texts = df[col].to_a
32
- df = df.with_column(
33
- embeddings: adapter.embed(text: texts),
34
- )
35
- end
36
-
37
- private
38
-
39
- def pick
40
- @adapter ||= ADAPTERS[@model].new(config)
41
- self
42
- end
43
-
44
- def apply_defaults
45
- @config = @config.deep_symbolize_keys
46
-
47
- DEFAULTS.each do |k, v|
48
- unless @config.key?(k)
49
- @config[k] = v[@model]
50
- end
51
- end
52
- end
53
- end
54
- end
55
- end
56
- end
@@ -1,43 +0,0 @@
1
- module EasyML
2
- module Data
3
- class Embeddings
4
- COMPRESSION_DEFAULT = {
5
- present: :balanced,
6
- }
7
-
8
- attr_reader :df, :column, :model, :adapter, :compression,
9
- :embeddings, :compressed_embeddings
10
-
11
- def initialize(options = {})
12
- @df = options[:df]
13
- @column = options[:column]
14
- @model = options[:model]
15
- @config = options[:config] || {}
16
- @compression = options[:compression] || COMPRESSION_DEFAULT
17
- end
18
-
19
- def create
20
- embed
21
- compress
22
- end
23
-
24
- def embed
25
- @embeddings ||= adapter.embed(df, column)
26
- end
27
-
28
- def compress
29
- @compressed_embeddings ||= compression_adapter.compress(embeddings)
30
- end
31
-
32
- private
33
-
34
- def adapter
35
- @adapter ||= EasyML::Data::Embeddings::Adapters.new(model, config)
36
- end
37
-
38
- def compression_adapter
39
- @compression_adapter ||= EasyML::Data::Embeddings::Compression.new(compression)
40
- end
41
- end
42
- end
43
- end