easy_ml 0.2.0.pre.rc83 → 0.2.0.pre.rc84
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff472f1d00b1629047b7b414ae6d8104bc34529602808d919e5117ba4fd99077
|
4
|
+
data.tar.gz: 676ba30c99251193848aa2f02957e18d1abc9fdfef112ab0699720e8edc96d1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a69606a0cf1a10939c5fa29ac8425e2cfbec08dc99e4c0dda510b7cc227171654a89f11ac9cba987636e5cf2fa7ada7d98e179dc04f880514fbf6013f2f31bda
|
7
|
+
data.tar.gz: 5bf4737b1d4fa4dcefa999be84e76152c3a665826a5e55d72a9a120fbf5dd79cf53a44a4f38dc78fa6abc40f38e1c13779c0ab7b98f219fe29c29005fd1e5134
|
data/lib/easy_ml/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc83
|
4
|
+
version: 0.2.0.pre.rc84
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -719,9 +719,6 @@ files:
|
|
719
719
|
- lib/easy_ml/data/dataset_manager/writer/partitioned.rb
|
720
720
|
- lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb
|
721
721
|
- lib/easy_ml/data/date_converter.rb
|
722
|
-
- lib/easy_ml/data/embeddings.rb
|
723
|
-
- lib/easy_ml/data/embeddings/adapters.rb
|
724
|
-
- lib/easy_ml/data/embeddings/compression.rb
|
725
722
|
- lib/easy_ml/data/partition.rb
|
726
723
|
- lib/easy_ml/data/partition/boundaries.rb
|
727
724
|
- lib/easy_ml/data/polars_column.rb
|
@@ -1,56 +0,0 @@
|
|
1
|
-
module EasyML
|
2
|
-
module Data
|
3
|
-
class Embeddings
|
4
|
-
class Adapters
|
5
|
-
attr_accessor :model, :config
|
6
|
-
|
7
|
-
ADAPTERS = {
|
8
|
-
anthropic: Langchain::LLM::Anthropic,
|
9
|
-
gemini: Langchain::LLM::GoogleGemini,
|
10
|
-
openai: Langchain::LLM::OpenAI,
|
11
|
-
ollama: Langchain::LLM::Ollama,
|
12
|
-
}
|
13
|
-
|
14
|
-
DEFAULTS = {
|
15
|
-
api_key: {
|
16
|
-
anthropic: ENV["ANTHROPIC_API_KEY"],
|
17
|
-
gemini: ENV["GEMINI_API_KEY"],
|
18
|
-
openai: ENV["OPENAI_API_KEY"],
|
19
|
-
ollama: ENV["OLLAMA_API_KEY"],
|
20
|
-
},
|
21
|
-
}
|
22
|
-
|
23
|
-
def initialize(model, config = {})
|
24
|
-
@model = model.to_sym
|
25
|
-
@config = config.symbolize_keys
|
26
|
-
apply_defaults
|
27
|
-
end
|
28
|
-
|
29
|
-
def embed(df, col)
|
30
|
-
pick
|
31
|
-
texts = df[col].to_a
|
32
|
-
df = df.with_column(
|
33
|
-
embeddings: adapter.embed(text: texts),
|
34
|
-
)
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
def pick
|
40
|
-
@adapter ||= ADAPTERS[@model].new(config)
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def apply_defaults
|
45
|
-
@config = @config.deep_symbolize_keys
|
46
|
-
|
47
|
-
DEFAULTS.each do |k, v|
|
48
|
-
unless @config.key?(k)
|
49
|
-
@config[k] = v[@model]
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1,43 +0,0 @@
|
|
1
|
-
module EasyML
|
2
|
-
module Data
|
3
|
-
class Embeddings
|
4
|
-
COMPRESSION_DEFAULT = {
|
5
|
-
present: :balanced,
|
6
|
-
}
|
7
|
-
|
8
|
-
attr_reader :df, :column, :model, :adapter, :compression,
|
9
|
-
:embeddings, :compressed_embeddings
|
10
|
-
|
11
|
-
def initialize(options = {})
|
12
|
-
@df = options[:df]
|
13
|
-
@column = options[:column]
|
14
|
-
@model = options[:model]
|
15
|
-
@config = options[:config] || {}
|
16
|
-
@compression = options[:compression] || COMPRESSION_DEFAULT
|
17
|
-
end
|
18
|
-
|
19
|
-
def create
|
20
|
-
embed
|
21
|
-
compress
|
22
|
-
end
|
23
|
-
|
24
|
-
def embed
|
25
|
-
@embeddings ||= adapter.embed(df, column)
|
26
|
-
end
|
27
|
-
|
28
|
-
def compress
|
29
|
-
@compressed_embeddings ||= compression_adapter.compress(embeddings)
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
def adapter
|
35
|
-
@adapter ||= EasyML::Data::Embeddings::Adapters.new(model, config)
|
36
|
-
end
|
37
|
-
|
38
|
-
def compression_adapter
|
39
|
-
@compression_adapter ||= EasyML::Data::Embeddings::Compression.new(compression)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|