gte 0.0.1-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ use gte::embedder::Embedder;
2
+
3
+ #[test] // Smoke test: embedding one E5-style "query: " text yields exactly one row with a non-empty second dimension.
4
+ #[ignore = "requires ext/gte/tests/fixtures/e5/tokenizer.json and model.onnx"] // Skipped by default; run with `cargo test -- --ignored` once the fixtures are in place.
5
+ fn test_e5_single_embedding_shape() {
6
+ const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/e5"); // Fixture directory, resolved at compile time relative to the crate manifest.
7
+ // NOTE(review): the trailing `0, 3` presumably mean num_threads = 0 and optimization_level = 3, mirroring the Ruby wrapper's defaults — confirm against Embedder::from_dir.
8
+ let embedder = Embedder::from_dir(DIR, 0, 3).expect("embedder should initialize");
9
+ let result = embedder
10
+ .embed(vec!["query: Hello world".to_string()])
11
+ .expect("embed should succeed");
12
+ // Only the shape is checked, not the embedding values.
13
+ assert_eq!(result.shape()[0], 1); // One row for the single input text.
14
+ assert!(result.shape()[1] > 0); // Second dimension must be non-zero (presumably the embedding width — confirm).
15
+ }
16
+
17
+ #[test] // Smoke test: embedding one caption with the CLIP fixture yields exactly one row with a non-empty second dimension.
18
+ #[ignore = "requires ext/gte/tests/fixtures/clip/tokenizer.json and model.onnx"] // Skipped by default; run with `cargo test -- --ignored` once the fixtures are in place.
19
+ fn test_clip_single_embedding_shape() {
20
+ const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/clip"); // Fixture directory, resolved at compile time relative to the crate manifest.
21
+ // NOTE(review): the trailing `0, 3` presumably mean num_threads = 0 and optimization_level = 3, mirroring the Ruby wrapper's defaults — confirm against Embedder::from_dir.
22
+ let embedder = Embedder::from_dir(DIR, 0, 3).expect("embedder should initialize");
23
+ let result = embedder
24
+ .embed(vec!["a photo of a cat".to_string()]) // Plain caption; unlike the E5 tests, no "query: " prefix is used here.
25
+ .expect("embed should succeed");
26
+ // Only the shape is checked, not the embedding values.
27
+ assert_eq!(result.shape()[0], 1); // One row for the single input text.
28
+ assert!(result.shape()[1] > 0); // Second dimension must be non-zero (presumably the embedding width — confirm).
29
+ }
30
+
31
+ #[test] // Batch test: embedding three E5-style texts in one call yields three rows with a non-empty second dimension.
32
+ #[ignore = "requires ext/gte/tests/fixtures/e5/tokenizer.json and model.onnx"] // Skipped by default; run with `cargo test -- --ignored` once the fixtures are in place.
33
+ fn test_e5_batch_embedding_shape() {
34
+ const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/e5"); // Fixture directory, resolved at compile time relative to the crate manifest.
35
+ // NOTE(review): the trailing `0, 3` presumably mean num_threads = 0 and optimization_level = 3, mirroring the Ruby wrapper's defaults — confirm against Embedder::from_dir.
36
+ let embedder = Embedder::from_dir(DIR, 0, 3).expect("embedder should initialize");
37
+ let texts = vec![ // Three inputs of differing length.
38
+ "query: first sentence".to_string(),
39
+ "query: second sentence".to_string(),
40
+ "query: third sentence for batch".to_string(),
41
+ ];
42
+
43
+ let result = embedder.embed(texts).expect("batch embed should succeed");
44
+ // Only the shape is checked, not the embedding values.
45
+ assert_eq!(result.shape()[0], 3); // One row per input text.
46
+ assert!(result.shape()[1] > 0); // Second dimension must be non-zero (presumably the embedding width — confirm).
47
+ }
48
+
49
+ #[test] // Robustness test: a very long input must embed successfully instead of returning an error.
50
+ #[ignore = "requires ext/gte/tests/fixtures/e5/tokenizer.json and model.onnx"] // Skipped by default; run with `cargo test -- --ignored` once the fixtures are in place.
51
+ fn test_e5_long_input_truncation_no_error() {
52
+ const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/e5"); // Fixture directory, resolved at compile time relative to the crate manifest.
53
+ // NOTE(review): the trailing `0, 3` presumably mean num_threads = 0 and optimization_level = 3, mirroring the Ruby wrapper's defaults — confirm against Embedder::from_dir.
54
+ let embedder = Embedder::from_dir(DIR, 0, 3).expect("embedder should initialize");
55
+ let very_long_text = "word ".repeat(1000); // 5,000 characters of repeated "word ".
56
+ let result = embedder
57
+ .embed(vec![very_long_text])
58
+ .expect("long input should be truncated without error");
59
+ // The test verifies success and output shape only; it does not itself prove that truncation occurred.
60
+ assert_eq!(result.shape()[0], 1); // One row for the single input text.
61
+ assert!(result.shape()[1] > 0); // Second dimension must be non-zero (presumably the embedding width — confirm).
62
+ }
@@ -0,0 +1,44 @@
1
+ use gte::tokenizer::Tokenizer;
2
+
3
+ #[test] // Shape test: tokenizing a two-text batch yields rows == 2 and buffers whose lengths all equal rows * cols.
4
+ #[ignore = "requires ext/gte/tests/fixtures/e5/tokenizer.json"] // Skipped by default; run with `cargo test -- --ignored` once the fixture is in place.
5
+ fn test_e5_tokenizer_output_shape() {
6
+ const TOKENIZER: &str = concat!( // Path to the tokenizer fixture, resolved at compile time relative to the crate manifest.
7
+ env!("CARGO_MANIFEST_DIR"),
8
+ "/tests/fixtures/e5/tokenizer.json"
9
+ );
10
+ // NOTE(review): `512` is presumably max_length and `true` presumably enables type_ids output — confirm against Tokenizer::new.
11
+ let tokenizer = Tokenizer::new(TOKENIZER, 512, true).expect("tokenizer should load");
12
+ let texts = vec![ // Two inputs of different length, so the shorter one presumably has to be padded.
13
+ "Hello, world!".to_string(),
14
+ "A second, longer sentence to test padding behavior.".to_string(),
15
+ ];
16
+
17
+ let tokenized = tokenizer.tokenize(&texts).expect("tokenize should succeed");
18
+
19
+ assert_eq!(tokenized.rows, 2, "batch size should be 2");
20
+ assert!(tokenized.cols > 0, "sequence length should be non-zero");
21
+ assert_eq!(tokenized.input_ids.len(), tokenized.rows * tokenized.cols); // input_ids holds exactly rows * cols entries.
22
+ assert_eq!(tokenized.attn_masks.len(), tokenized.rows * tokenized.cols); // attn_masks has the same length as input_ids.
23
+ // type_ids is unwrapped with expect, so this test fails if the tokenizer did not produce it.
24
+ let type_ids = tokenized.type_ids.as_ref().expect("type_ids should exist");
25
+ assert_eq!(type_ids.len(), tokenized.rows * tokenized.cols);
26
+ }
27
+
28
+ #[test] // Truncation test: a long input tokenized with a limit of 16 must come back with exactly 16 columns.
29
+ #[ignore = "requires ext/gte/tests/fixtures/e5/tokenizer.json"] // Skipped by default; run with `cargo test -- --ignored` once the fixture is in place.
30
+ fn test_e5_truncation_at_max_length() {
31
+ const TOKENIZER: &str = concat!( // Path to the tokenizer fixture, resolved at compile time relative to the crate manifest.
32
+ env!("CARGO_MANIFEST_DIR"),
33
+ "/tests/fixtures/e5/tokenizer.json"
34
+ );
35
+ // `16` is the max_length the final assertion refers to. NOTE(review): the meaning of the `false` flag is not visible here — confirm against Tokenizer::new.
36
+ let tokenizer = Tokenizer::new(TOKENIZER, 16, false).expect("tokenizer should load");
37
+ let long_text = "word ".repeat(200); // 1,000 characters of repeated "word ".
38
+ let tokenized = tokenizer
39
+ .tokenize(&[long_text])
40
+ .expect("tokenize should not error on long input");
41
+
42
+ assert_eq!(tokenized.rows, 1);
43
+ assert_eq!(tokenized.cols, 16, "sequence length should be truncated to max_length");
44
+ }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/lib/gte.rb ADDED
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'gte/gte'
4
+
5
+ module GTE # Ruby-facing wrapper around GTE::Embedder (presumably defined by the native extension loaded via `require 'gte/gte'` — confirm).
6
+ VERSION = File.read(File.expand_path('../VERSION', __dir__)).strip # Version string read from the VERSION file one level above this file's directory, whitespace stripped.
7
+
8
+ class Model # Holds one embedder instance and exposes text-embedding helpers.
9
+ def initialize(dir, num_threads: 0, optimization_level: 3) # dir is forwarded to the embedder along with both tuning options, passed positionally.
10
+ @embedder = GTE::Embedder.new(dir, num_threads, optimization_level)
11
+ end
12
+
13
+ def embed(texts) # A String goes through embed_one; anything else is coerced with Array() and embedded as a batch.
14
+ if texts.is_a?(String)
15
+ @embedder.embed_one(texts)
16
+ else
17
+ @embedder.embed(Array(texts)) # Array() wraps a non-array value and turns nil into [].
18
+ end
19
+ end
20
+
21
+ def [](input) # Index-style shorthand: model["text"] or model[["a", "b"]].
22
+ case input
23
+ when String then embed(input).row(0) # NOTE(review): embed(String) uses embed_one, so .row(0) is called on that result — confirm embed_one's return value responds to row.
24
+ when Array then embed(input) # Returns the batch result unchanged.
25
+ end # No else branch: any other input type makes this method return nil.
26
+ end
27
+ end
28
+
29
+ def self.new(dir, num_threads: 0, optimization_level: 3) # Convenience constructor: GTE.new(...) builds and returns a GTE::Model with the same arguments.
30
+ Model.new(dir, num_threads: num_threads, optimization_level: optimization_level)
31
+ end
32
+ end
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gte
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: arm64-darwin
6
+ authors:
7
+ - elcuervo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-04-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake-compiler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rb_sys
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec-benchmark
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - elcuervo@elcuervo.net
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - Gemfile
91
+ - LICENSE
92
+ - README.md
93
+ - Rakefile
94
+ - VERSION
95
+ - ext/gte/Cargo.toml
96
+ - ext/gte/benches/hot_path.rs
97
+ - ext/gte/build.rs
98
+ - ext/gte/extconf.rb
99
+ - ext/gte/src/embedder.rs
100
+ - ext/gte/src/error.rs
101
+ - ext/gte/src/lib.rs
102
+ - ext/gte/src/model_config.rs
103
+ - ext/gte/src/postprocess.rs
104
+ - ext/gte/src/ruby_embedder.rs
105
+ - ext/gte/src/session.rs
106
+ - ext/gte/src/tokenizer.rs
107
+ - ext/gte/tests/embedder_unit_test.rs
108
+ - ext/gte/tests/inference_integration_test.rs
109
+ - ext/gte/tests/tokenizer_unit_test.rs
110
+ - lib/gte.rb
111
+ - lib/gte/3.0/gte.bundle
112
+ - lib/gte/3.1/gte.bundle
113
+ - lib/gte/3.2/gte.bundle
114
+ - lib/gte/3.3/gte.bundle
115
+ - lib/gte/3.4/gte.bundle
116
+ - lib/gte/4.0/gte.bundle
117
+ homepage: https://github.com/elcuervo/gte
118
+ licenses:
119
+ - MIT
120
+ metadata:
121
+ rubygems_mfa_required: 'true'
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '3.0'
131
+ - - "<"
132
+ - !ruby/object:Gem::Version
133
+ version: 4.1.dev
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubygems_version: 3.5.23
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: General Text Embeddings
144
+ test_files: []