discourse_ai-tokenizers 0.1.1 → 0.1.2
This diff represents the content of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions exactly as they appear in their public registry.
- checksums.yaml +4 -4
- data/lib/discourse_ai/{tokenizers → tokenizer}/all_mpnet_base_v2_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/anthropic_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/basic_tokenizer.rb +7 -7
- data/lib/discourse_ai/{tokenizers → tokenizer}/bert_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/bge_large_en_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/bge_m3_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/gemini_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/llama3_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/mistral_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/multilingual_e5_large_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/open_ai_tokenizer.rb +1 -1
- data/lib/discourse_ai/{tokenizers → tokenizer}/qwen_tokenizer.rb +1 -1
- data/lib/discourse_ai/tokenizers/version.rb +1 -1
- data/lib/discourse_ai/tokenizers.rb +13 -12
- data/sig/discourse_ai/tokenizers.rbs +1 -1
- metadata +13 -13
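
Every per-model tokenizer file moves from the plural tokenizers/ directory to the singular tokenizer/, and the Ruby namespace moves with it. For consumers that looks roughly like the sketch below; the 0.1.1 constant name is inferred from the rename, since the truncated diff never shows the old names in full.

    # 0.1.1 — per-model tokenizers presumably lived under the plural namespace
    DiscourseAi::Tokenizers::OpenAiTokenizer

    # 0.1.2 — the same class under the new singular namespace
    DiscourseAi::Tokenizer::OpenAiTokenizer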
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b9e8c362a4c2c227617258dee4d75e3d5555e1c4fde8575cd060a0efa2eb36b9
+  data.tar.gz: e9b3a6f950399628d6faec4bcf5a472c556a8f936c8fe82d511c63077f2d6c64
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 69ef898cca64debd9d89297caf97531a9523e6d1fac769eaf86db05aca5aa3568b1e54b5065e09bda35b67deb447e38a1ef31997e3fb23ac0fa77d0a12af2983
+  data.tar.gz: b5561b0d77c81fb5076adfbd74a4232ba1a0c8b50fbb1de0c1f59de0c0f8753e79890f9439b375146dc28b59db434da3003d49cae789b3cd19ddcfb3f94c3ecb
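
Only the digests change here, as expected for any new release: checksums.yaml records the SHA256 and SHA512 sums of the two archives inside the .gem container, metadata.gz and data.tar.gz (the 0.1.1 values are truncated in this diff). A minimal Ruby sketch of checking the published SHA256 values, assuming the package was fetched with "gem fetch discourse_ai-tokenizers -v 0.1.2" and unpacked with "tar -xf" into the current directory:

    require "digest"

    # SHA256 digests published in checksums.yaml for 0.1.2
    EXPECTED = {
      "metadata.gz" => "b9e8c362a4c2c227617258dee4d75e3d5555e1c4fde8575cd060a0efa2eb36b9",
      "data.tar.gz" => "e9b3a6f950399628d6faec4bcf5a472c556a8f936c8fe82d511c63077f2d6c64"
    }.freeze

    EXPECTED.each do |file, expected|
      actual = Digest::SHA256.file(file).hexdigest
      status = actual == expected ? "OK" : "MISMATCH"
      puts "#{file}: #{status}"
    end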
data/lib/discourse_ai/{tokenizers → tokenizer}/basic_tokenizer.rb
RENAMED
@@ -1,18 +1,18 @@
 # frozen_string_literal: true
 
 module DiscourseAi
-  module Tokenizers
+  module Tokenizer
     # Base class for tokenizers to inherit from
     class BasicTokenizer
       class << self
         def available_llm_tokenizers
           [
-            DiscourseAi::Tokenizers::AnthropicTokenizer,
-            DiscourseAi::Tokenizers::GeminiTokenizer,
-            DiscourseAi::Tokenizers::Llama3Tokenizer,
-            DiscourseAi::Tokenizers::MistralTokenizer,
-            DiscourseAi::Tokenizers::OpenAiTokenizer,
-            DiscourseAi::Tokenizers::QwenTokenizer
+            DiscourseAi::Tokenizer::AnthropicTokenizer,
+            DiscourseAi::Tokenizer::GeminiTokenizer,
+            DiscourseAi::Tokenizer::Llama3Tokenizer,
+            DiscourseAi::Tokenizer::MistralTokenizer,
+            DiscourseAi::Tokenizer::OpenAiTokenizer,
+            DiscourseAi::Tokenizer::QwenTokenizer
           ]
         end
 
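
The only substantive change in this file is the namespace: BasicTokenizer and the classes it enumerates now live under DiscourseAi::Tokenizer (the removed lines are truncated above, but the old names follow from the directory rename). After upgrading, listing the bundled LLM tokenizers would look like this sketch; the require path follows the gem's entry point, lib/discourse_ai/tokenizers.rb:

    require "discourse_ai/tokenizers"

    # Enumerate the LLM tokenizer classes the gem ships, under the new namespace
    DiscourseAi::Tokenizer::BasicTokenizer.available_llm_tokenizers.each do |klass|
      puts klass.name  # e.g. "DiscourseAi::Tokenizer::AnthropicTokenizer"
    end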
data/lib/discourse_ai/tokenizers.rb
CHANGED
@@ -3,18 +3,19 @@
 require "tokenizers"
 require "tiktoken_ruby"
 require_relative "tokenizers/version"
-
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
-require_relative "
+
+require_relative "tokenizer/basic_tokenizer"
+require_relative "tokenizer/bert_tokenizer"
+require_relative "tokenizer/anthropic_tokenizer"
+require_relative "tokenizer/open_ai_tokenizer"
+require_relative "tokenizer/all_mpnet_base_v2_tokenizer"
+require_relative "tokenizer/multilingual_e5_large_tokenizer"
+require_relative "tokenizer/bge_large_en_tokenizer"
+require_relative "tokenizer/bge_m3_tokenizer"
+require_relative "tokenizer/llama3_tokenizer"
+require_relative "tokenizer/gemini_tokenizer"
+require_relative "tokenizer/qwen_tokenizer"
+require_relative "tokenizer/mistral_tokenizer"
 
 module DiscourseAi
   module Tokenizers
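
The entry point keeps its file name and the outer DiscourseAi::Tokenizers module (still used for tokenizers/version), but now loads each per-model class from the singular tokenizer/ directory. The diff shows none of the per-model implementations, but given the "Base class for tokenizers to inherit from" comment above, one of those files plausibly follows this pattern; HypotheticalTokenizer is illustrative only:

    # frozen_string_literal: true

    module DiscourseAi
      module Tokenizer
        # Hypothetical per-model tokenizer following the pattern the diff
        # establishes: a class in the singular namespace inheriting from
        # the shared BasicTokenizer base class
        class HypotheticalTokenizer < BasicTokenizer
        end
      end
    end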
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: discourse_ai-tokenizers
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Rafael Silva
@@ -96,19 +96,19 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
+- lib/discourse_ai/tokenizer/all_mpnet_base_v2_tokenizer.rb
+- lib/discourse_ai/tokenizer/anthropic_tokenizer.rb
+- lib/discourse_ai/tokenizer/basic_tokenizer.rb
+- lib/discourse_ai/tokenizer/bert_tokenizer.rb
+- lib/discourse_ai/tokenizer/bge_large_en_tokenizer.rb
+- lib/discourse_ai/tokenizer/bge_m3_tokenizer.rb
+- lib/discourse_ai/tokenizer/gemini_tokenizer.rb
+- lib/discourse_ai/tokenizer/llama3_tokenizer.rb
+- lib/discourse_ai/tokenizer/mistral_tokenizer.rb
+- lib/discourse_ai/tokenizer/multilingual_e5_large_tokenizer.rb
+- lib/discourse_ai/tokenizer/open_ai_tokenizer.rb
+- lib/discourse_ai/tokenizer/qwen_tokenizer.rb
 - lib/discourse_ai/tokenizers.rb
-- lib/discourse_ai/tokenizers/all_mpnet_base_v2_tokenizer.rb
-- lib/discourse_ai/tokenizers/anthropic_tokenizer.rb
-- lib/discourse_ai/tokenizers/basic_tokenizer.rb
-- lib/discourse_ai/tokenizers/bert_tokenizer.rb
-- lib/discourse_ai/tokenizers/bge_large_en_tokenizer.rb
-- lib/discourse_ai/tokenizers/bge_m3_tokenizer.rb
-- lib/discourse_ai/tokenizers/gemini_tokenizer.rb
-- lib/discourse_ai/tokenizers/llama3_tokenizer.rb
-- lib/discourse_ai/tokenizers/mistral_tokenizer.rb
-- lib/discourse_ai/tokenizers/multilingual_e5_large_tokenizer.rb
-- lib/discourse_ai/tokenizers/open_ai_tokenizer.rb
-- lib/discourse_ai/tokenizers/qwen_tokenizer.rb
 - lib/discourse_ai/tokenizers/version.rb
 - sig/discourse_ai/tokenizers.rbs
 - vendor/Meta-Llama-3-70B-Instruct.json
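
Past the version bump, the metadata change simply mirrors the directory rename in the gem's files list. A consumer pinning the gem would pick this release up with an ordinary Gemfile constraint, for example (a sketch; any constraint covering 0.1.2 works):

    # Gemfile
    gem "discourse_ai-tokenizers", "~> 0.1.2"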