discourse_ai-tokenizers 0.2.0 → 0.3.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d23181327ee259c76aa29f86a2b40702b39524705d11afd14239dfe6e3e90009
-  data.tar.gz: 76c2d7bbe1c4ebe97dff5576aec832e8664831ade0b819cf9afd244600f1be38
+  metadata.gz: 36b5e98f002fe493df0c192a5ba86cf1a65d7c5d58207a3ee51a151c71d25002
+  data.tar.gz: c20fdaa5692731610370d9c8bf790a12ace12a5b3513d95f238e64369396dfcf
 SHA512:
-  metadata.gz: 02ad662edd31f57b8cba0b1ea221bb7c9f1684d65b9fcd1002739eebf4c1393152ee3f17ea675a8bd596eed4ba9a4fc61cea20b49bd7f3c5b86f64d0e2772bbb
-  data.tar.gz: 9869f1d01ce0388ac2bec619060e7ace28ad5155ff336667072d965a9405217b0ad21e36b258f384ef5456f3e3c3ee35b9c10c536a11e562c4ca5cb948c1ff81
+  metadata.gz: f83d3e648f680f40c099add8596111d25e74cadb21109bcc7eb1914b12c19b42b118435cd0c99e2781002ea5090325a59b69ded692a05fc8ea98c86a6f13bd5e
+  data.tar.gz: 0bc123e4127d01bb85650147b4c56134c2789b12ec3edffa0377512482e731e1718b0811f7df18b9b8978e0f4556a10de030de1d4548496e11bc180979d9cf4b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
 ## [Unreleased]
 
+## [0.3.1] - 2025-07-07
+
+- Refactor OpenAiO200kTokenizer class to OpenAiTokenizer as primary class name
+- Update backward compatibility alias (OpenAiO200kTokenizer now aliases OpenAiTokenizer)
+- Update version to 0.3.1
+
+## [0.3.0] - 2025-07-04
+
+- Add OpenAiCl100kTokenizer class for cl100k_base encoding
+- Refactor OpenAiTokenizer to OpenAiO200kTokenizer with backward compatibility alias
+- Update version to 0.3.0
+
 ## [0.2.0] - 2025-07-02
 
 - Initial release
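
Taken together, 0.3.0 first renamed OpenAiTokenizer to OpenAiO200kTokenizer and 0.3.1 then made OpenAiTokenizer the primary name again, keeping an alias across both renames so callers never break. A minimal sketch of what this means at the call site, assuming the gem's top-level require path matches its lib layout as listed in the metadata:

```ruby
require "discourse_ai/tokenizers"

# Primary name as of 0.3.1, backed by the o200k_base encoding:
DiscourseAi::Tokenizer::OpenAiTokenizer

# Pre-0.3.1 call sites keep working through the alias:
DiscourseAi::Tokenizer::OpenAiO200kTokenizer

# Added in 0.3.0, backed by the cl100k_base encoding:
DiscourseAi::Tokenizer::OpenAiCl100kTokenizer.tokenizer # => Tiktoken cl100k_base encoding
```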
data/lib/discourse_ai/tokenizer/open_ai_cl100k_tokenizer.rb ADDED
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Tokenizer
+    # Wrapper for OpenAI tokenizer library for compatibility with Discourse AI API
+    class OpenAiCl100kTokenizer < OpenAiTokenizer
+      class << self
+        def tokenizer
+          @tokenizer ||= Tiktoken.get_encoding("cl100k_base")
+        end
+      end
+    end
+  end
+end
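
The added class overrides only the memoized `tokenizer` factory and inherits everything else from OpenAiTokenizer, so supporting a further tiktoken encoding would be a near-identical subclass. A hypothetical sketch (OpenAiP50kTokenizer is not part of the gem; p50k_base is another encoding shipped with tiktoken):

```ruby
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Hypothetical example, not in the gem: a new encoding only needs
    # to override the memoized factory method.
    class OpenAiP50kTokenizer < OpenAiTokenizer
      class << self
        def tokenizer
          @tokenizer ||= Tiktoken.get_encoding("p50k_base")
        end
      end
    end
  end
end
```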
data/lib/discourse_ai/tokenizer/open_ai_tokenizer.rb CHANGED
@@ -54,5 +54,7 @@ module DiscourseAi
         end
       end
     end
+
+    OpenAiO200kTokenizer = OpenAiTokenizer
   end
 end
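
Since the backward-compatibility alias is a plain constant assignment, both names refer to one class object rather than a subclass, and cached state such as the memoized `@tokenizer` is shared. A quick check, assuming the gem is loaded:

```ruby
legacy  = DiscourseAi::Tokenizer::OpenAiO200kTokenizer
primary = DiscourseAi::Tokenizer::OpenAiTokenizer

legacy.equal?(primary) # => true: the very same Class object
legacy.name            # => "DiscourseAi::Tokenizer::OpenAiTokenizer"
```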
data/lib/discourse_ai/tokenizers/version.rb CHANGED
@@ -2,6 +2,6 @@
 
 module DiscourseAi
   module Tokenizers
-    VERSION = "0.2.0"
+    VERSION = "0.3.1"
   end
 end
data/lib/discourse_ai/tokenizers.rb CHANGED
@@ -8,6 +8,7 @@ require_relative "tokenizer/basic_tokenizer"
 require_relative "tokenizer/bert_tokenizer"
 require_relative "tokenizer/anthropic_tokenizer"
 require_relative "tokenizer/open_ai_tokenizer"
+require_relative "tokenizer/open_ai_cl100k_tokenizer"
 require_relative "tokenizer/all_mpnet_base_v2_tokenizer"
 require_relative "tokenizer/multilingual_e5_large_tokenizer"
 require_relative "tokenizer/bge_large_en_tokenizer"
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: discourse_ai-tokenizers
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Rafael Silva
@@ -106,6 +106,7 @@ files:
 - lib/discourse_ai/tokenizer/llama3_tokenizer.rb
 - lib/discourse_ai/tokenizer/mistral_tokenizer.rb
 - lib/discourse_ai/tokenizer/multilingual_e5_large_tokenizer.rb
+- lib/discourse_ai/tokenizer/open_ai_cl100k_tokenizer.rb
 - lib/discourse_ai/tokenizer/open_ai_tokenizer.rb
 - lib/discourse_ai/tokenizer/qwen_tokenizer.rb
 - lib/discourse_ai/tokenizers.rb