roseflow-tiktoken 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -1
- data/README.md +3 -1
- data/lib/roseflow/tiktoken/tokenizer.rb +7 -3
- data/lib/roseflow/tiktoken/version.rb +1 -1
- data/roseflow-tiktoken.gemspec +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7f3ff6fdcfd71f07e2319202fb56ac40e462c98e4a897b239bb4e1150cf1e13
|
4
|
+
data.tar.gz: 0cca81f6bd18edc889124e21598109a546ac0dfdac49c84b38f615eeae3f9b9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a737814488c6fcd78d66fd66f620da0eb2808203be94e3bfecacb0ab71ce432a071140cea65cce8b49db8227096e1930fd8c47437fe051e7fe144b8d4272f2c
|
7
|
+
data.tar.gz: bdc94e15b3d375e100e682a46b978abb81eb561aed3bf52bd206123af85f2dad279f9df91f2ada274e7ca1fca04e250cec3e0bbd35ad368f6634c5c631804541
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[tiktoken](https://github.com/openai/tiktoken) is a fast BPE tokenizer for use with OpenAI's models. `roseflow-tiktoken` gem helps you use the tokenizer in Ruby, especially with [Roseflow](https://github.com/ljuti/roseflow).
|
4
4
|
|
5
|
-
|
5
|
+
This gem wraps the [`tiktoken_ruby` gem](https://github.com/IAPark/tiktoken_ruby) for convenient use in Roseflow.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -45,6 +45,8 @@ tokenizer.decode([19952, 420, 925, 1139, 11460, 13]) # => "Turn this string into
|
|
45
45
|
| `p50k_edit` | Use for edit models like `text-davinci-edit-001`, `code-davinci-edit-001` |
|
46
46
|
| `r50k_base` (or `gpt2`) | GPT-3 models like `davinci` |
|
47
47
|
|
48
|
+
If a model is not provided or is unknown to the library, it will default to `cl100k_base` encoding.
|
49
|
+
|
48
50
|
## Development
|
49
51
|
|
50
52
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -1,12 +1,11 @@
|
|
1
|
-
require "
|
1
|
+
require "tiktoken_ruby"
|
2
2
|
|
3
3
|
module Roseflow
|
4
4
|
module Tiktoken
|
5
5
|
class Tokenizer
|
6
6
|
def initialize(model: nil)
|
7
|
-
@tokenizer = PyCall.import_module("tiktoken")
|
8
7
|
@model = model
|
9
|
-
@encoding =
|
8
|
+
@encoding = determine_encoding(model)
|
10
9
|
end
|
11
10
|
|
12
11
|
def encode(input)
|
@@ -41,6 +40,11 @@ module Roseflow
|
|
41
40
|
|
42
41
|
private
|
43
42
|
|
43
|
+
def determine_encoding(model)
|
44
|
+
encoding = model ? ::Tiktoken.encoding_for_model(model) : ::Tiktoken.get_encoding("cl100k_base")
|
45
|
+
encoding.is_a?(::Tiktoken::Encoding) ? encoding : ::Tiktoken.get_encoding("cl100k_base")
|
46
|
+
end
|
47
|
+
|
44
48
|
def tokens_per_message_for_model(model)
|
45
49
|
case model
|
46
50
|
when "gpt-4"
|
data/roseflow-tiktoken.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.name = "roseflow-tiktoken"
|
7
7
|
spec.version = Roseflow::Tiktoken.gem_version
|
8
8
|
spec.authors = ["Lauri Jutila"]
|
9
|
-
spec.email = ["
|
9
|
+
spec.email = ["ljuti@users.noreply.github.com"]
|
10
10
|
|
11
11
|
spec.summary = "Tiktoken tokenizer for Roseflow."
|
12
12
|
spec.description = "Tiktoken tokenizer for Roseflow."
|
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
-
spec.add_dependency "
|
32
|
+
spec.add_dependency "tiktoken_ruby"
|
33
33
|
end
|
metadata
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: roseflow-tiktoken
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lauri Jutila
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: tiktoken_ruby
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
description: Tiktoken tokenizer for Roseflow.
|
28
28
|
email:
|
29
|
-
-
|
29
|
+
- ljuti@users.noreply.github.com
|
30
30
|
executables: []
|
31
31
|
extensions: []
|
32
32
|
extra_rdoc_files: []
|