token_estimator 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -0
- data/lib/token_estimator/version.rb +1 -1
- data/lib/token_estimator.rb +2 -4
- data/token_estimator-0.1.1.gem +0 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e76555160bf7038e96625963f99f434979da61bab11f70c0e7293d83db8c1588
|
4
|
+
data.tar.gz: 46d04371c15c88c39d96f87223c7a0713f9ceae367c52a48b50ca59b57d417e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 110fb3caff83e609a5b4fa9e28ff92c1b0a866d9f30b669ea72b071e36f80e96377c451581dd94fa1155280311c9fd2fbb9ab5e9516c66c6c6b507aaba4972c7
|
7
|
+
data.tar.gz: 339ca2a1c63812c424cc2d1771d4dd665151ca069965511e469dd17587764a31957f463a2c5f5222e677413c9b1117f5331c1bd2762bd4d7c96260496e586f94
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -72,6 +72,9 @@ Counts tokens from a JSON object.
|
|
72
72
|
#### `count_tokens_from_html`
|
73
73
|
Counts tokens from an HTML string.
|
74
74
|
|
75
|
+
#### `TokenEstimator::Estimator::SUPPORTED_FILE_TYPES`
|
76
|
+
Return the supported file types.
|
77
|
+
|
75
78
|
## Roadmap
|
76
79
|
Here is a checklist of the formats we currently support for token counting and those we plan to support in the future:
|
77
80
|
|
data/lib/token_estimator.rb
CHANGED
@@ -13,6 +13,8 @@ module TokenEstimator
|
|
13
13
|
class UnsupportedFileTypeError < StandardError; end
|
14
14
|
|
15
15
|
class Estimator
|
16
|
+
SUPPORTED_FILE_TYPES = [".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
|
17
|
+
|
16
18
|
def initialize(tokenizer_name)
|
17
19
|
@tokenizer = Tokenizers.from_pretrained(tokenizer_name)
|
18
20
|
end
|
@@ -95,10 +97,6 @@ module TokenEstimator
|
|
95
97
|
tokens.count
|
96
98
|
end
|
97
99
|
|
98
|
-
def supported_file_types
|
99
|
-
[".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
|
100
|
-
end
|
101
|
-
|
102
100
|
private
|
103
101
|
|
104
102
|
def extract_text_from_excel(xlsx)
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: token_estimator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aemabit
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- lib/token_estimator/version.rb
|
140
140
|
- sig/token_estimator.rbs
|
141
141
|
- token_estimator-0.1.0.gem
|
142
|
+
- token_estimator-0.1.1.gem
|
142
143
|
homepage: https://github.com/aemabit/token_estimator
|
143
144
|
licenses:
|
144
145
|
- MIT
|