PyPI - count-tokens - Versions diffs - 0.1.0__tar.gz - Mend

count-tokens 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

count_tokens-0.1.0/PKG-INFO +73 -0
count_tokens-0.1.0/README.md +56 -0
count_tokens-0.1.0/count_tokens/__init__.py +0 -0
count_tokens-0.1.0/count_tokens/count.py +56 -0
count_tokens-0.1.0/pyproject.toml +24 -0

count_tokens-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,73 @@
+Metadata-Version: 2.1
+Name: count-tokens
+Version: 0.1.0
+Summary: count number of tokens in the text file using tiktoken tokenizer from OpenAI
+License: MIT
+Author: Krystian Safjan
+Author-email: ksafjan@gmail.com
+Requires-Python: >=3.9,<4.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: tiktoken (>=0.4.0,<0.5.0)
+Description-Content-Type: text/markdown
+# Count tokens
+A simple tool with one purpose - counting tokens in a text file.
+## Requirements
+This package uses the [tiktoken](https://github.com/openai/tiktoken) library for tokenization.
+## Installation
+To use it from the command line, install the package in an isolated environment with pipx:
+```sh
+$ pipx install count_tokens
+```
+or install it in your current environment with pip.
+## Usage
+Open terminal and run:
+```shell
+$ count-tokens document.txt
+```
+You should see something like this:
+```shell
+File: document.txt
+Encoding: cl100k_base
+Number of tokens: 67
+```
+If you want to see just the token count, run:
+```shell
+$ count-tokens document.txt --quiet
+```
+and the output will be:
+```shell
+67
+```
+## Related Projects
+- [tiktoken](https://github.com/openai/tiktoken) - tokenization library used by this package
+## Credits
+Thanks to the authors of the tiktoken library for open sourcing their work.
+## License
+[MIT](https://izikeros.mit-license.org/) © [Krystian Safjan](https://safjan.com).

count_tokens-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,56 @@
+# Count tokens
+A simple tool with one purpose - counting tokens in a text file.
+## Requirements
+This package uses the [tiktoken](https://github.com/openai/tiktoken) library for tokenization.
+## Installation
+To use it from the command line, install the package in an isolated environment with pipx:
+```sh
+$ pipx install count_tokens
+```
+or install it in your current environment with pip.
+## Usage
+Open terminal and run:
+```shell
+$ count-tokens document.txt
+```
+You should see something like this:
+```shell
+File: document.txt
+Encoding: cl100k_base
+Number of tokens: 67
+```
+If you want to see just the token count, run:
+```shell
+$ count-tokens document.txt --quiet
+```
+and the output will be:
+```shell
+67
+```
+## Related Projects
+- [tiktoken](https://github.com/openai/tiktoken) - tokenization library used by this package
+## Credits
+Thanks to the authors of the tiktoken library for open sourcing their work.
+## License
+[MIT](https://izikeros.mit-license.org/) © [Krystian Safjan](https://safjan.com).

count_tokens-0.1.0/count_tokens/__init__.py ADDED Viewed

File without changes

count_tokens-0.1.0/count_tokens/count.py ADDED Viewed

@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+import argparse
+import tiktoken
+def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
+    """Returns the number of tokens in a text string."""
+    encoding = tiktoken.get_encoding(encoding_name)
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
+num_tokens_from_string(
+    "tiktoken is great!",
+)
+def count_tokens(file_path, encoding_name):
+    with open(file_path) as file:
+        text = file.read()
+    return num_tokens_from_string(text, encoding_name)
+def main():
+    parser = argparse.ArgumentParser(
+        description="Count the number of tokens in a text file."
+    )
+    parser.add_argument("file", help="Path to the input text file")
+    # add option -q quiets the output
+    parser.add_argument(
+        "-q", "--quiet", action="store_true", help="Print only the number of tokens"
+    )
+    # add option -e to specify the encoding
+    parser.add_argument(
+        "-e",
+        "--encoding",
+        default="cl100k_base",
+        help="Encoding to use (default: cl100k_base)",
+    )
+    args = parser.parse_args()
+    file_path = args.file
+    encoding_name = args.encoding
+    num_tokens = count_tokens(file_path, encoding_name)
+    if not args.quiet:
+        print(f"File: {file_path}")
+        print(f"Encoding: {encoding_name}")
+        print(f"Number of tokens: {num_tokens}")
+    else:
+        print(num_tokens)
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

count_tokens-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,24 @@
+[tool.poetry]
+name = "count-tokens"
+version = "0.1.0"
+description = "count number of tokens in the text file using tiktoken tokenizer from OpenAI"
+authors = ["Krystian Safjan <ksafjan@gmail.com>"]
+license = "MIT"
+readme = "README.md"
+packages = [{include = "count_tokens"}]
+[tool.poetry.dependencies]
+python = "^3.9"
+tiktoken = "^0.4.0"
+[tool.poetry.scripts]
+count-tokens = 'count_tokens.count:main'
+[tool.poetry.group.dev.dependencies]
+tox = "^4.6.3"
+pre-commit = "^3.3.3"
+pytest = "^7.4.0"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"